391 lines
12 KiB
JavaScript
391 lines
12 KiB
JavaScript
const startTime = Date.now();
|
|
const fs = require('fs');
|
|
const ueberDB = require('../src/node_modules/ueberdb2');
|
|
const mysql = require('../src/node_modules/ueberdb2/node_modules/mysql');
|
|
const async = require('../src/node_modules/async');
|
|
const Changeset = require('ep_etherpad-lite/static/js/Changeset');
|
|
const randomString = require('ep_etherpad-lite/static/js/pad_utils').randomString;
|
|
const AttributePool = require('ep_etherpad-lite/static/js/AttributePool');
|
|
|
|
// Command line: node convert.js $SETTINGSFILE $SQLOUTPUT
const [settingsFile, sqlOutputFile] = process.argv.slice(2);

// both the settings file and the SQL output path are mandatory
if (!(settingsFile && sqlOutputFile)) {
  console.error('Use: node convert.js $SETTINGSFILE $SQLOUTPUT');
  process.exit(1);
}

// load the JSON settings that describe how to reach the old database
log('read settings file...');
const settings = JSON.parse(fs.readFileSync(settingsFile, 'utf8'));
log('done');

// open (truncate) the output file and write the SQL preamble: character set,
// the key/value `store` table and the start of one big transaction
log('open output file...');
const sqlOutput = fs.openSync(sqlOutputFile, 'w');
const sql = [
  'SET CHARACTER SET UTF8;',
  'CREATE TABLE IF NOT EXISTS `store` ( ',
  '`key` VARCHAR( 100 ) NOT NULL , ',
  '`value` LONGTEXT NOT NULL , ',
  'PRIMARY KEY ( `key` ) ',
  ') ENGINE = INNODB;',
  'START TRANSACTION;',
  '',
  '',
].join('\n');
fs.writeSync(sqlOutput, sql);
log('done');

// connection to the old Etherpad database, configured from settings.etherpadDB
const etherpadDB = mysql.createConnection(
    ['host', 'user', 'password', 'database', 'port'].reduce((options, name) => {
      options[name] = settings.etherpadDB[name];
      return options;
    }, {}));

// one timestamp shared by every author record generated during this run
const timestamp = Date.now();

// rows of `SELECT ID FROM PAD_META`, filled in by the first conversion step
let padIDs;
|
|
|
|
// Main driver: load all pad IDs, convert every pad through a worker queue,
// then write the group records, close the transaction and exit.
async.series([
  // step 1: get all pad IDs out of the old database
  (callback) => {
    log('get all padIds out of the database...');

    etherpadDB.query('SELECT ID FROM PAD_META', [], (err, _padIDs) => {
      padIDs = _padIDs;
      callback(err);
    });
  },
  // step 2: convert all pads, at most 100 at a time
  (callback) => {
    log('done');

    // Nothing to convert: no task would ever be pushed, so the queue's drain
    // callback would never run and the script would hang without writing the
    // closing COMMIT. Continue with the next step right away instead.
    if (padIDs.length === 0) {
      callback();
      return;
    }

    // create a queue with a concurrency of 100
    const queue = async.queue((padId, done) => {
      convertPad(padId, (err) => {
        incrementPadStats();
        done(err);
      });
    }, 100);

    // finish this step once the last queued pad has been processed
    // NOTE(review): property-style `drain` assignment matches older async
    // releases; confirm against the bundled async version before upgrading.
    queue.drain = callback;

    // add the pad IDs to the worker queue
    for (const row of padIDs) {
      queue.push(row.ID);
    }
  },
], (err) => {
  if (err) throw err;

  // write the groups: one record per group plus the subdomain -> group mapper
  let sql = '';
  for (const proID in proID2groupID) {
    const groupID = proID2groupID[proID];
    const subdomain = proID2subdomain[proID];

    sql += `REPLACE INTO store VALUES (${etherpadDB.escape(`group:${groupID}`)}, ${etherpadDB.escape(JSON.stringify(groups[groupID]))});\n`;
    sql += `REPLACE INTO store VALUES (${etherpadDB.escape(`mapper2group:subdomain:${subdomain}`)}, ${etherpadDB.escape(groupID)});\n`;
  }

  // close transaction
  sql += 'COMMIT;';

  // end the sql file
  fs.writeSync(sqlOutput, sql, undefined, 'utf-8');
  fs.closeSync(sqlOutput);

  log('finished.');
  process.exit(0);
});
|
|
|
|
/**
 * Prints a progress message to stdout, prefixed with the number of seconds
 * elapsed since `startTime` and a tab character.
 *
 * @param {string} str - the message to print
 */
function log(str) {
  const elapsedSeconds = (Date.now() - startTime) / 1000;
  console.log(`${elapsedSeconds}\t${str}`);
}
|
|
|
|
// number of pads processed so far (successfully or not)
let padsDone = 0;

/**
 * Counts one more processed pad and, on every 100th pad, logs the progress
 * (`done/total`) together with the overall throughput in pads per second.
 */
function incrementPadStats() {
  padsDone += 1;

  // only report on every 100th pad
  if (padsDone % 100 !== 0) return;

  const elapsedSeconds = (Date.now() - startTime) / 1000;
  const averageTime = Math.round(padsDone / elapsedSeconds);
  log(`${padsDone}/${padIDs.length}\t${averageTime} pad/s`);
}
|
|
|
|
// Conversion state shared by all pads. The maps are filled while pro pads are
// converted and are read once at the very end, when the group records are
// written to the SQL output.
const proID2groupID = {}; // pro ID (pad ID prefix before '$') -> generated group ID
const proID2subdomain = {}; // pro ID -> subdomain read from pro_domains
const groups = {}; // group ID -> {pads: {<new pad ID>: 1}}
|
|
|
|
/**
 * Converts a single pad of the old Etherpad database into
 * `REPLACE INTO store` statements and appends them to the SQL output file.
 *
 * First all data belonging to the pad is loaded in parallel (chat messages,
 * revisions, revision meta data, attribute pool, authors, pad meta data and,
 * for pro pads, the subdomain). Then the key/value records are built:
 * generated global authors, one record per revision, one per chat message
 * and the pad record with its latest atext.
 *
 * Pads whose ID contains a '$' are pro pads; they are renamed to
 * `<groupID>$<padName>` and registered in the shared group maps.
 *
 * A pad that cannot be loaded or converted is reported on stderr and
 * skipped. Load errors are additionally passed to `callback`; conversion
 * errors are not.
 *
 * @param {string} padId - ID of the pad in the old database
 * @param {Function} callback - called with an optional error when done
 */
function convertPad(padId, callback) {
  const changesets = [];
  const changesetsMeta = [];
  const chatMessages = [];
  const authors = [];
  let apool;
  let subdomain;
  let padmeta;

  // ID under which the pad is written; differs from padId for pro pads
  let newPadId = padId;

  // Number of colors a random fallback color index is drawn from.
  // NOTE(review): the original called exports.getColorPalette(), which is not
  // defined in this file. 64 is assumed to match the palette size of
  // Etherpad's AuthorManager - confirm.
  const FALLBACK_PALETTE_SIZE = 64;

  // Builds a task that loads all pages of the paged table pair
  // PAD_<table>_TEXT / PAD_<table>_META for this pad and unpacks them into
  // `target` (units are JSON-parsed when `json` is true).
  const loadPages = (table, target, json) => (done) => {
    const sql = `SELECT * FROM \`PAD_${table}_TEXT\` WHERE NUMID = (SELECT \`NUMID\` FROM \`PAD_${table}_META\` WHERE ID=?)`;

    etherpadDB.query(sql, [padId], (err, results) => {
      if (!err) {
        try {
          // parse the pages
          for (const page of results) {
            parsePage(target, page.PAGESTART, page.OFFSETS, page.DATA, json);
          }
        } catch (e) { err = e; }
      }

      done(err);
    });
  };

  // Builds a task that runs a single-row query for this pad and hands the
  // `x` property of the parsed JSON column to `assign`.
  const loadJson = (sql, assign) => (done) => {
    etherpadDB.query(sql, [padId], (err, results) => {
      if (!err) {
        try {
          assign(JSON.parse(results[0].JSON).x);
        } catch (e) { err = e; }
      }

      done(err);
    });
  };

  async.series([
    // get all needed db values
    (done) => {
      async.parallel([
        // get the chat entries
        loadPages('CHAT', chatMessages, true),
        // get the pad revisions (raw changeset strings, not JSON)
        loadPages('REVS', changesets, false),
        // get the pad revisions meta data
        loadPages('REVMETA', changesetsMeta, true),
        // get the attribute pool of this pad
        loadJson('SELECT `JSON` FROM `PAD_APOOL` WHERE `ID` = ?', (value) => { apool = value; }),
        // get the authors information
        loadPages('AUTHORS', authors, true),
        // get the pad information
        loadJson('SELECT JSON FROM `PAD_META` WHERE ID=?', (value) => { padmeta = value; }),
        // get the subdomain
        (subdomainDone) => {
          // skip if this is no proPad
          if (padId.indexOf('$') === -1) {
            subdomainDone();
            return;
          }

          // get the proID out of this padID
          const proID = padId.split('$')[0];
          const sql = 'SELECT subDomain FROM pro_domains WHERE ID = ?';

          etherpadDB.query(sql, [proID], (err, results) => {
            if (!err) {
              // a missing pro_domains row must fail this pad only, not throw
              // out of the query callback
              try {
                subdomain = results[0].subDomain;
              } catch (e) { err = e; }
            }

            subdomainDone(err);
          });
        },
      ], done);
    },
    (done) => {
      // saves all values that should be written to the database
      const values = {};

      // this is a pro pad, let's convert it to a group pad
      if (padId.indexOf('$') !== -1) {
        const padIdParts = padId.split('$');
        const proID = padIdParts[0];
        const padName = padIdParts[1];

        // this proID is not converted so far, do it
        if (proID2groupID[proID] == null) {
          const generatedGroupID = `g.${randomString(16)}`;

          // create the mappers for this new group
          proID2groupID[proID] = generatedGroupID;
          proID2subdomain[proID] = subdomain;
          groups[generatedGroupID] = {pads: {}};
        }

        // use the generated groupID
        const groupID = proID2groupID[proID];

        // rename the pad
        newPadId = `${groupID}$${padName}`;

        // set the value for this pad in the group
        groups[groupID].pads[newPadId] = 1;
      }

      try {
        const newAuthorIDs = {};
        const oldName2newName = {};

        // replace the authors with generated authors
        // we need to do that because the original etherpad saves pad local
        // authors, while the new (lite) etherpad uses them globally
        for (const num in apool.numToAttrib) {
          const key = apool.numToAttrib[num][0];
          const value = apool.numToAttrib[num][1];

          // skip non authors and anonymous authors
          if (key !== 'author' || value === '') continue;

          // generate new author values
          const author = authors[num];
          const authorID = `a.${randomString(16)}`;
          // keep a stored color (0 is a valid index: the random fallback can
          // produce it too); otherwise pick a random palette index
          const storedColor = author.colorId;
          const authorColorID = (storedColor === 0 || storedColor)
            ? storedColor
            : Math.floor(Math.random() * FALLBACK_PALETTE_SIZE);
          const authorName = author.name || null;

          // overwrite the authorID of the attribute pool
          apool.numToAttrib[num][1] = authorID;

          // write the author to the database
          values[`globalAuthor:${authorID}`] = {colorId: authorColorID, name: authorName, timestamp};

          // save in mappers
          newAuthorIDs[num] = authorID;
          oldName2newName[value] = authorID;
        }

        // save all revisions
        for (let rev = 0; rev < changesets.length; rev++) {
          values[`pad:${newPadId}:revs:${rev}`] = {
            changeset: changesets[rev],
            meta: {
              author: newAuthorIDs[changesetsMeta[rev].a],
              timestamp: changesetsMeta[rev].t,
              atext: changesetsMeta[rev].atext || undefined,
            },
          };
        }

        // save all chat messages
        for (let n = 0; n < chatMessages.length; n++) {
          values[`pad:${newPadId}:chat:${n}`] = {
            text: chatMessages[n].lineText,
            userId: oldName2newName[chatMessages[n].userId],
            time: chatMessages[n].time,
          };
        }

        // generate the latest atext: start from the atext stored with the
        // last key revision and apply all later changesets up to head
        const fullAPool = (new AttributePool()).fromJsonable(apool);
        const keyRev = Math.floor(padmeta.head / padmeta.keyRevInterval) * padmeta.keyRevInterval;
        let atext = changesetsMeta[keyRev].atext;
        let curRev = keyRev;
        while (curRev < padmeta.head) {
          curRev++;
          const changeset = changesets[curRev];
          atext = Changeset.applyToAText(changeset, atext, fullAPool);
        }

        values[`pad:${newPadId}`] = {
          atext,
          pool: apool,
          head: padmeta.head,
          chatHead: padmeta.numChatMessages,
        };
      } catch (e) {
        // best effort: report the broken pad and go on with the others
        console.error(`Error while converting pad ${newPadId}, pad skipped`);
        console.error(e.stack ? e.stack : JSON.stringify(e));
        done();
        return;
      }

      let sql = '';
      for (const key in values) {
        sql += `REPLACE INTO store VALUES (${etherpadDB.escape(key)}, ${etherpadDB.escape(JSON.stringify(values[key]))});\n`;
      }

      fs.writeSync(sqlOutput, sql, undefined, 'utf-8');
      done();
    },
  ], (err) => {
    // The caller in this file does not report errors passed to it, so make
    // load failures visible here before handing them on.
    if (err) {
      console.error(`Error while loading pad ${padId}, pad skipped`);
      console.error(err.stack ? err.stack : JSON.stringify(err));
    }
    callback(err);
  });
}
|
|
|
|
/**
 * Unpacks one database "page" into `array`.
 *
 * A page stores several units back to back in `data`; `offsets` is a
 * comma-separated list with the length of each unit. Unit number `i` of the
 * page is stored at `array[pageStart + i]`. Empty entries in `offsets` are
 * skipped: they consume an index but no data, leaving a hole in `array`.
 *
 * @param {Array} array - target array, modified in place
 * @param {number} pageStart - index of the first unit of this page
 * @param {string} offsets - comma-separated unit lengths
 * @param {string} data - all units concatenated
 * @param {boolean} json - whether each unit is JSON-parsed before storing
 */
function parsePage(array, pageStart, offsets, data, json) {
  // position in `data` where the next unit begins
  let cursor = 0;

  offsets.split(',').forEach((rawLength, index) => {
    // skip empty units
    if (rawLength === '') return;

    const size = Number(rawLength);

    // cut the unit out of data and store it, parsed if requested
    const unit = data.substr(cursor, size);
    array[pageStart + index] = json ? JSON.parse(unit) : unit;

    cursor += size;
  });
}
|