Merge pull request #2668 from simong/tidy

Tidy HTML before trying to convert it with abiword
This commit is contained in:
John McLear 2015-05-18 20:04:15 +01:00
commit 5615bab0d9
5 changed files with 156 additions and 35 deletions

View File

@ -90,6 +90,10 @@
Abiword is needed to enable advanced import/export features of pads*/
"abiword" : null,
/* This is the path to the Tidy executable. Setting it to null disables Tidy.
Tidy is used to improve the quality of exported pads*/
"tidyHtml" : null,
/* Allow import of file types other than the supported types: txt, doc, docx, rtf, odt, html & htm */
"allowUnknownFileEnds" : true,

View File

@ -28,6 +28,7 @@ var fs = require("fs");
var settings = require('../utils/Settings');
var os = require('os');
var hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks");
var TidyHtml = require('../utils/TidyHtml');
//load abiword only if its enabled
if(settings.abiword != null)
@ -172,12 +173,19 @@ exports.doExport = function(req, res, padId, type)
fs.writeFile(srcFile, html, callback);
}
},
//send the convert job to abiword
// Tidy up the exported HTML
function(callback)
{
//ensure html can be collected by the garbage collector
html = null;
TidyHtml.tidy(srcFile, callback);
},
//send the convert job to abiword
function(callback)
{
destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type;
abiword.convertFile(srcFile, destFile, type, callback);
},

View File

@ -152,6 +152,11 @@ exports.minify = true;
*/
exports.abiword = null;
/**
* The path of the tidy executable.
* While this is left as null (or any other falsy value), TidyHtml.tidy()
* skips the tidy step and calls back immediately without an error.
*/
exports.tidyHtml = null;
/**
* Should we support file types that are not natively supported on import?
*/

View File

@ -0,0 +1,41 @@
/**
* Tidy up the HTML in a given file
*/
var log4js = require('log4js');
var settings = require('./Settings');
var spawn = require('child_process').spawn;
exports.tidy = function(srcFile, callback) {
var logger = log4js.getLogger('TidyHtml');
// Don't do anything if Tidy hasn't been enabled
if (!settings.tidyHtml) {
logger.debug('tidyHtml has not been configured yet, ignoring tidy request');
return callback(null);
}
var errMessage = '';
// Spawn a new tidy instance that cleans up the file inline
logger.debug('Tidying ' + srcFile);
var tidy = spawn(settings.tidyHtml, ['-modify', srcFile]);
// Keep track of any error messages
tidy.stderr.on('data', function (data) {
errMessage += data.toString();
});
// Wait until Tidy is done
tidy.on('close', function(code) {
// Tidy returns a 0 when no errors occur and a 1 exit code when
// the file could be tidied but a few warnings were generated
if (code === 0 || code === 1) {
logger.debug('Tidied ' + srcFile + ' successfully');
return callback(null);
} else {
logger.error('Failed to tidy ' + srcFile + '\n' + errMessage);
return callback('Tidy died with exit code ' + code);
}
});
};

View File

@ -0,0 +1,63 @@
// Modules used by the tests. TidyHtml and Settings are assigned in the
// `before` hook, once npm has been loaded.
var assert = require('assert'),
fs = require('fs'),
path = require('path'),
TidyHtml = null,
Settings = null;
var npm = require("../../../../src/node_modules/npm/lib/npm.js");
// Tests for the TidyHtml utility. Both tests are skipped when no Tidy
// executable has been configured in the settings.
describe('tidyHtml', function() {
  // The modules under test are only required once npm.load() has completed
  before(function(done) {
    npm.load({}, function(err) {
      assert.ok(!err);
      TidyHtml = require('../../../../src/node/utils/TidyHtml');
      Settings = require('../../../../src/node/utils/Settings');
      return done();
    });
  });

  it('Tidies HTML', function(done) {
    // If the user hasn't configured Tidy, we skip this test as Tidy is required for it
    if (!Settings.tidyHtml) {
      this.skip();
    }

    // Try to tidy up a bad HTML file
    var tmpDir = process.env.TEMP || "/tmp";
    var tmpFile = path.join(tmpDir, 'tmp_' + (Math.floor(Math.random() * 1000000)) + '.html');
    fs.writeFileSync(tmpFile, '<html><body><p>a paragraph</p><li>List without outer UL</li>trailing closing p</p></body></html>');

    TidyHtml.tidy(tmpFile, function(err) {
      var cleanedHtml;
      try {
        assert.ok(!err);

        // Read the file again
        cleanedHtml = fs.readFileSync(tmpFile).toString();
      } finally {
        // Never leave the scratch file behind, even when an assertion fails
        if (fs.existsSync(tmpFile)) {
          fs.unlinkSync(tmpFile);
        }
      }

      var expectedHtml = [
        '<title></title>',
        '</head>',
        '<body>',
        '<p>a paragraph</p>',
        '<ul>',
        '<li>List without outer UL</li>',
        '<li style="list-style: none">trailing closing p</li>',
        '</ul>',
        '</body>',
        '</html>',
      ].join('\n');
      assert.notStrictEqual(cleanedHtml.indexOf(expectedHtml), -1);
      return done();
    });
  });

  it('can deal with errors', function(done) {
    // If the user hasn't configured Tidy, we skip this test as Tidy is required for it
    if (!Settings.tidyHtml) {
      this.skip();
    }

    // Tidy cannot modify a file that does not exist, so an error is expected
    TidyHtml.tidy('/some/none/existing/file.html', function(err) {
      assert.ok(err);
      return done();
    });
  });
});