Intial Commit
This commit is contained in:
188
nodered/rootfs/data/node_modules/html-to-text/lib/html-to-text.js
generated
vendored
Normal file
188
nodered/rootfs/data/node_modules/html-to-text/lib/html-to-text.js
generated
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
var includes = require('lodash/includes');
|
||||
var trimEnd = require('lodash/trimEnd');
|
||||
var htmlparser = require('htmlparser2');
|
||||
|
||||
var helper = require('./helper');
|
||||
var defaultFormat = require('./formatter');
|
||||
|
||||
// Which type of tags should not be parsed
|
||||
var SKIP_TYPES = [
|
||||
'style',
|
||||
'script'
|
||||
];
|
||||
|
||||
function htmlToText(html, options) {
|
||||
options = Object.assign({
|
||||
wordwrap: 80,
|
||||
tables: [],
|
||||
preserveNewlines: false,
|
||||
uppercaseHeadings: true,
|
||||
singleNewLineParagraphs: false,
|
||||
hideLinkHrefIfSameAsText: false,
|
||||
linkHrefBaseUrl: null,
|
||||
noLinkBrackets: false,
|
||||
noAnchorUrl: true,
|
||||
baseElement: 'body',
|
||||
returnDomByDefault: true,
|
||||
format: {},
|
||||
decodeOptions: {
|
||||
isAttributeValue: false,
|
||||
strict: false
|
||||
},
|
||||
longWordSplit: {
|
||||
wrapCharacters: [],
|
||||
forceWrapOnLimit: false
|
||||
},
|
||||
unorderedListItemPrefix: ' * '
|
||||
}, options || {});
|
||||
|
||||
var handler = new htmlparser.DefaultHandler(function (error, dom) {
|
||||
|
||||
}, {
|
||||
verbose: true
|
||||
});
|
||||
new htmlparser.Parser(handler).parseComplete(html);
|
||||
|
||||
options.lineCharCount = 0;
|
||||
|
||||
var result = '';
|
||||
var baseElements = Array.isArray(options.baseElement) ? options.baseElement : [options.baseElement];
|
||||
for (var idx = 0; idx < baseElements.length; ++idx) {
|
||||
result += walk(filterBody(handler.dom, options, baseElements[idx]), options);
|
||||
}
|
||||
return trimEnd(result);
|
||||
}
|
||||
|
||||
function filterBody(dom, options, baseElement) {
|
||||
var result = null;
|
||||
|
||||
var splitTag = helper.splitCssSearchTag(baseElement);
|
||||
|
||||
function walk(dom) {
|
||||
if (result) return;
|
||||
dom.forEach(function(elem) {
|
||||
if (result) return;
|
||||
if (elem.name === splitTag.element) {
|
||||
var documentClasses = elem.attribs && elem.attribs.class ? elem.attribs.class.split(" ") : [];
|
||||
var documentIds = elem.attribs && elem.attribs.id ? elem.attribs.id.split(" ") : [];
|
||||
|
||||
if ((splitTag.classes.every(function (val) { return documentClasses.indexOf(val) >= 0; })) &&
|
||||
(splitTag.ids.every(function (val) { return documentIds.indexOf(val) >= 0; }))) {
|
||||
result = [elem];
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (elem.children) walk(elem.children);
|
||||
});
|
||||
}
|
||||
walk(dom);
|
||||
return options.returnDomByDefault ? result || dom : result;
|
||||
}
|
||||
|
||||
function containsTable(attr, tables) {
|
||||
if (tables === true) return true;
|
||||
|
||||
function removePrefix(key) {
|
||||
return key.substr(1);
|
||||
}
|
||||
function checkPrefix(prefix) {
|
||||
return function(key) {
|
||||
return key.startsWith(prefix);
|
||||
};
|
||||
}
|
||||
function filterByPrefix(tables, prefix) {
|
||||
return tables
|
||||
.filter(checkPrefix(prefix))
|
||||
.map(removePrefix);
|
||||
}
|
||||
var classes = filterByPrefix(tables, '.');
|
||||
var ids = filterByPrefix(tables, '#');
|
||||
return attr && (includes(classes, attr['class']) || includes(ids, attr['id']));
|
||||
}
|
||||
|
||||
function walk(dom, options, result) {
|
||||
if (arguments.length < 3) {
|
||||
result = '';
|
||||
}
|
||||
var whiteSpaceRegex = /\s$/;
|
||||
var format = Object.assign({}, defaultFormat, options.format);
|
||||
|
||||
if (!dom) {
|
||||
return result;
|
||||
}
|
||||
|
||||
dom.forEach(function(elem) {
|
||||
switch(elem.type) {
|
||||
case 'tag':
|
||||
switch(elem.name.toLowerCase()) {
|
||||
case 'img':
|
||||
result += format.image(elem, options);
|
||||
break;
|
||||
case 'a':
|
||||
// Inline element needs its leading space to be trimmed if `result`
|
||||
// currently ends with whitespace
|
||||
elem.trimLeadingSpace = whiteSpaceRegex.test(result);
|
||||
result += format.anchor(elem, walk, options);
|
||||
break;
|
||||
case 'p':
|
||||
result += format.paragraph(elem, walk, options);
|
||||
break;
|
||||
case 'h1':
|
||||
case 'h2':
|
||||
case 'h3':
|
||||
case 'h4':
|
||||
case 'h5':
|
||||
case 'h6':
|
||||
result += format.heading(elem, walk, options);
|
||||
break;
|
||||
case 'br':
|
||||
result += format.lineBreak(elem, walk, options);
|
||||
break;
|
||||
case 'hr':
|
||||
result += format.horizontalLine(elem, walk, options);
|
||||
break;
|
||||
case 'ul':
|
||||
result += format.unorderedList(elem, walk, options);
|
||||
break;
|
||||
case 'ol':
|
||||
result += format.orderedList(elem, walk, options);
|
||||
break;
|
||||
case 'pre':
|
||||
var newOptions = Object.assign({}, options);
|
||||
newOptions.isInPre = true;
|
||||
result += format.paragraph(elem, walk, newOptions);
|
||||
break;
|
||||
case 'table':
|
||||
result = containsTable(elem.attribs, options.tables)
|
||||
? result + format.table(elem, walk, options)
|
||||
: walk(elem.children || [], options, result);
|
||||
break;
|
||||
case 'blockquote':
|
||||
result += format.blockquote(elem, walk, options);
|
||||
break;
|
||||
default:
|
||||
result = walk(elem.children || [], options, result);
|
||||
}
|
||||
break;
|
||||
case 'text':
|
||||
if (elem.data !== '\r\n') {
|
||||
// Text needs its leading space to be trimmed if `result`
|
||||
// currently ends with whitespace
|
||||
elem.trimLeadingSpace = whiteSpaceRegex.test(result);
|
||||
result += format.text(elem, options);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (!includes(SKIP_TYPES, elem.type)) {
|
||||
result = walk(elem.children || [], options, result);
|
||||
}
|
||||
}
|
||||
|
||||
options.lineCharCount = result.length - (result.lastIndexOf('\n') + 1);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
exports.fromString = function(str, options) {
|
||||
return htmlToText(str, options || {});
|
||||
};
|
||||
Reference in New Issue
Block a user