Избавился от easysax.js в пользу своего sax.js

This commit is contained in:
Book Pauk
2019-01-27 03:23:14 +07:00
parent 0b0692a331
commit bc1f85208a
4 changed files with 192 additions and 787 deletions

View File

@@ -1,4 +1,4 @@
import EasySAXParser from './easysax'; import sax from '../../../../server/core/BookConverter/sax';
import {sleep} from '../../../share/utils'; import {sleep} from '../../../share/utils';
export default class BookParser { export default class BookParser {
@@ -19,8 +19,6 @@ export default class BookParser {
callback = () => {}; callback = () => {};
callback(0); callback(0);
this.data = data;
if (data.indexOf('<FictionBook') < 0) { if (data.indexOf('<FictionBook') < 0) {
throw new Error('Неверный формат файла'); throw new Error('Неверный формат файла');
} }
@@ -35,7 +33,6 @@ export default class BookParser {
let path = ''; let path = '';
let tag = ''; let tag = '';
let nextPerc = 0;
let center = false; let center = false;
let bold = false; let bold = false;
@@ -61,6 +58,7 @@ export default class BookParser {
para[paraIndex] = p; para[paraIndex] = p;
paraOffset += p.length; paraOffset += p.length;
}; };
const growParagraph = (text, len) => { const growParagraph = (text, len) => {
let p = para[paraIndex]; let p = para[paraIndex];
if (p) { if (p) {
@@ -84,16 +82,14 @@ export default class BookParser {
paraOffset += p.length; paraOffset += p.length;
}; };
const parser = new EasySAXParser(); const onStartNode = (elemName) => {// eslint-disable-line no-unused-vars
if (elemName == '?xml')
return;
parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars
});
parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars
tag = elemName; tag = elemName;
path += '/' + elemName; path += '/' + elemName;
if ((tag == 'p' || tag == 'empty-line') && path.indexOf('/FictionBook/body/section') == 0) { if ((tag == 'p' || tag == 'empty-line') && path.indexOf('/fictionbook/body/section') == 0) {
newParagraph(' ', 1); newParagraph(' ', 1);
} }
@@ -111,9 +107,9 @@ export default class BookParser {
newParagraph(' ', 1); newParagraph(' ', 1);
bold = true; bold = true;
} }
}); };
parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars const onEndNode = (elemName) => {// eslint-disable-line no-unused-vars
if (tag == elemName) { if (tag == elemName) {
if (tag == 'emphasis' || tag == 'strong') { if (tag == 'emphasis' || tag == 'strong') {
growParagraph(`</${tag}>`, 0); growParagraph(`</${tag}>`, 0);
@@ -135,9 +131,9 @@ export default class BookParser {
tag = path; tag = path;
} }
} }
}); };
parser.on('textNode', (text) => { const onTextNode = (text) => {// eslint-disable-line no-unused-vars
text = text.replace(/&nbsp;|[\t\n\r]/g, ' '); text = text.replace(/&nbsp;|[\t\n\r]/g, ' ');
if (text != ' ' && text.trim() == '') if (text != ' ' && text.trim() == '')
@@ -147,30 +143,30 @@ export default class BookParser {
return; return;
switch (path) { switch (path) {
case '/FictionBook/description/title-info/author/first-name': case '/fictionbook/description/title-info/author/first-name':
fb2.firstName = text; fb2.firstName = text;
break; break;
case '/FictionBook/description/title-info/author/middle-name': case '/fictionbook/description/title-info/author/middle-name':
fb2.middleName = text; fb2.middleName = text;
break; break;
case '/FictionBook/description/title-info/author/last-name': case '/fictionbook/description/title-info/author/last-name':
fb2.lastName = text; fb2.lastName = text;
break; break;
case '/FictionBook/description/title-info/genre': case '/fictionbook/description/title-info/genre':
fb2.genre = text; fb2.genre = text;
break; break;
case '/FictionBook/description/title-info/date': case '/fictionbook/description/title-info/date':
fb2.date = text; fb2.date = text;
break; break;
case '/FictionBook/description/title-info/book-title': case '/fictionbook/description/title-info/book-title':
fb2.bookTitle = text; fb2.bookTitle = text;
break; break;
case '/FictionBook/description/title-info/id': case '/fictionbook/description/title-info/id':
fb2.id = text; fb2.id = text;
break; break;
} }
if (path.indexOf('/FictionBook/description/title-info/annotation') == 0) { if (path.indexOf('/fictionbook/description/title-info/annotation') == 0) {
if (!fb2.annotation) if (!fb2.annotation)
fb2.annotation = ''; fb2.annotation = '';
if (tag != 'annotation') if (tag != 'annotation')
@@ -184,11 +180,11 @@ export default class BookParser {
let tClose = (center ? '</center>' : ''); let tClose = (center ? '</center>' : '');
tClose += (bold ? '</strong>' : ''); tClose += (bold ? '</strong>' : '');
if (path.indexOf('/FictionBook/body/title') == 0) { if (path.indexOf('/fictionbook/body/title') == 0) {
newParagraph(`${tOpen}${text}${tClose}`, text.length, true); newParagraph(`${tOpen}${text}${tClose}`, text.length, true);
} }
if (path.indexOf('/FictionBook/body/section') == 0) { if (path.indexOf('/fictionbook/body/section') == 0) {
switch (tag) { switch (tag) {
case 'p': case 'p':
growParagraph(`${tOpen}${text}${tClose}`, text.length); growParagraph(`${tOpen}${text}${tClose}`, text.length);
@@ -197,24 +193,17 @@ export default class BookParser {
growParagraph(`${tOpen}${text}${tClose}`, text.length); growParagraph(`${tOpen}${text}${tClose}`, text.length);
} }
} }
}); };
parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars const onProgress = async(prog) => {
}); await sleep(1);
callback(prog);
};
parser.on('comment', (text) => {// eslint-disable-line no-unused-vars await sax.parse(data, {
onStartNode, onEndNode, onTextNode, onProgress
}); });
parser.on('progress', async(progress) => {
if (progress > nextPerc) {
await sleep(1);
callback(progress);
nextPerc += 10;
}
});
await parser.parse(data);
this.fb2 = fb2; this.fb2 = fb2;
this.para = para; this.para = para;
this.textLength = paraOffset; this.textLength = paraOffset;
@@ -252,17 +241,16 @@ export default class BookParser {
style: {bold: Boolean, italic: Boolean, center: Boolean}, style: {bold: Boolean, italic: Boolean, center: Boolean},
text: String, text: String,
}*/ }*/
const parser = new EasySAXParser();
let style = {}; let style = {};
parser.on('textNode', (text) => { const onTextNode = async(text) => {// eslint-disable-line no-unused-vars
result.push({ result.push({
style: Object.assign({}, style), style: Object.assign({}, style),
text: text text: text
}); });
}); };
parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars const onStartNode = async(elemName) => {// eslint-disable-line no-unused-vars
switch (elemName) { switch (elemName) {
case 'strong': case 'strong':
style.bold = true; style.bold = true;
@@ -274,9 +262,9 @@ export default class BookParser {
style.center = true; style.center = true;
break; break;
} }
}); };
parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars const onEndNode = async(elemName) => {// eslint-disable-line no-unused-vars
switch (elemName) { switch (elemName) {
case 'strong': case 'strong':
style.bold = false; style.bold = false;
@@ -288,9 +276,11 @@ export default class BookParser {
style.center = false; style.center = false;
break; break;
} }
}); };
parser.parse(`<p>${s}</p>`); sax.parseSync(s, {
onStartNode, onEndNode, onTextNode
});
return result; return result;
} }

View File

@@ -1,736 +0,0 @@
'use strict';
/*
new function() {
var parser = new EasySAXParser();
parser.ns('rss', { // or false
'http://search.yahoo.com/mrss/': 'media',
'http://www.w3.org/1999/xhtml': 'xhtml',
'http://www.w3.org/2005/Atom': 'atom',
'http://purl.org/rss/1.0/': 'rss',
});
parser.on('error', function(msgError) {
});
parser.on('startNode', function(elemName, getAttr, isTagEnd, getStrNode) {
var attr = getAttr();
});
parser.on('endNode', function(elemName, isTagStart, getStrNode) {
});
parser.on('textNode', function(text) {
});
parser.on('cdata', function(data) {
});
parser.on('comment', function(text) {
//console.log('--'+text+'--')
});
//parser.on('unknownNS', function(key) {console.log('unknownNS: ' + key)});
//parser.on('question', function() {}); // <? ... ?>
//parser.on('attention', function() {}); // <!XXXXX zzzz="eeee">
console.time('easysax');
for(var z=1000;z--;) {
parser.parse(xml)
};
console.timeEnd('easysax');
};
*/
// << ------------------------------------------------------------------------ >> //
EasySAXParser.entityDecode = xmlEntityDecode;
export default EasySAXParser;
var stringFromCharCode = String.fromCharCode;
var objectCreate = Object.create;
function NULL_FUNC() {}
/**
 * Maps the name of a character entity (the text between "&" and ";") to the
 * character it stands for.
 *
 * Names are matched case-insensitively. A name that is not in the table is
 * handed back re-wrapped as "&name;", so unknown references pass through as-is.
 *
 * @param {string} x - entity name without the surrounding "&" and ";"
 * @returns {string} the decoded character, or "&" + x + ";" when the name is unknown
 */
function entity2char(x) {
    // Fold with toLowerCase(), not toLocaleLowerCase(): the latter follows the
    // host locale, and under Turkish/Azeri rules "I" lowers to dotless "ı",
    // so an upper-case name such as "MICRO" would no longer match the table.
    switch (x.toLowerCase()) {
        case 'quot': return '"';
        case 'amp': return '&';
        case 'lt': return '<';
        case 'gt': return '>';
        case 'plusmn': return '\u00B1';
        case 'laquo': return '\u00AB';
        case 'raquo': return '\u00BB';
        case 'micro': return '\u00B5';
        case 'nbsp': return '\u00A0';
        case 'copy': return '\u00A9';
        case 'sup2': return '\u00B2';
        case 'sup3': return '\u00B3';
        case 'para': return '\u00B6';
        case 'reg': return '\u00AE';
        case 'deg': return '\u00B0';
        case 'apos': return '\'';
    }

    return '&' + x + ';';
}
/**
 * Replacer callback for the entity regex used by xmlEntityDecode.
 *
 * Exactly one of d / x / z is expected to be set, depending on which
 * alternative of the regex matched.
 *
 * @param {string} s - the whole matched reference, e.g. "&#65;" or "&amp;"
 * @param {string} [d] - decimal digits of a "&#NNN;" reference
 * @param {string} [x] - hex digits of a "&#xHHH;" reference
 * @param {string} [z] - name of a "&name;" reference
 * @returns {string} the decoded text; the reference itself when the number is
 *     not a valid Unicode code point
 */
function replaceEntities(s, d, x, z) {
    if (z) {
        return entity2char(z);
    }

    const codePoint = d ? Number.parseInt(d, 10) : Number.parseInt(x, 16);

    // String.fromCodePoint throws a RangeError outside 0..0x10FFFF (and on NaN),
    // so leave such references undecoded instead of failing the whole parse.
    if (!(codePoint >= 0 && codePoint <= 0x10FFFF)) {
        return s;
    }

    // fromCodePoint (not fromCharCode) so references above U+FFFF produce the
    // proper surrogate pair instead of being truncated to 16 bits.
    return String.fromCodePoint(codePoint);
}
/**
 * Decodes XML character/entity references in a value.
 *
 * The argument is first converted to a string. Strings of three characters or
 * fewer, or without any "&", are returned unchanged. "&lt;", "&gt;" and
 * "&quot;" are substituted directly; whatever references remain afterwards are
 * resolved through replaceEntities.
 *
 * @param {*} s - value to decode
 * @returns {string} decoded string
 */
function xmlEntityDecode(s) {
    let decoded = '' + s;

    if (decoded.length <= 3 || !decoded.includes('&')) {
        return decoded;
    }

    const directSubstitutions = [
        ['&lt;', '<'],
        ['&gt;', '>'],
        ['&quot;', '"'],
    ];
    for (const [reference, character] of directSubstitutions) {
        if (decoded.includes(reference)) {
            decoded = decoded.split(reference).join(character);
        }
    }

    if (decoded.includes('&')) {
        decoded = decoded.replace(/&#(\d+);|&#x([0123456789abcdef]+);|&(\w+);/ig, replaceEntities);
    }

    return decoded;
}
/**
 * Makes a shallow copy of a namespace alias table.
 *
 * The copy is created with a null prototype and receives every enumerable
 * property that a for...in walk over the source yields.
 *
 * @param {Object} nsmatrix - alias table to copy
 * @returns {Object} a new prototype-less object with the same entries
 */
function cloneMatrixNS(nsmatrix) {
    const copy = objectCreate(null);

    for (const alias in nsmatrix) {
        copy[alias] = nsmatrix[alias];
    }

    return copy;
}
function EasySAXParser(config) {
if (!this) {
return null;
}
var onTextNode = NULL_FUNC, onStartNode = NULL_FUNC, onEndNode = NULL_FUNC, onCDATA = NULL_FUNC, onError = NULL_FUNC,
onComment, onQuestion, onAttention, onUnknownNS, onProgress;
var is_onComment = false, is_onQuestion = false, is_onAttention = false, is_onUnknownNS = false, is_onProgress = false;
var isAutoEntity = true; // делать "EntityDecode" всегда
var entityDecode = xmlEntityDecode;
var hasSurmiseNS = false;
var isNamespace = false;
var returnError = null;
var parseStop = false; // прервать парсер
var defaultNS;
var nsmatrix = null;
var useNS;
var xml = ''; // string
/**
 * Applies a configuration object to the parser.
 *
 * Recognised keys: "entityDecode" (decoder function; a falsy value keeps the
 * current one), "autoEntity" (coerced to boolean), "defaultNS", "ns" (alias
 * table; a truthy value switches namespace mode on) and "on" (map of event
 * name to handler, each registered through this.on). Other keys are ignored.
 *
 * @param {Object} op - configuration object
 */
this.setup = function(op) {
    for (const key in op) {
        if (key === 'entityDecode') {
            entityDecode = op.entityDecode || entityDecode;
        } else if (key === 'autoEntity') {
            isAutoEntity = !!op.autoEntity;
        } else if (key === 'defaultNS') {
            defaultNS = op.defaultNS || null;
        } else if (key === 'ns') {
            useNS = op.ns || null;
            isNamespace = !!useNS;
        } else if (key === 'on') {
            const handlers = op.on;
            for (const eventName in handlers) {
                this.on(eventName, handlers[eventName]);
            }
        }
    }
};
/**
 * Registers (or, with null, removes) the handler for one parser event.
 *
 * For startNode/textNode/endNode/error/cdata a removed handler is replaced by
 * the shared no-op. For unknownNS/attention/question/comment/progress the
 * handler is stored as given and a matching is_on* flag records whether it is
 * set. Unrecognised event names are ignored.
 *
 * @param {string} name - event name
 * @param {Function|null} cb - handler, or null to unregister
 * @throws {Error} when cb is neither a function nor null
 */
this.on = function(name, cb) {
    if (typeof cb !== 'function' && cb !== null) {
        throw Error('required args on(string, function||null)');
    }

    const handler = cb || NULL_FUNC;
    const isSet = !!cb;

    switch (name) {
        case 'startNode':
            onStartNode = handler;
            break;
        case 'textNode':
            onTextNode = handler;
            break;
        case 'endNode':
            onEndNode = handler;
            break;
        case 'error':
            onError = handler;
            break;
        case 'cdata':
            onCDATA = handler;
            break;
        case 'unknownNS':
            onUnknownNS = cb;
            is_onUnknownNS = isSet;
            break;
        case 'attention': // <!XXXXX zzzz="eeee">
            onAttention = cb;
            is_onAttention = isSet;
            break;
        case 'question': // <? .... ?>
            onQuestion = cb;
            is_onQuestion = isSet;
            break;
        case 'comment':
            onComment = cb;
            is_onComment = isSet;
            break;
        case 'progress':
            onProgress = cb;
            is_onProgress = isSet;
            break;
    }
};
/**
 * Switches namespace handling on or off.
 *
 * A falsy root disables namespace mode and clears the default namespace and
 * the alias table. Otherwise root becomes the default namespace alias and ns
 * the table that maps namespace URIs to aliases.
 *
 * @param {string|false} root - default namespace alias, or a falsy value to disable
 * @param {Object} [ns] - map of namespace URI to alias
 * @returns {EasySAXParser} this, for chaining
 * @throws {Error} when root is truthy but not a string, or ns is missing
 */
this.ns = function(root, ns) {
    if (!root) {
        useNS = null;
        defaultNS = null;
        isNamespace = false;
        return this;
    }

    if (typeof root !== 'string' || !ns) {
        throw Error('required args ns(string, object)');
    }

    // Both arguments are known to be truthy here.
    useNS = ns;
    isNamespace = true;
    defaultNS = root;

    return this;
};
/**
 * Parses an XML string, dispatching the registered event handlers.
 *
 * Errors are reported by value, not by rejection: the promise resolves to
 * null on success or to an error message string. An exception thrown by a
 * handler still propagates to the caller.
 *
 * @param {string} _xml - document text
 * @returns {Promise<string|null>} the last parse error message, or null
 */
this.parse = async function(_xml) {
    if (typeof _xml !== 'string') {
        return 'required args parser(string)'; // error
    }

    returnError = null;
    xml = _xml;

    try {
        if (isNamespace) {
            nsmatrix = objectCreate(null);
            nsmatrix.xmlns = defaultNS;
        }
        await parse();
    } finally {
        // Reset per-run state even when a handler throws, so the instance does
        // not keep the document text alive or start its next run with a stale
        // stop flag / namespace table.
        nsmatrix = null;
        parseStop = false;
        attrRes = true;
        xml = '';
    }

    return returnError;
};
// Requests that the running parse stop: sets the parseStop flag, which the
// parse loop checks after each handler call and which this.parse() clears
// again when it finishes.
this.stop = function() {
parseStop = true;
};
if (config) {
this.setup(config);
}
// -----------------------------------------------------
var stringNodePosStart; // number
var stringNodePosEnd; // number
var attrStartPos; // number начало позиции атрибутов в строке attrString <(div^ class="xxxx" title="sssss")/>
var attrString; // строка атрибутов <(div class="xxxx" title="sssss")/>
var attrRes; // закешированный результат разбора атрибутов , null - разбор не проводился, object - хеш атрибутов, true - нет атрибутов, false - невалидный xml
/*
    Parses the attributes of the current start tag on demand.
    Important: this function never throws.

    Returns (and caches in attrRes):
      false  - the attribute string is malformed
      true   - parsing succeeded and there are no attributes
      object - hash of attribute name -> value

    In namespace mode, prefixed attribute names are rewritten through nsmatrix:
    a prefix whose alias equals the default ("xmlns") alias is dropped, any
    other known prefix is replaced by its alias, and attributes with an unknown
    prefix are omitted.
*/
function getAttrs() {
    if (attrRes !== null) {
        return attrRes;
    }

    var xmlnsAlias;
    var nsAttrName;
    var attrList = isNamespace && hasSurmiseNS ? [] : null;
    var i = attrStartPos + 1; // the character at attrStartPos has already been checked
    var s = attrString;
    var l = s.length;
    var hasNewMatrix;
    var newalias;
    var value;
    var alias;
    var name;
    var res = {};
    var ok;
    var w;
    var j;

    for (; i < l; i++) {
        w = s.charCodeAt(i);

        if (w === 32 || (w < 14 && w > 8)) { // space, \f\n\r\t\v
            continue;
        }

        if (w < 65 || w > 122 || (w > 90 && w < 97)) { // not a letter
            if (w !== 95 && w !== 58) { // char 95"_" 58":"
                return attrRes = false; // error. invalid first char
            }
        }

        for (j = i + 1; j < l; j++) { // validate every character of the attribute name
            w = s.charCodeAt(j);

            if (w > 96 && w < 123 || w > 64 && w < 91 || w > 47 && w < 59 || w === 45 || w === 95) {
                continue;
            }

            if (w !== 61) { // "=" == 61
                return attrRes = false; // error. invalid char "="
            }

            break;
        }

        name = s.substring(i, j);
        ok = true;

        if (name === 'xmlns:xmlns') {
            return attrRes = false; // error. invalid name
        }

        w = s.charCodeAt(j + 1);

        if (w === 34) { // '"'
            j = s.indexOf('"', i = j + 2);
        } else {
            if (w !== 39) { // "'"
                return attrRes = false; // error. invalid char
            }
            j = s.indexOf('\'', i = j + 2);
        }

        if (j === -1) {
            return attrRes = false; // error. invalid char
        }

        if (j + 1 < l) {
            // The closing quote must be followed by whitespace.
            w = s.charCodeAt(j + 1);
            if (w > 32 || w < 9 || (w < 32 && w > 13)) {
                // error. invalid char
                return attrRes = false;
            }
        }

        value = s.substring(i, j);
        i = j + 1; // that character was just validated, so continue after it

        if (isAutoEntity) {
            value = entityDecode(value);
        }

        if (!isNamespace) {
            res[name] = value;
            continue;
        }

        if (hasSurmiseNS) {
            // The attribute string may contain xmlns declarations.
            newalias = (name !== 'xmlns'
                ? name.charCodeAt(0) === 120 && name.substr(0, 6) === 'xmlns:' ? name.substr(6) : null
                : 'xmlns'
            );

            if (newalias !== null) {
                alias = useNS[entityDecode(value)];
                if (is_onUnknownNS && !alias) {
                    alias = onUnknownNS(value);
                }

                if (alias) {
                    if (nsmatrix[newalias] !== alias) {
                        if (!hasNewMatrix) {
                            nsmatrix = cloneMatrixNS(nsmatrix);
                            hasNewMatrix = true;
                        }
                        nsmatrix[newalias] = alias;
                    }
                } else {
                    if (nsmatrix[newalias]) {
                        if (!hasNewMatrix) {
                            nsmatrix = cloneMatrixNS(nsmatrix);
                            hasNewMatrix = true;
                        }
                        nsmatrix[newalias] = false;
                    }
                }

                res[name] = value;
                continue;
            }

            // Defer ordinary attributes until every xmlns declaration of this
            // tag has been seen; they are resolved in the loop further down.
            attrList.push(name, value);
            continue;
        }

        w = name.indexOf(':');
        if (w === -1) {
            res[name] = value;
            continue;
        }

        nsAttrName = nsmatrix[name.substring(0, w)];
        if (nsAttrName) {
            nsAttrName = nsmatrix['xmlns'] === nsAttrName ? name.substr(w + 1) : nsAttrName + name.substr(w);
            // nsAttrName already holds the final key; appending name.substr(w)
            // again produced keys such as "media:url:url". This now matches the
            // deferred-attribute branch below.
            res[nsAttrName] = value;
        }
    }

    if (!ok) {
        return attrRes = true; // no attributes, and no errors either
    }

    if (hasSurmiseNS) {
        xmlnsAlias = nsmatrix['xmlns'];

        // attrList is a flat [name, value, name, value, ...] list.
        for (i = 0, l = attrList.length; i < l; i++) {
            name = attrList[i++];

            w = name.indexOf(':');
            if (w !== -1) {
                nsAttrName = nsmatrix[name.substring(0, w)];
                if (nsAttrName) {
                    nsAttrName = xmlnsAlias === nsAttrName ? name.substr(w + 1) : nsAttrName + name.substr(w);
                    res[nsAttrName] = attrList[i];
                }
                continue;
            }

            res[name] = attrList[i];
        }
    }

    return attrRes = res;
}
/**
 * Returns the raw source text of the tag currently being reported, i.e. the
 * slice of the document between stringNodePosStart and stringNodePosEnd
 * (both inclusive).
 *
 * @returns {string} raw tag text
 */
function getStringNode() {
    const from = stringNodePosStart;
    const to = stringNodePosEnd + 1; // substring's end index is exclusive
    return xml.substring(from, to);
}
// Core scanning loop. Walks the closure variable `xml` tag by tag and calls
// the registered handlers (onTextNode, onCDATA, onComment, onAttention,
// onQuestion, onStartNode, onEndNode). A malformed document is reported by
// calling onError with a message that is also stored in returnError, after
// which the function returns. It also returns as soon as a handler has set
// parseStop. onProgress (if registered) is awaited with a 0..100 percentage.
async function parse() {
// saved nsmatrix tables, one per open element (namespace mode only)
var stacknsmatrix = [];
// names of the currently open elements
var nodestack = [];
// nesting depth inside an element of an unknown namespace; 0 when not inside one
var stopIndex = 0;
var _nsmatrix;
var isTagStart = false;
var isTagEnd = false;
var x, y, q, w;
// j: position scanning resumes from; i: position of the next "<"
var j = 0;
var i = 0;
var xmlns;
var elem;
var stop; // used in namespace mode: while inside an unknown namespace no events are emitted
var xmlLength = xml.length;
// progress is reported roughly once per 1% of the input
var progStep = xmlLength/100;
var progCur = 0;
while(j !== -1) {
stop = stopIndex > 0;
if (xml.charCodeAt(j) === 60) { // "<"
i = j;
} else {
i = xml.indexOf('<', j);
}
if (i === -1) { // end of parsing
if (nodestack.length) {
onError(returnError = 'unexpected end parse');
return;
}
if (j === 0) {
onError(returnError = 'missing first tag');
return;
}
return;
}
// text between the previous tag and this one
if (j !== i && !stop) {
onTextNode(isAutoEntity ? entityDecode(xml.substring(j, i)) : xml.substring(j, i));
if (parseStop) {
return;
}
}
w = xml.charCodeAt(i+1);
if (w === 33) { // "!"
w = xml.charCodeAt(i+2);
if (w === 91 && xml.substr(i + 3, 6) === 'CDATA[') { // 91 == "["
j = xml.indexOf(']]>', i);
if (j === -1) {
onError(returnError = 'cdata');
return;
}
if (!stop) {
onCDATA(xml.substring(i + 9, j));
if (parseStop) {
return;
}
}
j += 3;
continue;
}
if (w === 45 && xml.charCodeAt(i + 3) === 45) { // 45 == "-"
j = xml.indexOf('-->', i);
if (j === -1) {
onError(returnError = 'expected -->');
return;
}
if (is_onComment && !stop) {
onComment(isAutoEntity ? entityDecode(xml.substring(i + 4, j)) : xml.substring(i + 4, j));
if (parseStop) {
return;
}
}
j += 3;
continue;
}
// any other "<!...>" construct
j = xml.indexOf('>', i + 1);
if (j === -1) {
onError(returnError = 'expected ">"');
return;
}
if (is_onAttention && !stop) {
onAttention(xml.substring(i, j + 1));
if (parseStop) {
return;
}
}
j += 1;
continue;
}
if (w === 63) { // "?"
j = xml.indexOf('?>', i);
if (j === -1) { // error
onError(returnError = '...?>');
return;
}
if (is_onQuestion) {
onQuestion(xml.substring(i, j + 2));
if (parseStop) {
return;
}
}
j += 2;
continue;
}
// ordinary start or end tag: j becomes the position of its closing ">"
j = xml.indexOf('>', i + 1);
if (j == -1) { // error
onError(returnError = 'unclosed tag'); // ...>
return;
}
attrRes = true; // no attributes
//if (xml.charCodeAt(i+1) === 47) { // </...
if (w === 47) { // </...
isTagStart = false;
isTagEnd = true;
// verify that the tag being closed is the one most recently opened
if (!nodestack.length) {
onError(returnError = 'close tag, requires open tag');
return;
}
x = elem = nodestack.pop();
q = i + 2 + elem.length;
if (elem !== xml.substring(i + 2, q)) {
onError(returnError = 'close tag, not equal to the open tag');
return;
}
// check that the closing tag contains nothing but trailing whitespace
for(; q < j; q++) {
w = xml.charCodeAt(q);
if (w === 32 || (w > 8 && w < 14)) { // \f\n\r\t\v space
continue;
}
onError(returnError = 'close tag');
return;
}
} else {
if (xml.charCodeAt(j - 1) === 47) { // .../>
x = elem = xml.substring(i + 1, j - 1);
isTagStart = true;
isTagEnd = true;
} else {
x = elem = xml.substring(i + 1, j);
isTagStart = true;
isTagEnd = false;
}
if (!(w > 96 && w < 123 || w > 64 && w < 91 || w === 95 || w === 58)) { // char 95"_" 58":"
onError(returnError = 'first char nodeName');
return;
}
// scan the element name; it ends at the first whitespace character
for (q = 1, y = x.length; q < y; q++) {
w = x.charCodeAt(q);
if (w > 96 && w < 123 || w > 64 && w < 91 || w > 47 && w < 59 || w === 45 || w === 95) {
continue;
}
if (w === 32 || (w < 14 && w > 8)) { // \f\n\r\t\v space
attrRes = null; // attributes may be present; parsed lazily by getAttrs
elem = x.substring(0, q)
break;
}
onError(returnError = 'invalid nodeName');
return;
}
if (!isTagEnd) {
nodestack.push(elem);
}
}
if (isNamespace) {
if (stop) { // descendants of an element in an unknown namespace
if (isTagEnd) {
if (!isTagStart) {
if (--stopIndex === 0) {
nsmatrix = stacknsmatrix.pop();
}
}
} else {
stopIndex += 1;
}
j += 1;
continue;
}
// push onto stacknsmatrix only if !isTagEnd; otherwise keep the namespace context in a local variable
_nsmatrix = nsmatrix;
if (!isTagEnd) {
stacknsmatrix.push(nsmatrix);
}
if (isTagStart && (attrRes === null)) {
hasSurmiseNS = x.indexOf('xmlns', q) !== -1;
if (hasSurmiseNS) { // the tag may declare namespaces via xmlns
attrStartPos = q;
attrString = x;
getAttrs();
hasSurmiseNS = false;
}
}
w = elem.indexOf(':');
if (w !== -1) {
xmlns = nsmatrix[elem.substring(0, w)];
elem = elem.substr(w + 1);
} else {
xmlns = nsmatrix.xmlns;
}
if (!xmlns) {
// element of an unknown namespace
if (isTagEnd) {
nsmatrix = _nsmatrix; // because isTagStart is always true here
} else {
stopIndex = 1; // first element whose namespace is not defined
}
j += 1;
continue;
}
elem = xmlns + ':' + elem;
}
// remember the tag's extent so getStringNode can return its raw text
stringNodePosStart = i;
stringNodePosEnd = j;
if (isTagStart) {
attrStartPos = q;
attrString = x;
onStartNode(elem, getAttrs, isTagEnd, getStringNode);
if (parseStop) {
return;
}
}
if (isTagEnd) {
onEndNode(elem, isTagStart, getStringNode);
if (parseStop) {
return;
}
if (isNamespace) {
if (isTagStart) {
nsmatrix = _nsmatrix;
} else {
nsmatrix = stacknsmatrix.pop();
}
}
}
j += 1;
// progress is only evaluated on this path, i.e. after a start/end tag was dispatched
if (j > progCur) {
if (is_onProgress)
await onProgress(Math.round(j*100/xmlLength));
progCur += progStep;
}
}
}
}

View File

@@ -56,7 +56,7 @@ class BookConverter {
return iconv.decode(data, selected); return iconv.decode(data, selected);
} }
convertHtml(data, isText) { async convertHtml(data, isText) {
let titleInfo = {}; let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo}; let desc = {_n: 'description', 'title-info': titleInfo};
let pars = []; let pars = [];
@@ -123,7 +123,7 @@ class BookConverter {
let buf = this.decode(data).toString(); let buf = this.decode(data).toString();
sax.parse(buf, { await sax.parse(buf, {
onStartNode, onEndNode, onTextNode, onStartNode, onEndNode, onTextNode,
innerCut: new Set(['head', 'script', 'style']) innerCut: new Set(['head', 'script', 'style'])
}); });
@@ -321,7 +321,7 @@ class BookConverter {
growParagraph(text); growParagraph(text);
}; };
sax.parse(repSpaces(this.decode(data).toString()), { await sax.parse(repSpaces(this.decode(data).toString()), {
onStartNode, onEndNode, onTextNode, onComment, onStartNode, onEndNode, onTextNode, onComment,
innerCut: new Set(['head', 'script', 'style']) innerCut: new Set(['head', 'script', 'style'])
}); });

View File

@@ -1,5 +1,5 @@
function parse(xstr, options) { function parseSync(xstr, options) {
let {onStartNode, onEndNode, onTextNode, onCdata, onComment, innerCut} = options; let {onStartNode, onEndNode, onTextNode, onCdata, onComment, onProgress, innerCut} = options;
if (!onStartNode) if (!onStartNode)
onStartNode = () => {}; onStartNode = () => {};
@@ -11,12 +11,17 @@ function parse(xstr, options) {
onCdata = () => {}; onCdata = () => {};
if (!onComment) if (!onComment)
onComment = () => {}; onComment = () => {};
if (!onProgress)
onProgress = () => {};
if (!innerCut) if (!innerCut)
innerCut = new Set(); innerCut = new Set();
let i = 0; let i = 0;
const len = xstr.length; const len = xstr.length;
const progStep = len/10;
let nextProg = 0;
let cutCounter = 0; let cutCounter = 0;
let cutTag = ''; let cutTag = '';
let inCdata; let inCdata;
@@ -110,6 +115,11 @@ function parse(xstr, options) {
} }
} }
if (right >= nextProg) {
const prog = Math.round(right/(len + 1)*100);
onProgress((prog >= 100 ? 99 : prog));
nextProg += progStep;
}
i = right + 1; i = right + 1;
} }
@@ -122,9 +132,150 @@ function parse(xstr, options) {
onTextNode(xstr.substr(i, len - i), len - 1, cutCounter, cutTag); onTextNode(xstr.substr(i, len - i), len - 1, cutCounter, cutTag);
} }
} }
onProgress(100);
} }
// Asynchronous copy of parseSync: every callback is awaited.
/**
 * Tolerant SAX-style scanner for XML/HTML-like text.
 *
 * Callbacks (all optional, all awaited):
 *   onStartNode(tag, tail, left, cutCounter, cutTag)
 *   onEndNode(tag, tail, left, cutCounter, cutTag)
 *   onTextNode(text, left, cutCounter, cutTag)
 *   onCdata(data, left, cutCounter, cutTag)
 *   onComment(data, left, cutCounter, cutTag)
 *   onProgress(percent) - values are capped at 99 while scanning, then a final 100
 *
 * `tag` is lower-cased; `tail` is the rest of the tag starting at its first
 * space; `left` is the index of the "<" that opened the construct.
 * `innerCut` is a Set of tag names: while inside such an element cutCounter is
 * greater than zero and cutTag holds its name.
 *
 * NOTE(review): text directly preceding a comment or CDATA section is never
 * passed to onTextNode - only ordinary tags flush pending text. Confirm this
 * is intended before changing it; parseSync is described as the same algorithm.
 *
 * @param {string} xstr - text to scan
 * @param {Object} options - callbacks and innerCut, see above
 * @returns {Promise<void>}
 */
async function parse(xstr, options) {
    const noop = () => {};
    const onStartNode = options.onStartNode || noop;
    const onEndNode = options.onEndNode || noop;
    const onTextNode = options.onTextNode || noop;
    const onCdata = options.onCdata || noop;
    const onComment = options.onComment || noop;
    const onProgress = options.onProgress || noop;
    const innerCut = options.innerCut || new Set();

    const len = xstr.length;
    const progStep = len / 10;
    let nextProg = 0;

    let pos = 0;
    let cutCounter = 0;
    let cutTag = '';
    let inCdata;
    let inComment;

    while (pos < len) {
        inCdata = false;
        inComment = false;

        const left = xstr.indexOf('<', pos);
        if (left < 0) {
            break;
        }

        // Classify the construct and pick its terminator. leftData is the index
        // of the last character of the opening marker.
        let closer = '>';
        let leftData = left;
        if (xstr.startsWith('<!--', left)) {
            inComment = true;
            closer = '-->';
            leftData = left + 3;
        } else if (xstr.startsWith('<![CDATA[', left)) {
            inCdata = true;
            closer = ']]>';
            leftData = left + 8;
        }

        const rightData = xstr.indexOf(closer, leftData + 1);
        if (rightData < 0) {
            // Unterminated construct: the flags stay set so the remainder is
            // routed to the matching callback after the loop.
            break;
        }
        const right = rightData + closer.length - 1;
        const tagData = xstr.slice(leftData + 1, rightData);

        if (inCdata) {
            await onCdata(tagData, left, cutCounter, cutTag);
        } else if (inComment) {
            await onComment(tagData, left, cutCounter, cutTag);
        } else {
            const spaceAt = tagData.indexOf(' ');
            const hasTail = spaceAt >= 0;
            const tail = hasTail ? tagData.slice(spaceAt) : '';
            const tag = (hasTail ? tagData.slice(0, spaceAt) : tagData).toLowerCase();

            // Flush the text that precedes this tag (possibly an empty string).
            await onTextNode(xstr.slice(pos, left), left, cutCounter, cutTag);

            let endTag = '';
            if (tag.startsWith('/')) {
                endTag = tag.slice(1);
                await onEndNode(endTag, tail, left, cutCounter, cutTag);
            } else {
                await onStartNode(tag, tail, left, cutCounter, cutTag);
            }

            // Entering a cut element, or a nested element with the same name.
            if (innerCut.has(tag) && (cutCounter === 0 || cutTag === tag)) {
                if (cutCounter === 0) {
                    cutTag = tag;
                }
                cutCounter++;
            }

            // Leaving the cut element.
            if (cutTag === endTag) {
                cutCounter = Math.max(cutCounter - 1, 0);
                if (cutCounter === 0) {
                    cutTag = '';
                }
            }
        }

        if (right >= nextProg) {
            const prog = Math.round(right / (len + 1) * 100);
            await onProgress(Math.min(prog, 99));
            nextProg += progStep;
        }

        pos = right + 1;
    }

    if (pos < len) {
        const rest = xstr.slice(pos);
        if (inCdata) {
            await onCdata(rest, len - 1, cutCounter, cutTag);
        } else if (inComment) {
            await onComment(rest, len - 1, cutCounter, cutTag);
        } else {
            await onTextNode(rest, len - 1, cutCounter, cutTag);
        }
    }

    await onProgress(100);
}
module.exports = { module.exports = {
parseSync,
parse parse
} }