From bc1f85208a06045f6b35f0537c2b5478b4e102ea Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Sun, 27 Jan 2019 03:23:14 +0700 Subject: [PATCH] =?UTF-8?q?=D0=98=D0=B7=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB?= =?UTF-8?q?=D1=81=D1=8F=20=D0=BE=D1=82=20easysax.js=20=D0=B2=20=D0=BF?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D0=B7=D1=83=20=D1=81=D0=B2=D0=BE=D0=B5=D0=B3?= =?UTF-8?q?=D0=BE=20sax.js?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client/components/Reader/share/BookParser.js | 82 +-- client/components/Reader/share/easysax.js | 736 ------------------- server/core/BookConverter/index.js | 6 +- server/core/BookConverter/sax.js | 155 +++- 4 files changed, 192 insertions(+), 787 deletions(-) delete mode 100644 client/components/Reader/share/easysax.js diff --git a/client/components/Reader/share/BookParser.js b/client/components/Reader/share/BookParser.js index 033c470e..c766b659 100644 --- a/client/components/Reader/share/BookParser.js +++ b/client/components/Reader/share/BookParser.js @@ -1,4 +1,4 @@ -import EasySAXParser from './easysax'; +import sax from '../../../../server/core/BookConverter/sax'; import {sleep} from '../../../share/utils'; export default class BookParser { @@ -19,8 +19,6 @@ export default class BookParser { callback = () => {}; callback(0); - this.data = data; - if (data.indexOf(' { let p = para[paraIndex]; if (p) { @@ -84,16 +82,14 @@ export default class BookParser { paraOffset += p.length; }; - const parser = new EasySAXParser(); + const onStartNode = (elemName) => {// eslint-disable-line no-unused-vars + if (elemName == '?xml') + return; - parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars - }); - - parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars tag = elemName; path += '/' + elemName; - if ((tag == 'p' || tag == 'empty-line') && path.indexOf('/FictionBook/body/section') == 0) { + if ((tag == 'p' || tag == 'empty-line') && path.indexOf('/fictionbook/body/section') == 0) { newParagraph(' ', 1); } @@ -111,9 +107,9 @@ export default class BookParser { newParagraph(' ', 1); bold = true; } - }); + }; - parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars + const onEndNode = (elemName) => {// eslint-disable-line no-unused-vars if (tag == elemName) { if (tag == 'emphasis' || tag == 'strong') { growParagraph(``, 0); @@ -135,9 +131,9 @@ export default class BookParser { tag = path; } } - }); + }; - parser.on('textNode', (text) => { + const onTextNode = (text) => {// eslint-disable-line no-unused-vars text = text.replace(/ |[\t\n\r]/g, ' '); if (text != ' ' && text.trim() == '') @@ -147,30 +143,30 @@ export default class BookParser { return; switch (path) { - case '/FictionBook/description/title-info/author/first-name': + case '/fictionbook/description/title-info/author/first-name': fb2.firstName = text; break; - case '/FictionBook/description/title-info/author/middle-name': + case '/fictionbook/description/title-info/author/middle-name': fb2.middleName = text; break; - case '/FictionBook/description/title-info/author/last-name': + case '/fictionbook/description/title-info/author/last-name': fb2.lastName = text; break; - case '/FictionBook/description/title-info/genre': + case '/fictionbook/description/title-info/genre': fb2.genre = text; break; - case '/FictionBook/description/title-info/date': + case '/fictionbook/description/title-info/date': fb2.date = text; break; - case '/FictionBook/description/title-info/book-title': + case '/fictionbook/description/title-info/book-title': fb2.bookTitle = text; break; - case '/FictionBook/description/title-info/id': + case '/fictionbook/description/title-info/id': fb2.id = text; break; } - if (path.indexOf('/FictionBook/description/title-info/annotation') == 0) { + if (path.indexOf('/fictionbook/description/title-info/annotation') == 0) { if (!fb2.annotation) fb2.annotation = ''; if (tag != 'annotation') @@ -184,11 +180,11 @@ export default class BookParser { let tClose = (center ? '' : ''); tClose += (bold ? '' : ''); - if (path.indexOf('/FictionBook/body/title') == 0) { + if (path.indexOf('/fictionbook/body/title') == 0) { newParagraph(`${tOpen}${text}${tClose}`, text.length, true); } - if (path.indexOf('/FictionBook/body/section') == 0) { + if (path.indexOf('/fictionbook/body/section') == 0) { switch (tag) { case 'p': growParagraph(`${tOpen}${text}${tClose}`, text.length); @@ -197,24 +193,17 @@ export default class BookParser { growParagraph(`${tOpen}${text}${tClose}`, text.length); } } - }); + }; - parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars - }); + const onProgress = async(prog) => { + await sleep(1); + callback(prog); + }; - parser.on('comment', (text) => {// eslint-disable-line no-unused-vars + await sax.parse(data, { + onStartNode, onEndNode, onTextNode, onProgress }); - parser.on('progress', async(progress) => { - if (progress > nextPerc) { - await sleep(1); - callback(progress); - nextPerc += 10; - } - }); - - await parser.parse(data); - this.fb2 = fb2; this.para = para; this.textLength = paraOffset; @@ -252,17 +241,16 @@ export default class BookParser { style: {bold: Boolean, italic: Boolean, center: Boolean}, text: String, }*/ - const parser = new EasySAXParser(); let style = {}; - parser.on('textNode', (text) => { + const onTextNode = async(text) => {// eslint-disable-line no-unused-vars result.push({ style: Object.assign({}, style), text: text }); - }); + }; - parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars + const onStartNode = async(elemName) => {// eslint-disable-line no-unused-vars switch (elemName) { case 'strong': style.bold = true; @@ -274,9 +262,9 @@ export default class BookParser { style.center = true; break; } - }); + }; - parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars + const onEndNode = async(elemName) => {// eslint-disable-line no-unused-vars switch (elemName) { case 'strong': style.bold = false; @@ -288,9 +276,11 @@ export default class BookParser { style.center = false; break; } - }); + }; - parser.parse(`

${s}

`); + sax.parseSync(s, { + onStartNode, onEndNode, onTextNode + }); return result; } diff --git a/client/components/Reader/share/easysax.js b/client/components/Reader/share/easysax.js deleted file mode 100644 index 6d4f4ff9..00000000 --- a/client/components/Reader/share/easysax.js +++ /dev/null @@ -1,736 +0,0 @@ -'use strict'; - -/* -new function() { - var parser = new EasySAXParser(); - - parser.ns('rss', { // or false - 'http://search.yahoo.com/mrss/': 'media', - 'http://www.w3.org/1999/xhtml': 'xhtml', - 'http://www.w3.org/2005/Atom': 'atom', - 'http://purl.org/rss/1.0/': 'rss', - }); - - parser.on('error', function(msgError) { - }); - - parser.on('startNode', function(elemName, getAttr, isTagEnd, getStrNode) { - var attr = getAttr(); - }); - - parser.on('endNode', function(elemName, isTagStart, getStrNode) { - }); - - parser.on('textNode', function(text) { - }); - - parser.on('cdata', function(data) { - }); - - - parser.on('comment', function(text) { - //console.log('--'+text+'--') - }); - - //parser.on('unknownNS', function(key) {console.log('unknownNS: ' + key)}); - //parser.on('question', function() {}); // - //parser.on('attention', function() {}); // - - console.time('easysax'); - for(var z=1000;z--;) { - parser.parse(xml) - }; - console.timeEnd('easysax'); -}; - -*/ - -// << ------------------------------------------------------------------------ >> // - -EasySAXParser.entityDecode = xmlEntityDecode; -export default EasySAXParser; - -var stringFromCharCode = String.fromCharCode; -var objectCreate = Object.create; -function NULL_FUNC() {} - -function entity2char(x) { - if (x === 'amp') { - return '&'; - } - - switch(x.toLocaleLowerCase()) { - case 'quot': return '"'; - case 'amp': return '&' - case 'lt': return '<' - case 'gt': return '>' - - case 'plusmn': return '\u00B1'; - case 'laquo': return '\u00AB'; - case 'raquo': return '\u00BB'; - case 'micro': return '\u00B5'; - case 'nbsp': return '\u00A0'; - case 'copy': return '\u00A9'; - case 'sup2': return '\u00B2'; - case 'sup3': return '\u00B3'; - case 'para': return '\u00B6'; - case 'reg': return '\u00AE'; - case 'deg': return '\u00B0'; - case 'apos': return '\''; - } - - return '&' + x + ';'; -} - -function replaceEntities(s, d, x, z) { - if (z) { - return entity2char(z); - } - - if (d) { - return stringFromCharCode(d); - } - - return stringFromCharCode(parseInt(x, 16)); -} - -function xmlEntityDecode(s) { - s = ('' + s); - - if (s.length > 3 && s.indexOf('&') !== -1) { - if (s.indexOf('<') !== -1) {s = s.replace(/</g, '<');} - if (s.indexOf('>') !== -1) {s = s.replace(/>/g, '>');} - if (s.indexOf('"') !== -1) {s = s.replace(/"/g, '"');} - - if (s.indexOf('&') !== -1) { - s = s.replace(/&#(\d+);|&#x([0123456789abcdef]+);|&(\w+);/ig, replaceEntities); - } - } - - return s; -} - -function cloneMatrixNS(nsmatrix) { - var nn = objectCreate(null); - for (var n in nsmatrix) { - nn[n] = nsmatrix[n]; - } - return nn; -} - - -function EasySAXParser(config) { - if (!this) { - return null; - } - - var onTextNode = NULL_FUNC, onStartNode = NULL_FUNC, onEndNode = NULL_FUNC, onCDATA = NULL_FUNC, onError = NULL_FUNC, - onComment, onQuestion, onAttention, onUnknownNS, onProgress; - var is_onComment = false, is_onQuestion = false, is_onAttention = false, is_onUnknownNS = false, is_onProgress = false; - - var isAutoEntity = true; // делать "EntityDecode" всегда - var entityDecode = xmlEntityDecode; - var hasSurmiseNS = false; - var isNamespace = false; - var returnError = null; - var parseStop = false; // прервать парсер - var defaultNS; - var nsmatrix = null; - var useNS; - var xml = ''; // string - - - this.setup = function(op) { - for (var name in op) { - switch(name) { - case 'entityDecode': entityDecode = op.entityDecode || entityDecode; break; - case 'autoEntity': isAutoEntity = !!op.autoEntity; break; - case 'defaultNS': defaultNS = op.defaultNS || null; break; - case 'ns': isNamespace = !!(useNS = op.ns || null); break; - case 'on': - var listeners = op.on; - for (var ev in listeners) { - this.on(ev, listeners[ev]); - } - break; - } - } - }; - - this.on = function(name, cb) { - if (typeof cb !== 'function') { - if (cb !== null) { - throw Error('required args on(string, function||null)'); - } - } - - switch(name) { - case 'startNode': onStartNode = cb || NULL_FUNC; break; - case 'textNode': onTextNode = cb || NULL_FUNC; break; - case 'endNode': onEndNode = cb || NULL_FUNC; break; - case 'error': onError = cb || NULL_FUNC; break; - case 'cdata': onCDATA = cb || NULL_FUNC; break; - - case 'unknownNS': onUnknownNS = cb; is_onUnknownNS = !!cb; break; - case 'attention': onAttention = cb; is_onAttention = !!cb; break; // - case 'question': onQuestion = cb; is_onQuestion = !!cb; break; // - case 'comment': onComment = cb; is_onComment = !!cb; break; - case 'progress': onProgress = cb; is_onProgress = !!cb; break; - } - }; - - this.ns = function(root, ns) { - if (!root) { - isNamespace = false; - defaultNS = null; - useNS = null; - return this; - } - - if (!ns || typeof root !== 'string') { - throw Error('required args ns(string, object)'); - } - - isNamespace = !!(useNS = ns || null); - defaultNS = root || null; - - return this; - }; - - this.parse = async function(_xml) { - if (typeof _xml !== 'string') { - return 'required args parser(string)'; // error - } - - returnError = null; - xml = _xml; - - if (isNamespace) { - nsmatrix = objectCreate(null); - nsmatrix.xmlns = defaultNS; - - await parse(); - - nsmatrix = null; - - } else { - await parse(); - } - - parseStop = false; - attrRes = true; - xml = ''; - - return returnError; - }; - - this.stop = function() { - parseStop = true; - }; - - if (config) { - this.setup(config); - } - - // ----------------------------------------------------- - - - var stringNodePosStart; // number - var stringNodePosEnd; // number - var attrStartPos; // number начало позиции атрибутов в строке attrString <(div^ class="xxxx" title="sssss")/> - var attrString; // строка атрибутов <(div class="xxxx" title="sssss")/> - var attrRes; // закешированный результат разбора атрибутов , null - разбор не проводился, object - хеш атрибутов, true - нет атрибутов, false - невалидный xml - - /* - парсит атрибуты по требованию. Важно! - функция не генерирует исключения. - - если была ошибка разбора возврашается false - если атрибутов нет и разбор удачен то возврашается true - если есть атрибуты то возврашается обьект(хеш) - */ - - function getAttrs() { - if (attrRes !== null) { - return attrRes; - } - - var xmlnsAlias; - var nsAttrName; - var attrList = isNamespace && hasSurmiseNS ? [] : null; - var i = attrStartPos + 1; // так как первый символ уже был проверен - var s = attrString; - var l = s.length; - var hasNewMatrix; - var newalias; - var value; - var alias; - var name; - var res = {}; - var ok; - var w; - var j; - - - for(; i < l; i++) { - w = s.charCodeAt(i); - - if (w === 32 || (w < 14 && w > 8) ) { // \f\n\r\t\v - continue - } - - if (w < 65 || w > 122 || (w > 90 && w < 97) ) { // недопустимые первые символы - if (w !== 95 && w !== 58) { // char 95"_" 58":" - return attrRes = false; // error. invalid first char - } - } - - for(j = i + 1; j < l; j++) { // проверяем все символы имени атрибута - w = s.charCodeAt(j); - - if ( w > 96 && w < 123 || w > 64 && w < 91 || w > 47 && w < 59 || w === 45 || w === 95) { - continue; - } - - if (w !== 61) { // "=" == 61 - return attrRes = false; // error. invalid char "=" - } - - break; - } - - name = s.substring(i, j); - ok = true; - - if (name === 'xmlns:xmlns') { - return attrRes = false; // error. invalid name - } - - w = s.charCodeAt(j + 1); - - if (w === 34) { // '"' - j = s.indexOf('"', i = j + 2 ); - - } else { - if (w !== 39) { // "'" - return attrRes = false; // error. invalid char - } - - j = s.indexOf('\'', i = j + 2 ); - } - - if (j === -1) { - return attrRes = false; // error. invalid char - } - - if (j + 1 < l) { - w = s.charCodeAt(j + 1); - - if (w > 32 || w < 9 || (w < 32 && w > 13)) { - // error. invalid char - return attrRes = false; - } - } - - - value = s.substring(i, j); - i = j + 1; // след. семвол уже проверен потому проверять нужно следуюший - - if (isAutoEntity) { - value = entityDecode(value); - } - - if (!isNamespace) { // - res[name] = value; - continue; - } - - if (hasSurmiseNS) { - // есть подозрение что в атрибутах присутствует xmlns - newalias = (name !== 'xmlns' - ? name.charCodeAt(0) === 120 && name.substr(0, 6) === 'xmlns:' ? name.substr(6) : null - : 'xmlns' - ); - - if (newalias !== null) { - alias = useNS[entityDecode(value)]; - if (is_onUnknownNS && !alias) { - alias = onUnknownNS(value); - } - - if (alias) { - if (nsmatrix[newalias] !== alias) { - if (!hasNewMatrix) { - nsmatrix = cloneMatrixNS(nsmatrix); - hasNewMatrix = true; - } - - nsmatrix[newalias] = alias; - } - } else { - if (nsmatrix[newalias]) { - if (!hasNewMatrix) { - nsmatrix = cloneMatrixNS(nsmatrix); - hasNewMatrix = true; - } - - nsmatrix[newalias] = false; - } - } - - res[name] = value; - continue; - } - - attrList.push(name, value); - continue; - } - - w = name.indexOf(':'); - if (w === -1) { - res[name] = value; - continue; - } - - nsAttrName = nsmatrix[name.substring(0, w)]; - if (nsAttrName) { - nsAttrName = nsmatrix['xmlns'] === nsAttrName ? name.substr(w + 1) : nsAttrName + name.substr(w); - res[nsAttrName + name.substr(w)] = value; - } - } - - - if (!ok) { - return attrRes = true; // атрибутов нет, ошибок тоже нет - } - - if (hasSurmiseNS) { - xmlnsAlias = nsmatrix['xmlns']; - - for (i = 0, l = attrList.length; i < l; i++) { - name = attrList[i++]; - - w = name.indexOf(':'); - if (w !== -1) { - nsAttrName = nsmatrix[name.substring(0, w)]; - if (nsAttrName) { - nsAttrName = xmlnsAlias === nsAttrName ? name.substr(w + 1) : nsAttrName + name.substr(w); - res[nsAttrName] = attrList[i]; - } - continue; - } - res[name] = attrList[i]; - } - } - - return attrRes = res; - } - - function getStringNode() { - return xml.substring(stringNodePosStart, stringNodePosEnd + 1); - } - - - async function parse() { - var stacknsmatrix = []; - var nodestack = []; - var stopIndex = 0; - var _nsmatrix; - var isTagStart = false; - var isTagEnd = false; - var x, y, q, w; - var j = 0; - var i = 0; - var xmlns; - var elem; - var stop; // используется при разборе "namespace" . если встретился неизвестное пространство то события не генерируются - var xmlLength = xml.length; - var progStep = xmlLength/100; - var progCur = 0; - - while(j !== -1) { - stop = stopIndex > 0; - - if (xml.charCodeAt(j) === 60) { // "<" - i = j; - } else { - i = xml.indexOf('<', j); - } - - if (i === -1) { // конец разбора - if (nodestack.length) { - onError(returnError = 'unexpected end parse'); - return; - } - - if (j === 0) { - onError(returnError = 'missing first tag'); - return; - } - - return; - } - - if (j !== i && !stop) { - onTextNode(isAutoEntity ? entityDecode(xml.substring(j, i)) : xml.substring(j, i)); - if (parseStop) { - return; - } - } - - w = xml.charCodeAt(i+1); - - if (w === 33) { // "!" - w = xml.charCodeAt(i+2); - if (w === 91 && xml.substr(i + 3, 6) === 'CDATA[') { // 91 == "[" - j = xml.indexOf(']]>', i); - if (j === -1) { - onError(returnError = 'cdata'); - return; - } - - if (!stop) { - onCDATA(xml.substring(i + 9, j)); - if (parseStop) { - return; - } - } - - j += 3; - continue; - } - - - if (w === 45 && xml.charCodeAt(i + 3) === 45) { // 45 == "-" - j = xml.indexOf('-->', i); - if (j === -1) { - onError(returnError = 'expected -->'); - return; - } - - - if (is_onComment && !stop) { - onComment(isAutoEntity ? entityDecode(xml.substring(i + 4, j)) : xml.substring(i + 4, j)); - if (parseStop) { - return; - } - } - - j += 3; - continue; - } - - j = xml.indexOf('>', i + 1); - if (j === -1) { - onError(returnError = 'expected ">"'); - return; - } - - if (is_onAttention && !stop) { - onAttention(xml.substring(i, j + 1)); - if (parseStop) { - return; - } - } - - j += 1; - continue; - } - - if (w === 63) { // "?" - j = xml.indexOf('?>', i); - if (j === -1) { // error - onError(returnError = '...?>'); - return; - } - - if (is_onQuestion) { - onQuestion(xml.substring(i, j + 2)); - if (parseStop) { - return; - } - } - - j += 2; - continue; - } - - j = xml.indexOf('>', i + 1); - - if (j == -1) { // error - onError(returnError = 'unclosed tag'); // ...> - return; - } - - attrRes = true; // атрибутов нет - - //if (xml.charCodeAt(i+1) === 47) { // 8 && w < 14)) { // \f\n\r\t\v пробел - continue; - } - - onError(returnError = 'close tag'); - return; - } - - } else { - if (xml.charCodeAt(j - 1) === 47) { // .../> - x = elem = xml.substring(i + 1, j - 1); - - isTagStart = true; - isTagEnd = true; - - } else { - x = elem = xml.substring(i + 1, j); - - isTagStart = true; - isTagEnd = false; - } - - if (!(w > 96 && w < 123 || w > 64 && w < 91 || w === 95 || w === 58)) { // char 95"_" 58":" - onError(returnError = 'first char nodeName'); - return; - } - - for (q = 1, y = x.length; q < y; q++) { - w = x.charCodeAt(q); - - if (w > 96 && w < 123 || w > 64 && w < 91 || w > 47 && w < 59 || w === 45 || w === 95) { - continue; - } - - if (w === 32 || (w < 14 && w > 8)) { // \f\n\r\t\v пробел - attrRes = null; // возможно есть атирибуты - elem = x.substring(0, q) - break; - } - - onError(returnError = 'invalid nodeName'); - return; - } - - if (!isTagEnd) { - nodestack.push(elem); - } - } - - - if (isNamespace) { - if (stop) { // потомки неизвестного пространства имен - if (isTagEnd) { - if (!isTagStart) { - if (--stopIndex === 0) { - nsmatrix = stacknsmatrix.pop(); - } - } - - } else { - stopIndex += 1; - } - - j += 1; - continue; - } - - // добавляем в stacknsmatrix только если !isTagEnd, иначе сохраняем контекст пространств в переменной - _nsmatrix = nsmatrix; - if (!isTagEnd) { - stacknsmatrix.push(nsmatrix); - } - - if (isTagStart && (attrRes === null)) { - hasSurmiseNS = x.indexOf('xmlns', q) !== -1; - if (hasSurmiseNS) { // есть подозрение на xmlns - attrStartPos = q; - attrString = x; - - getAttrs(); - - hasSurmiseNS = false; - } - } - - w = elem.indexOf(':'); - if (w !== -1) { - xmlns = nsmatrix[elem.substring(0, w)]; - elem = elem.substr(w + 1); - - } else { - xmlns = nsmatrix.xmlns; - } - - - if (!xmlns) { - // элемент неизвестного пространства имен - if (isTagEnd) { - nsmatrix = _nsmatrix; // так как тут всегда isTagStart - } else { - stopIndex = 1; // первый элемент для которого не определено пространство имен - } - - j += 1; - continue; - } - - elem = xmlns + ':' + elem; - } - - stringNodePosStart = i; - stringNodePosEnd = j; - - if (isTagStart) { - attrStartPos = q; - attrString = x; - - onStartNode(elem, getAttrs, isTagEnd, getStringNode); - if (parseStop) { - return; - } - } - - if (isTagEnd) { - onEndNode(elem, isTagStart, getStringNode); - if (parseStop) { - return; - } - - if (isNamespace) { - if (isTagStart) { - nsmatrix = _nsmatrix; - } else { - nsmatrix = stacknsmatrix.pop(); - } - } - } - - j += 1; - - if (j > progCur) { - if (is_onProgress) - await onProgress(Math.round(j*100/xmlLength)); - progCur += progStep; - } - } - } -} diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js index c60b5982..488ada24 100644 --- a/server/core/BookConverter/index.js +++ b/server/core/BookConverter/index.js @@ -56,7 +56,7 @@ class BookConverter { return iconv.decode(data, selected); } - convertHtml(data, isText) { + async convertHtml(data, isText) { let titleInfo = {}; let desc = {_n: 'description', 'title-info': titleInfo}; let pars = []; @@ -123,7 +123,7 @@ class BookConverter { let buf = this.decode(data).toString(); - sax.parse(buf, { + await sax.parse(buf, { onStartNode, onEndNode, onTextNode, innerCut: new Set(['head', 'script', 'style']) }); @@ -321,7 +321,7 @@ class BookConverter { growParagraph(text); }; - sax.parse(repSpaces(this.decode(data).toString()), { + await sax.parse(repSpaces(this.decode(data).toString()), { onStartNode, onEndNode, onTextNode, onComment, innerCut: new Set(['head', 'script', 'style']) }); diff --git a/server/core/BookConverter/sax.js b/server/core/BookConverter/sax.js index 83b05dcc..c2fa877b 100644 --- a/server/core/BookConverter/sax.js +++ b/server/core/BookConverter/sax.js @@ -1,5 +1,5 @@ -function parse(xstr, options) { - let {onStartNode, onEndNode, onTextNode, onCdata, onComment, innerCut} = options; +function parseSync(xstr, options) { + let {onStartNode, onEndNode, onTextNode, onCdata, onComment, onProgress, innerCut} = options; if (!onStartNode) onStartNode = () => {}; @@ -11,12 +11,17 @@ function parse(xstr, options) { onCdata = () => {}; if (!onComment) onComment = () => {}; + if (!onProgress) + onProgress = () => {}; if (!innerCut) innerCut = new Set(); let i = 0; const len = xstr.length; + const progStep = len/10; + let nextProg = 0; + let cutCounter = 0; let cutTag = ''; let inCdata; @@ -110,6 +115,11 @@ function parse(xstr, options) { } } + if (right >= nextProg) { + const prog = Math.round(right/(len + 1)*100); + onProgress((prog >= 100 ? 99 : prog)); + nextProg += progStep; + } i = right + 1; } @@ -122,9 +132,150 @@ function parse(xstr, options) { onTextNode(xstr.substr(i, len - i), len - 1, cutCounter, cutTag); } } + + onProgress(100); } +//асинхронная копия parseSync +async function parse(xstr, options) { + let {onStartNode, onEndNode, onTextNode, onCdata, onComment, onProgress, innerCut} = options; + + if (!onStartNode) + onStartNode = () => {}; + if (!onEndNode) + onEndNode = () => {}; + if (!onTextNode) + onTextNode = () => {}; + if (!onCdata) + onCdata = () => {}; + if (!onComment) + onComment = () => {}; + if (!onProgress) + onProgress = () => {}; + + if (!innerCut) + innerCut = new Set(); + + let i = 0; + const len = xstr.length; + const progStep = len/10; + let nextProg = 0; + + let cutCounter = 0; + let cutTag = ''; + let inCdata; + let inComment; + while (i < len) { + inCdata = false; + inComment = false; + + let left = xstr.indexOf('<', i); + if (left < 0) + break; + let leftData = left; + + if (left < len - 2 && xstr[left + 1] == '!') { + if (xstr[left + 2] == '-') { + const leftComment = xstr.indexOf('', leftData + 1); + if (rightData < 0) + break; + right = rightData + 2; + } else { + rightData = xstr.indexOf('>', leftData + 1); + if (rightData < 0) + break; + right = rightData; + } + + let tagData = xstr.substr(leftData + 1, rightData - leftData - 1); + + if (inCdata) { + await onCdata(tagData, left, cutCounter, cutTag); + } else if (inComment) { + await onComment(tagData, left, cutCounter, cutTag); + } else { + let tag = ''; + let tail = ''; + const firstSpace = tagData.indexOf(' '); + if (firstSpace >= 0) { + tail = tagData.substr(firstSpace); + tag = tagData.substr(0, firstSpace); + } else { + tag = tagData; + } + tag = tag.toLowerCase(); + + const text = xstr.substr(i, left - i); + + await onTextNode(text, left, cutCounter, cutTag); + + let endTag = ''; + if (tag === '' || tag[0] !== '/') { + await onStartNode(tag, tail, left, cutCounter, cutTag); + } else { + endTag = tag.substr(1); + await onEndNode(endTag, tail, left, cutCounter, cutTag); + } + + if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) { + if (!cutCounter) + cutTag = tag; + cutCounter++; + } + + if (cutTag === endTag) { + cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0); + if (!cutCounter) + cutTag = ''; + } + } + + if (right >= nextProg) { + const prog = Math.round(right/(len + 1)*100); + await onProgress((prog >= 100 ? 99 : prog)); + nextProg += progStep; + } + i = right + 1; + } + + if (i < len) { + if (inCdata) { + await onCdata(xstr.substr(i, len - i), len - 1, cutCounter, cutTag); + } else if (inComment) { + await onComment(xstr.substr(i, len - i), len - 1, cutCounter, cutTag); + } else { + await onTextNode(xstr.substr(i, len - i), len - 1, cutCounter, cutTag); + } + } + + await onProgress(100); +} module.exports = { + parseSync, parse } \ No newline at end of file