From e0d0c6e46cfbc66e842d442cab8fc015dd3ed8ea Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Sun, 27 Jan 2019 04:45:07 +0700 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B4=D0=B5=D0=BB=D0=BA=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/BookConverter/sax.js | 183 +++++++++++++++++++++++++------ 1 file changed, 152 insertions(+), 31 deletions(-) diff --git a/server/core/BookConverter/sax.js b/server/core/BookConverter/sax.js index 712d4a05..ef348c82 100644 --- a/server/core/BookConverter/sax.js +++ b/server/core/BookConverter/sax.js @@ -1,21 +1,12 @@ function parseSync(xstr, options) { - let {onStartNode, onEndNode, onTextNode, onCdata, onComment, onProgress, innerCut} = options; - - if (!onStartNode) - onStartNode = () => {}; - if (!onEndNode) - onEndNode = () => {}; - if (!onTextNode) - onTextNode = () => {}; - if (!onCdata) - onCdata = () => {}; - if (!onComment) - onComment = () => {}; - if (!onProgress) - onProgress = () => {}; - - if (!innerCut) - innerCut = new Set(); + let {onStartNode: _onStartNode = () => {}, + onEndNode: _onEndNode = () => {}, + onTextNode: _onTextNode = () => {}, + onCdata: _onCdata = () => {}, + onComment: _onComment = () => {}, + onProgress: _onProgress = () => {}, + innerCut = new Set() + } = options; let i = 0; const len = xstr.length; @@ -80,9 +71,9 @@ function parseSync(xstr, options) { let tagData = xstr.substr(leftData + 1, rightData - leftData - 1); if (inCdata) { - onCdata(tagData, cutCounter, cutTag); + _onCdata(tagData, cutCounter, cutTag); } else if (inComment) { - onComment(tagData, cutCounter, cutTag); + _onComment(tagData, cutCounter, cutTag); } else { let tag = ''; let tail = ''; @@ -97,17 +88,17 @@ function parseSync(xstr, options) { const text = xstr.substr(i, left - i); - onTextNode(text, cutCounter, cutTag); + _onTextNode(text, cutCounter, cutTag); let endTag = (singleTag ? tag : ''); if (tag === '' || tag[0] !== '/') { - onStartNode(tag, tail, singleTag, cutCounter, cutTag); + _onStartNode(tag, tail, singleTag, cutCounter, cutTag); } else { endTag = tag.substr(1); } if (endTag) - onEndNode(endTag, tail, singleTag, cutCounter, cutTag); + _onEndNode(endTag, tail, singleTag, cutCounter, cutTag); if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) { if (!cutCounter) @@ -123,7 +114,7 @@ function parseSync(xstr, options) { } if (right >= nextProg) { - onProgress(Math.round(right/(len + 1)*100)); + _onProgress(Math.round(right/(len + 1)*100)); nextProg += progStep; } i = right + 1; @@ -131,22 +122,152 @@ function parseSync(xstr, options) { if (i < len) { if (inCdata) { - onCdata(xstr.substr(i, len - i), cutCounter, cutTag); + _onCdata(xstr.substr(i, len - i), cutCounter, cutTag); } else if (inComment) { - onComment(xstr.substr(i, len - i), cutCounter, cutTag); + _onComment(xstr.substr(i, len - i), cutCounter, cutTag); } else { - onTextNode(xstr.substr(i, len - i), cutCounter, cutTag); + _onTextNode(xstr.substr(i, len - i), cutCounter, cutTag); } } - onProgress(100); + _onProgress(100); } +//асинхронная копия parseSync +//делается заменой "_on" => "await _on" после while async function parse(xstr, options) { - return new Promise((resolve) => { - parseSync(xstr, options); - resolve(); - }); + let {onStartNode: _onStartNode = () => {}, + onEndNode: _onEndNode = () => {}, + onTextNode: _onTextNode = () => {}, + onCdata: _onCdata = () => {}, + onComment: _onComment = () => {}, + onProgress: _onProgress = () => {}, + innerCut = new Set() + } = options; + + let i = 0; + const len = xstr.length; + const progStep = len/10; + let nextProg = 0; + + let cutCounter = 0; + let cutTag = ''; + let inCdata; + let inComment; + while (i < len) { + inCdata = false; + inComment = false; + let singleTag = false; + + let left = xstr.indexOf('<', i); + if (left < 0) + break; + let leftData = left; + + if (left < len - 2 && xstr[left + 1] == '!') { + if (xstr[left + 2] == '-') { + const leftComment = xstr.indexOf('', leftData + 1); + if (rightData < 0) + break; + right = rightData + 2; + } else { + rightData = xstr.indexOf('>', leftData + 1); + if (rightData < 0) + break; + right = rightData; + if (xstr[right - 1] === '/') { + singleTag = true; + rightData--; + } + } + + let tagData = xstr.substr(leftData + 1, rightData - leftData - 1); + + if (inCdata) { + await _onCdata(tagData, cutCounter, cutTag); + } else if (inComment) { + await _onComment(tagData, cutCounter, cutTag); + } else { + let tag = ''; + let tail = ''; + const firstSpace = tagData.indexOf(' '); + if (firstSpace >= 0) { + tail = tagData.substr(firstSpace); + tag = tagData.substr(0, firstSpace); + } else { + tag = tagData; + } + tag = tag.toLowerCase(); + + const text = xstr.substr(i, left - i); + + await _onTextNode(text, cutCounter, cutTag); + + let endTag = (singleTag ? tag : ''); + if (tag === '' || tag[0] !== '/') { + await _onStartNode(tag, tail, singleTag, cutCounter, cutTag); + } else { + endTag = tag.substr(1); + } + + if (endTag) + await _onEndNode(endTag, tail, singleTag, cutCounter, cutTag); + + if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) { + if (!cutCounter) + cutTag = tag; + cutCounter++; + } + + if (cutTag === endTag) { + cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0); + if (!cutCounter) + cutTag = ''; + } + } + + if (right >= nextProg) { + await _onProgress(Math.round(right/(len + 1)*100)); + nextProg += progStep; + } + i = right + 1; + } + + if (i < len) { + if (inCdata) { + await _onCdata(xstr.substr(i, len - i), cutCounter, cutTag); + } else if (inComment) { + await _onComment(xstr.substr(i, len - i), cutCounter, cutTag); + } else { + await _onTextNode(xstr.substr(i, len - i), cutCounter, cutTag); + } + } + + await _onProgress(100); } module.exports = {