From 5bb43d431a10906eeceae5eeb4d5628bf078dc82 Mon Sep 17 00:00:00 2001
From: Book Pauk
Date: Sat, 26 Jan 2019 07:00:01 +0700
Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B5=D1=84=D0=B0=D0=BA=D1=82=D0=BE?=
 =?UTF-8?q?=D1=80=D0=B8=D0=BD=D0=B3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 server/core/BookConverter/index.js | 120 +++++++++++++++++------------
 1 file changed, 69 insertions(+), 51 deletions(-)

diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js
index 072796d2..557b3f26 100644
--- a/server/core/BookConverter/index.js
+++ b/server/core/BookConverter/index.js
@@ -55,7 +55,61 @@ class BookConverter {
         return iconv.decode(data, selected);
     }
 
-    async convertHtml(data, isText) {
+    parseHtml(buf, onNode, onText, innerCut) {
+        if (!onNode)
+            onNode = () => {};
+        if (!onText)
+            onText = () => {};
+        if (!innerCut)
+            innerCut = new Set();
+
+        buf = buf.replace(/&nbsp;/g, ' ');
+
+        let i = 0;
+        const len = buf.length;
+        let cutCounter = 0;
+        let cutTag = '';
+        while (i < len) {
+            let left = buf.indexOf('<', i);
+            if (left < 0)
+                break;
+            let right = buf.indexOf('>', left + 1);
+            if (right < 0)
+                break;
+
+            let tag = buf.substr(left + 1, right - left - 1).trim().toLowerCase();
+            let tail = '';
+            const firstSpace = tag.indexOf(' ');
+            if (firstSpace >= 0) {
+                tail = tag.substr(firstSpace + 1);
+                tag = tag.substr(0, firstSpace);
+            }
+
+            const text = buf.substr(i, left - i);
+
+            onText(text, cutCounter, cutTag);
+            onNode(tag, tail, cutCounter, cutTag);
+
+            if (innerCut.has(tag) && (!cutCounter || cutTag == tag)) {
+                if (!cutCounter)
+                    cutTag = tag;
+                cutCounter++;
+            }
+
+            if (tag != '' && tag.charAt(0) == '/' && cutTag == tag.substr(1)) {
+                cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
+                if (!cutCounter)
+                    cutTag = '';
+            }
+
+            i = right + 1;
+        }
+
+        if (i < len)
+            onText(buf.substr(i, len - i), cutCounter, cutTag);
+    }
+
+    convertHtml(data, isText) {
         let titleInfo = {};
         let desc = {_n: 'description', 'title-info': titleInfo};
         let pars = [];
@@ -94,67 +148,32 @@ class BookConverter {
         };
         newParagraph();
 
-        const newPara = new Set(['TR', 'BR', 'BR/', 'DD', 'P', 'TITLE', '/TITLE', 'H1', 'H2', 'H3', '/H1', '/H2', '/H3']);
+        const newPara = new Set(['tr', 'br', 'br/', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
+
+        const onText = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
+            if (!cutCounter) {
+                growParagraph(text);
+            }
 
-        const onText = (text) => {
             if (inTitle && !title)
                 title = text;
         };
 
-        const onNode = (elem) => {
-            if (elem == 'TITLE')
-                inTitle = true;
-            else if (elem == '/TITLE')
-                inTitle = false;
-        };
-
-        const innerCut = new Set(['HEAD', 'SCRIPT', 'STYLE']);
-        let buf = this.decode(data).toString();
-        buf = buf.replace(/&nbsp;/g, ' ');
-
-        let i = 0;
-        const len = buf.length;
-        let cutCounter = 0;
-        let cutTag = '';
-        while (i < len) {
-            let left = buf.indexOf('<', i);
-            if (left < 0)
-                break;
-            let right = buf.indexOf('>', left + 1);
-            if (right < 0)
-                break;
-
-            let tag = buf.substr(left + 1, right - left - 1).trim().toUpperCase();
-            const firstSpace = tag.indexOf(' ');
-            if (firstSpace >= 0)
-                tag = tag.substr(0, firstSpace);
-
-            const text = buf.substr(i, left - i);
+        const onNode = (tag, tail, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
             if (!cutCounter) {
-                growParagraph(text);
                 if (newPara.has(tag))
                     newParagraph();
             }
-            onText(text);
-            onNode(tag);
 
-            if (innerCut.has(tag) && (!cutCounter || cutTag == tag)) {
-                if (!cutCounter)
-                    cutTag = tag;
-                cutCounter++;
-            }
+            if (tag == 'title')
+                inTitle = true;
+            else if (tag == '/title')
+                inTitle = false;
+        };
 
-            if (tag != '' && tag.charAt(0) == '/' && cutTag == tag.substr(1)) {
-                cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
-                if (!cutCounter)
-                    cutTag = '';
-            }
+        let buf = this.decode(data).toString();
 
-            i = right + 1;
-        }
-
-        if (i < len && !cutCounter)
-            growParagraph(buf.substr(i, len - i));
+        this.parseHtml(buf, onNode, onText, new Set(['head', 'script', 'style']));
 
         titleInfo['book-title'] = title;
 
@@ -297,7 +316,6 @@ class BookConverter {
                         break;
                 }
             }
-
         });
 
         parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars