From 53d9f5ddc6cf600cb1792020f844388de209ad3b Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Sun, 24 Nov 2019 15:36:11 +0700 Subject: [PATCH 1/5] =?UTF-8?q?=D0=A3=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20html->fb2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client/components/Reader/share/BookParser.js | 2 ++ server/core/Reader/BookConverter/ConvertHtml.js | 7 ++++++- server/core/Reader/BookConverter/ConvertSamlib.js | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/client/components/Reader/share/BookParser.js b/client/components/Reader/share/BookParser.js index 8afe9875..c7fbc5f9 100644 --- a/client/components/Reader/share/BookParser.js +++ b/client/components/Reader/share/BookParser.js @@ -240,6 +240,7 @@ export default class BookParser { newParagraph(' ', 1); isFirstTitlePara = true; bold = true; + center = true; } if (tag == 'epigraph') { @@ -282,6 +283,7 @@ export default class BookParser { if (tag == 'subtitle') { isFirstTitlePara = false; bold = false; + center = false; } if (tag == 'epigraph') { diff --git a/server/core/Reader/BookConverter/ConvertHtml.js b/server/core/Reader/BookConverter/ConvertHtml.js index facb1954..b3373f56 100644 --- a/server/core/Reader/BookConverter/ConvertHtml.js +++ b/server/core/Reader/BookConverter/ConvertHtml.js @@ -77,7 +77,8 @@ class ConvertHtml extends ConvertBase { } }; - const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']); + const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'ul', '/ul', 'h1', 'h2', 'h3', 'h4', 'h5', '/h1', '/h2', '/h3', '/h4', '/h5']); + const newPara2 = new Set(['h1', 'h2', 'h3', 'h4', 'h5']); const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars text = this.escapeEntities(text); @@ -108,6 +109,8 @@ class ConvertHtml extends ConvertBase { if (!cutCounter) { if (newPara.has(tag)) newParagraph(); + if (newPara2.has(tag)) + newParagraph(); switch (tag) { case 'i': @@ -141,6 +144,8 @@ class ConvertHtml extends ConvertBase { if (!cutCounter) { if (newPara.has('/' + tag)) newParagraph(); + if (newPara2.has('/' + tag)) + newParagraph(); switch (tag) { case 'i': diff --git a/server/core/Reader/BookConverter/ConvertSamlib.js b/server/core/Reader/BookConverter/ConvertSamlib.js index 5b095c07..f5023d76 100644 --- a/server/core/Reader/BookConverter/ConvertSamlib.js +++ b/server/core/Reader/BookConverter/ConvertSamlib.js @@ -102,6 +102,8 @@ class ConvertSamlib extends ConvertBase { case 'h1': case 'h2': case 'h3': + case 'h4': + case 'h5': if (inPara) closeTag('p'); openTag('p'); @@ -173,6 +175,8 @@ class ConvertSamlib extends ConvertBase { case 'h1': case 'h2': case 'h3': + case 'h4': + case 'h5': closeTag('p'); bold = false; break; From 8af51bbf0819b340c72c3341dc931af47dad3cfd Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 25 Nov 2019 15:15:06 +0700 Subject: [PATCH 2/5] =?UTF-8?q?=D0=A3=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D1=84=D0=B8=D0=BB=D1=8C=D1=82=D1=80=D0=B0=20htm?= =?UTF-8?q?l?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/Reader/BookConverter/ConvertHtml.js | 46 +++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/server/core/Reader/BookConverter/ConvertHtml.js b/server/core/Reader/BookConverter/ConvertHtml.js index b3373f56..4131436f 100644 --- a/server/core/Reader/BookConverter/ConvertHtml.js +++ b/server/core/Reader/BookConverter/ConvertHtml.js @@ -39,16 +39,19 @@ class ConvertHtml extends ConvertBase { let title = ''; let inTitle = false; + let inSubTitle = false; let inImage = false; let image = {}; let bold = false; let italic = false; + let begining = true; let spaceCounter = []; const repCrLfTab = (text) => text.replace(/[\n\r]/g, '').replace(/\t/g, ' '); const newParagraph = () => { + begining = false; pars.push({_n: 'p', _t: ''}); }; @@ -58,6 +61,8 @@ class ConvertHtml extends ConvertBase { const l = pars.length; pars[l - 1]._t += text; + if (inSubTitle) + pars[l - 1]._n = ''; //посчитаем отступы у текста, чтобы выделить потом параграфы const lines = text.split('\n'); @@ -84,10 +89,14 @@ class ConvertHtml extends ConvertBase { text = this.escapeEntities(text); if (!cutCounter && !(cutTitle && inTitle)) { - let tOpen = (bold ? '' : ''); + let tOpen = ''; + tOpen += (inSubTitle ? '' : ''); + tOpen += (bold ? '' : ''); tOpen += (italic ? '' : ''); - let tClose = (italic ? '' : ''); + let tClose = '' + tClose += (italic ? '' : ''); tClose += (bold ? '' : ''); + tClose += (inSubTitle ? '' : ''); growParagraph(`${tOpen}${text}${tClose}`); } @@ -107,9 +116,9 @@ class ConvertHtml extends ConvertBase { const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars if (!cutCounter) { - if (newPara.has(tag)) + if (newPara2.has(tag) && !begining) newParagraph(); - if (newPara2.has(tag)) + if (newPara.has(tag)) newParagraph(); switch (tag) { @@ -133,6 +142,10 @@ class ConvertHtml extends ConvertBase { cutTitle = true; } + if (tag == 'subtitle') { + inSubTitle = true; + } + if (tag == 'fb2-image') { inImage = true; const attrs = sax.getAttrsSync(tail); @@ -165,6 +178,9 @@ class ConvertHtml extends ConvertBase { if (tag == 'title' || tag == 'cut-title') inTitle = false; + if (tag == 'subtitle') + inSubTitle = false; + if (tag == 'fb2-image') inImage = false; }; @@ -202,7 +218,8 @@ class ConvertHtml extends ConvertBase { while (i > 0 && (!spaceCounter[i] || spaceCounter[i] < total)) i--; } - const parIndent = (i > 0 ? i : 0); + let parIndent = (i > 0 ? i : 0); + if (parIndent > 2) parIndent--; let newPars = []; const newPar = () => { @@ -238,7 +255,7 @@ class ConvertHtml extends ConvertBase { l++; } - if (l >= parIndent) { + if (l >= parIndent || line == '') { if (j > 0) newPar(); j++; @@ -255,6 +272,7 @@ class ConvertHtml extends ConvertBase { //убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги bold = false; italic = false; + inSubTitle = false; pars = body.section._a[0]; for (let i = 0; i < pars.length; i++) { if (pars[i]._n != 'p') @@ -264,16 +282,24 @@ class ConvertHtml extends ConvertBase { if (pars[i]._t.indexOf('<') >= 0 || bold || italic) { const t = pars[i]._t; + let first = true; let a = []; const onTextNode = (text) => { - let tOpen = (bold ? '' : ''); + let tOpen = ''; + tOpen += (inSubTitle ? '' : ''); + tOpen += (bold ? '' : ''); tOpen += (italic ? '' : ''); - let tClose = (italic ? '' : ''); + let tClose = '' + tClose += (italic ? '' : ''); tClose += (bold ? '' : ''); + tClose += (inSubTitle ? '' : ''); + if (first) + text = text.replace(/^\s+/, ''); //trimLeft a.push(`${tOpen}${text}${tClose}`); + first = false; } const onStartNode = (tag) => { @@ -281,6 +307,8 @@ class ConvertHtml extends ConvertBase { bold = true; if (tag == 'emphasis') italic = true; + if (tag == 'subtitle') + inSubTitle = true; } const onEndNode = (tag) => { @@ -288,6 +316,8 @@ class ConvertHtml extends ConvertBase { bold = false; if (tag == 'emphasis') italic = false; + if (tag == 'subtitle') + inSubTitle = false; } sax.parseSync(t, { onStartNode, onEndNode, onTextNode }); From 47f059213fac0daed91fe70211c78008fd779604 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 25 Nov 2019 15:21:33 +0700 Subject: [PATCH 3/5] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82=D0=B5=D1=80?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20flibusta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/Reader/BookConverter/ConvertSites.js | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/server/core/Reader/BookConverter/ConvertSites.js b/server/core/Reader/BookConverter/ConvertSites.js index 96e756cf..5b9a608d 100644 --- a/server/core/Reader/BookConverter/ConvertSites.js +++ b/server/core/Reader/BookConverter/ConvertSites.js @@ -12,7 +12,10 @@ const sitesFilter = { converter: 'cutter', begin: ``, end: ``, - } + }, + 'flibusta.is': { + converter: 'flibusta' + }, }; class ConvertSites extends ConvertHtml { @@ -54,11 +57,11 @@ class ConvertSites extends ConvertHtml { if (m) title = m[1]; - return `${title.trim()}`; + return title.trim(); } cutter(text, opts) { - const title = this.getTitle(text); + const title = `${this.getTitle(text)}`; const l = text.indexOf(opts.begin) + opts.begin.length; const r = text.indexOf(opts.end); if (l < 0 || r < 0 || r <= l) @@ -66,6 +69,42 @@ class ConvertSites extends ConvertHtml { return text.substring(l, r) + title; } + + flibusta(text) { + let author = ''; + let m = text.match(/- ([\s\S]*?)<\/a>/); + if (m) + author = m[1]; + + let book = this.getTitle(text); + book = book.replace(' (fb2) | Флибуста', ''); + + const title = `${author}${(author ? ' - ' : '')}${book}`; + + let begin = '

'; + if (text.indexOf(begin) <= 0) + begin = '

'; + + const end = '

') + .replace(/

/g, '

') + .replace(/
/g, '

') + .replace(/

/g, '

') + .replace(/
/g, '

') + .replace(/<\/h3>/g, '
') + .replace(/<\/h5>/g, '
') + .replace(/
/g, '
') + .replace(/
/g, '
') + + title; + } } module.exports = ConvertSites; From d897a7400fc6e74250c3764773ba4dd18319b910 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 25 Nov 2019 15:36:34 +0700 Subject: [PATCH 4/5] =?UTF-8?q?=D0=A3=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D0=BF=D0=B0=D1=80=D1=81=D0=B5=D1=80=D0=B0=20fb2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client/components/Reader/share/BookParser.js | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/client/components/Reader/share/BookParser.js b/client/components/Reader/share/BookParser.js index c7fbc5f9..8dec76b3 100644 --- a/client/components/Reader/share/BookParser.js +++ b/client/components/Reader/share/BookParser.js @@ -369,11 +369,10 @@ export default class BookParser { tClose += (bold ? '
' : ''); tClose += (center ? '' : ''); - if (path.indexOf('/fictionbook/body/title') == 0) { - growParagraph(`${tOpen}${text}${tClose}`, text.length); - } - - if (path.indexOf('/fictionbook/body/section') == 0) { + if (path.indexOf('/fictionbook/body/title') == 0 || + path.indexOf('/fictionbook/body/section') == 0 || + path.indexOf('/fictionbook/body/epigraph') == 0 + ) { growParagraph(`${tOpen}${text}${tClose}`, text.length); } From cede65313bd658925dd1229884c91b4a855ef8d2 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 25 Nov 2019 15:38:52 +0700 Subject: [PATCH 5/5] =?UTF-8?q?=D0=92=D0=B5=D1=80=D1=81=D0=B8=D1=8F=200.7.?= =?UTF-8?q?8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client/components/Reader/versionHistory.js | 11 +++++++++++ package.json | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/client/components/Reader/versionHistory.js b/client/components/Reader/versionHistory.js index c0cfa7e5..158095e5 100644 --- a/client/components/Reader/versionHistory.js +++ b/client/components/Reader/versionHistory.js @@ -1,4 +1,15 @@ export const versionHistory = [ +{ + showUntil: '2019-11-24', + header: '0.7.8 (2019-11-25)', + content: +` +
    +
  • улучшение html-фильтров для сайтов
  • +
+` +}, + { showUntil: '2019-11-10', header: '0.7.7 (2019-11-06)', diff --git a/package.json b/package.json index 1db3c398..7c310770 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "Liberama", - "version": "0.7.7", + "version": "0.7.8", "engines": { "node": ">=10.0.0" },