From 75e01c899eb638658974df2a2a595f48538a7ccf Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Wed, 9 Dec 2020 20:08:17 +0700 Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B0=D0=B1=D0=BE=D1=82=D0=B0=20=D0=BD?= =?UTF-8?q?=D0=B0=D0=B4=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82=D0=B5?= =?UTF-8?q?=D1=80=D0=BE=D0=BC=20pdf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/Reader/BookConverter/ConvertHtml.js | 34 ++++++++++++++- .../core/Reader/BookConverter/ConvertPdf.js | 42 ++++++++++++------- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/server/core/Reader/BookConverter/ConvertHtml.js b/server/core/Reader/BookConverter/ConvertHtml.js index 24cc1357..3104bb4f 100644 --- a/server/core/Reader/BookConverter/ConvertHtml.js +++ b/server/core/Reader/BookConverter/ConvertHtml.js @@ -52,6 +52,8 @@ class ConvertHtml extends ConvertBase { let image = {}; let bold = false; let italic = false; + let superscript = false; + let subscript = false; let begining = true; let spaceCounter = []; @@ -101,7 +103,11 @@ class ConvertHtml extends ConvertBase { tOpen += (inSubTitle ? '' : ''); tOpen += (bold ? '' : ''); tOpen += (italic ? '' : ''); + tOpen += (superscript ? '' : ''); + tOpen += (subscript ? '' : ''); let tClose = '' + tClose += (subscript ? '' : ''); + tClose += (superscript ? '' : ''); tClose += (italic ? '' : ''); tClose += (bold ? '' : ''); tClose += (inSubTitle ? '' : ''); @@ -152,6 +158,12 @@ class ConvertHtml extends ConvertBase { bold = true; break; } + + if (tag == 'sup') + superscript = true; + + if (tag == 'sub') + subscript = true; } if (tag == 'title' || tag == 'fb2-title') { @@ -174,7 +186,7 @@ class ConvertHtml extends ConvertBase { inImage = true; const attrs = sax.getAttrsSync(tail); image = {_n: 'binary', _attrs: {id: attrs.name.value, 'content-type': attrs.type.value}, _t: ''}; - } + } }; const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars @@ -197,6 +209,12 @@ class ConvertHtml extends ConvertBase { bold = false; break; } + + if (tag == 'sup') + superscript = false; + + if (tag == 'sub') + subscript = false; } if (tag == 'title' || tag == 'fb2-title') @@ -302,6 +320,8 @@ class ConvertHtml extends ConvertBase { //убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги bold = false; italic = false; + superscript = false; + subscript = false; inSubTitle = false; pars = body.section._a[0]; for (let i = 0; i < pars.length; i++) { @@ -321,7 +341,11 @@ class ConvertHtml extends ConvertBase { tOpen += (inSubTitle ? '' : ''); tOpen += (bold ? '' : ''); tOpen += (italic ? '' : ''); + tOpen += (superscript ? '' : ''); + tOpen += (subscript ? '' : ''); let tClose = '' + tClose += (subscript ? '' : ''); + tClose += (superscript ? '' : ''); tClose += (italic ? '' : ''); tClose += (bold ? '' : ''); tClose += (inSubTitle ? '' : ''); @@ -337,6 +361,10 @@ class ConvertHtml extends ConvertBase { bold = true; if (tag == 'emphasis') italic = true; + if (tag == 'sup') + superscript = true; + if (tag == 'sub') + subscript = true; if (tag == 'subtitle') inSubTitle = true; } @@ -346,6 +374,10 @@ class ConvertHtml extends ConvertBase { bold = false; if (tag == 'emphasis') italic = false; + if (tag == 'sup') + superscript = false; + if (tag == 'sub') + subscript = false; if (tag == 'subtitle') inSubTitle = false; } diff --git a/server/core/Reader/BookConverter/ConvertPdf.js b/server/core/Reader/BookConverter/ConvertPdf.js index b2e4d330..de1432e2 100644 --- a/server/core/Reader/BookConverter/ConvertPdf.js +++ b/server/core/Reader/BookConverter/ConvertPdf.js @@ -91,15 +91,16 @@ class ConvertPdf extends ConvertHtml { //объединяем в одну строку равные по высоте const pl = []; - let pt = -100; + let pt = 0; let j = -1; pagelines.forEach(line => { //добавим закрывающий тег стиля line.text += line.tClose; + const f = (line.fonts.length ? fonts[line.fonts[0]] : null); + //проверим, возможно это заголовок if (line.fonts.length == 1 && line.pageWidth) { - const f = fonts[line.fonts[0]]; const centerLeft = (line.pageWidth - line.width)/2; if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) { if (!sectionTitleFound) { @@ -111,8 +112,14 @@ class ConvertPdf extends ConvertHtml { } } - //объедняем - if (Math.abs(pt - line.top) > 3) { + //добавим пустую строку, если надо + if (f && f.fontSize && Math.abs(pt - line.top) > f.fontSize*1.5) { + j++; + pl[j] = {text: '
'}; + } + + //объединяем + if (pt == 0 || Math.abs(pt - line.top) > 3) { j++; pl[j] = line; } else { @@ -136,21 +143,24 @@ class ConvertPdf extends ConvertHtml { const attrs = sax.getAttrsSync(tail); const fontId = (attrs.id && attrs.id.value ? attrs.id.value : ''); const fontStyle = (attrs.fontstyle && attrs.fontstyle.value ? attrs.fontstyle.value : ''); + const fontSize = (attrs.fontsize && attrs.fontsize.value ? attrs.fontsize.value : ''); - if (fontId && fontStyle) { - const styles = fontStyle.split(' '); + if (fontId) { const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'}; - const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false}; + const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false, fontSize}; - styles.forEach(style => { - const s = styleTags[style]; - if (s) { - f.tOpen += `<${s}>`; - f.tClose = `${f.tClose}`; - if (s == 'b') - f.isBold = true; - } - }); + if (fontStyle) { + const styles = fontStyle.split(' '); + styles.forEach(style => { + const s = styleTags[style]; + if (s) { + f.tOpen += `<${s}>`; + f.tClose = `${f.tClose}`; + if (s == 'b') + f.isBold = true; + } + }); + } } }