From b4493b2e8d4c5984e94515244ca114e6fd213784 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Thu, 28 Feb 2019 20:02:14 +0700 Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B0=D0=B1=D0=BE=D1=82=D0=B0=20=D0=BD?= =?UTF-8?q?=D0=B0=D0=B4=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82=D0=B5?= =?UTF-8?q?=D1=80=D0=BE=D0=BC=20pdf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/BookConverter/ConvertHtml.js | 7 +++- server/core/BookConverter/ConvertPdf.js | 51 +++++++++++++++++++----- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/server/core/BookConverter/ConvertHtml.js b/server/core/BookConverter/ConvertHtml.js index 4a78f80c..11c51e02 100644 --- a/server/core/BookConverter/ConvertHtml.js +++ b/server/core/BookConverter/ConvertHtml.js @@ -28,6 +28,7 @@ class ConvertHtml extends ConvertBase { } else { isText = opts.isText; } + const {cutTitle} = opts; let titleInfo = {}; let desc = {_n: 'description', 'title-info': titleInfo}; @@ -73,7 +74,7 @@ class ConvertHtml extends ConvertBase { const newPara = new Set(['tr', 'br', 'br/', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']); const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars - if (!cutCounter) { + if (!cutCounter && !(cutTitle && inTitle)) { growParagraph(text); } @@ -111,7 +112,9 @@ class ConvertHtml extends ConvertBase { for (let i = 0; i < spaceCounter.length; i++) { total += (spaceCounter[i] ? spaceCounter[i] : 0); } - total /= 10; + + total /= 20; + let i = spaceCounter.length - 1; while (i > 0 && (!spaceCounter[i] || spaceCounter[i] < total)) i--; diff --git a/server/core/BookConverter/ConvertPdf.js b/server/core/BookConverter/ConvertPdf.js index a29f495a..d4a52c0e 100644 --- a/server/core/BookConverter/ConvertPdf.js +++ b/server/core/BookConverter/ConvertPdf.js @@ -45,15 +45,20 @@ class ConvertPdf extends ConvertHtml { const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars if (!cutCounter) { if (tag == 'text' && !inText) { - inText = true; - i++; - let attrs = sax.getAttrsSync(tail); - lines[i] = { + const line = { text: '', top: (attrs.top && attrs.top.value ? attrs.top.value : null), left: (attrs.left && attrs.left.value ? attrs.left.value : null), + width: (attrs.width && attrs.width.value ? attrs.width.value : null), + height: (attrs.height && attrs.height.value ? attrs.height.value : null), }; + + if (line.width !== '0' || line.height !== '0') { + inText = true; + i++; + lines[i] = line; + } } } }; @@ -69,12 +74,40 @@ class ConvertPdf extends ConvertHtml { }); //найдем параграфы и отступы -console.log(lines.length); - //формируем текст - let text = '' + const indents = []; + for (const line of lines) { + const top = parseInt(line.top); + const left = parseInt(line.left); - text = title + "\n" + text; - return await super.run(Buffer.from(text), {skipCheck: true, isText: true}); + if (!isNaN(top)) { + line.top = top; + } + + if (!isNaN(left)) { + indents[left] = 1; + line.left = left; + } + } + + let j = 0; + for (let i = 0; i < indents.length; i++) { + if (indents[i]) { + j++; + indents[i] = j; + } + } + indents[0] = 0; + + //формируем текст + let text = `${title}`; + for (const line of lines) { + const left = line.left || 0; + const sp = ' '.repeat(indents[left]); + + text += sp + line.text + "\n"; + } + + return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true}); } }