From cb65cac3335b7c66c55364ac35e13b064fcba443 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 4 Mar 2019 20:00:51 +0700 Subject: [PATCH] =?UTF-8?q?=D0=9A=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82?= =?UTF-8?q?=D0=B5=D1=80=20pdf=20-=20=D0=B7=D0=B0=D0=B3=D1=80=D1=83=D0=B6?= =?UTF-8?q?=D0=B0=D0=B5=D0=BC=20=D0=B8=D0=B7=D0=BE=D0=B1=D1=80=D0=B0=D0=B6?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/BookConverter/ConvertPdf.js | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/server/core/BookConverter/ConvertPdf.js b/server/core/BookConverter/ConvertPdf.js index 236d0a1a..f323d987 100644 --- a/server/core/BookConverter/ConvertPdf.js +++ b/server/core/BookConverter/ConvertPdf.js @@ -1,4 +1,5 @@ const fs = require('fs-extra'); +const path = require('path'); const sax = require('./sax'); const utils = require('../utils'); @@ -34,11 +35,37 @@ class ConvertPdf extends ConvertHtml { //парсим xml let lines = []; + let images = []; + let loading = []; let inText = false; let title = ''; let prevTop = 0; let i = -1; + const loadImage = async(image) => { + const src = path.parse(image.src); + let type = 'unknown'; + switch (src.ext) { + case '.jpg': type = 'image/jpeg'; break; + case '.png': type = 'image/png'; break; + } + if (type != 'unknown') { + image.data = (await fs.readFile(image.src)).toString('base64'); + image.type = type; + image.name = src.base; + } + } + + const putImage = (curTop) => { + if (!isNaN(curTop) && images.length) { + while (images.length && images[0].top < curTop) { + i++; + lines[i] = images[0]; + images.shift(); + } + } + } + const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars if (!cutCounter && inText) { lines[i].text += text + ' '; @@ -62,12 +89,34 @@ class ConvertPdf extends ConvertHtml { if (line.width !== '0' || line.height !== '0') { inText = true; if (isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3)) { + putImage(line.top); i++; lines[i] = line; } prevTop = line.top; } } + + if (tag == 'image') { + let attrs = sax.getAttrsSync(tail); + const src = (attrs.src && attrs.src.value ? attrs.src.value : ''); + if (src) { + const image = { + isImage: true, + src, + data: '', + type: '', + top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0, + }; + loading.push(loadImage(image)); + images.push(image); + images.sort((a, b) => a.top - b.top) + } + } + + if (tag == 'page') { + putImage(100000); + } } }; @@ -81,9 +130,15 @@ class ConvertPdf extends ConvertHtml { onStartNode, onEndNode, onTextNode }); + putImage(100000); + + await Promise.all(loading); + //найдем параграфы и отступы const indents = []; for (const line of lines) { + if (line.isImage) + continue; if (!isNaN(line.left)) { indents[line.left] = 1; } @@ -103,6 +158,11 @@ class ConvertPdf extends ConvertHtml { let concat = ''; let sp = ''; for (const line of lines) { + if (line.isImage) { + text += `${line.data}`; + continue; + } + if (concat == '') { const left = line.left || 0; sp = ' '.repeat(indents[left]);