From fde0437157c78144dfdf6788ea8ffbc44ec72347 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Fri, 18 Dec 2020 23:56:55 +0700 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=BE=20=D0=B8=D0=B7=D0=B2=D0=BB=D0=B5=D1=87=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D1=81=D1=85=D0=B5=D0=BC=D1=8B=20=D0=B4=D0=BE?= =?UTF-8?q?=D0=BA=D1=83=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=20=D0=B2=20ConvertPd?= =?UTF-8?q?fImages,=20=D0=BC=D0=B5=D0=BB=D0=BA=D0=B8=D0=B9=20=D1=80=D0=B5?= =?UTF-8?q?=D1=84=D0=B0=D0=BA=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/Reader/BookConverter/ConvertDjvu.js | 4 +- .../core/Reader/BookConverter/ConvertPdf.js | 1 - .../Reader/BookConverter/ConvertPdfImages.js | 62 +++++++++++++++---- 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/server/core/Reader/BookConverter/ConvertDjvu.js b/server/core/Reader/BookConverter/ConvertDjvu.js index e794700c..3ee72b3b 100644 --- a/server/core/Reader/BookConverter/ConvertDjvu.js +++ b/server/core/Reader/BookConverter/ConvertDjvu.js @@ -17,7 +17,7 @@ class ConvertDjvu extends ConvertJpegPng { return false; let {inputFiles, callback, abort, djvuQuality} = opts; - + djvuQuality = (djvuQuality && djvuQuality <= 100 && djvuQuality >= 10 ? djvuQuality : 20); let jpegQuality = djvuQuality; let tiffQuality = djvuQuality + 30; @@ -85,7 +85,7 @@ class ConvertDjvu extends ConvertJpegPng { files.sort((a, b) => a.base.localeCompare(b.base)); //схема документа (outline) - const djvusedResult = await this.execConverter(djvusedPath, ['-u', '-e', 'print-outline', inputFiles.sourceFile]); + const djvusedResult = await this.execConverter(djvusedPath, ['-u', '-e', 'print-outline', inputFiles.sourceFile], null, abort); const outline = []; const lines = djvusedResult.stdout.match(/\(".*"\s*?"#\d+".*?\)/g); if (lines) { diff --git a/server/core/Reader/BookConverter/ConvertPdf.js b/server/core/Reader/BookConverter/ConvertPdf.js index bec96b22..c868df5b 100644 --- a/server/core/Reader/BookConverter/ConvertPdf.js +++ b/server/core/Reader/BookConverter/ConvertPdf.js @@ -27,7 +27,6 @@ class ConvertPdf extends ConvertHtml { const outFile = `${outBasename}.xml`; const pdftohtmlPath = '/usr/bin/pdftohtml'; - if (!await fs.pathExists(pdftohtmlPath)) throw new Error('Внешний конвертер pdftohtml не найден'); diff --git a/server/core/Reader/BookConverter/ConvertPdfImages.js b/server/core/Reader/BookConverter/ConvertPdfImages.js index c174747c..51024922 100644 --- a/server/core/Reader/BookConverter/ConvertPdfImages.js +++ b/server/core/Reader/BookConverter/ConvertPdfImages.js @@ -2,6 +2,8 @@ const fs = require('fs-extra'); const path = require('path'); const utils = require('../../utils'); +const sax = require('../../sax'); + const ConvertJpegPng = require('./ConvertJpegPng'); class ConvertPdfImages extends ConvertJpegPng { @@ -24,19 +26,25 @@ class ConvertPdfImages extends ConvertJpegPng { if (!await fs.pathExists(pdftoppmPath)) throw new Error('Внешний конвертер pdftoppm не найден'); + const pdftohtmlPath = '/usr/bin/pdftohtml'; + if (!await fs.pathExists(pdftohtmlPath)) + throw new Error('Внешний конвертер pdftohtml не найден'); + + const inpFile = inputFiles.sourceFile; const dir = `${inputFiles.filesDir}/`; - const baseFile = `${dir}${path.basename(inputFiles.sourceFile)}`; - const jpgFiles = `${baseFile}.tmp`; + const outBasename = `${dir}${utils.randomHexString(10)}`; + const outFile = `${outBasename}.tmp`; //конвертируем в jpeg let perc = 0; - await this.execConverter(pdftoppmPath, ['-jpeg', '-jpegopt', `quality=${pdfQuality},progressive=y`, inputFiles.sourceFile, jpgFiles], () => { + await this.execConverter(pdftoppmPath, ['-jpeg', '-jpegopt', `quality=${pdfQuality},progressive=y`, inpFile, outFile], () => { perc = (perc < 100 ? perc + 1 : 40); callback(perc); }, abort); const limitSize = 2*this.config.maxUploadFileSize; let jpgFilesSize = 0; + //ищем изображения let files = []; await utils.findFiles(async(file) => { @@ -53,19 +61,47 @@ class ConvertPdfImages extends ConvertJpegPng { files.sort((a, b) => a.base.localeCompare(b.base)); //схема документа (outline) - //const djvusedResult = await this.execConverter(djvusedPath, ['-u', '-e', 'print-outline', inputFiles.sourceFile]); + const outXml = `${outBasename}.xml`; + await this.execConverter(pdftohtmlPath, ['-nodrm', '-i', '-c', '-s', '-xml', inpFile, outXml], null, abort); const outline = []; - /*const lines = djvusedResult.stdout.match(/\(".*"\s*?"#\d+".*?\)/g); - if (lines) { - lines.forEach(l => { - const m = l.match(/"(.*)"\s*?"#(\d+)"/); - if (m) { - outline[m[2]] = m[1]; - } - }); - }*/ + + let inOutline = 0; + let inItem = false; + let pageNum = 0; + + const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars + if (inOutline > 0 && inItem && pageNum) { + outline[pageNum] = text; + } + }; + + const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars + if (tag == 'outline') + inOutline++; + + if (inOutline > 0 && tag == 'item') { + const attrs = sax.getAttrsSync(tail); + pageNum = (attrs.page && attrs.page.value ? attrs.page.value : 0); + inItem = true; + } + }; + + const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars + if (tag == 'outline') + inOutline--; + if (tag == 'item') + inItem = false; + }; + + const dataXml = await fs.readFile(outXml); + const buf = this.decode(dataXml).toString(); + sax.parseSync(buf, { + onStartNode, onEndNode, onTextNode + }); + await utils.sleep(100); + //формируем список файлов let i = 0; const imageFiles = files.map(f => { i++;