From da284c793ef974b6fbc3de710c2b96c2f2fcfeae Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Thu, 21 Feb 2019 20:22:25 +0700 Subject: [PATCH 1/5] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB?= =?UTF-8?q?=20=D0=B7=D0=B0=D0=B3=D1=80=D1=83=D0=B7=D0=BA=D1=83=20=D0=B2?= =?UTF-8?q?=D0=BD=D0=B5=D1=88=D0=BD=D0=B8=D1=85=20=D0=B8=D0=B7=D0=BE=D0=B1?= =?UTF-8?q?=D1=80=D0=B0=D0=B6=D0=B5=D0=BD=D0=B8=D0=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../components/Reader/TextPage/DrawHelper.js | 23 ++++--- client/components/Reader/share/BookParser.js | 65 ++++++++++++++----- server/core/BookConverter/index.js | 39 +++++++++-- 3 files changed, 92 insertions(+), 35 deletions(-) diff --git a/client/components/Reader/TextPage/DrawHelper.js b/client/components/Reader/TextPage/DrawHelper.js index 40b81a55..458927b1 100644 --- a/client/components/Reader/TextPage/DrawHelper.js +++ b/client/components/Reader/TextPage/DrawHelper.js @@ -103,9 +103,8 @@ export default class DrawHelper { //image: {local: Boolean, inline: Boolean, id: String, imageLine: Number, lineCount: Number, paraIndex: Number}, const img = part.image; if (img && img.id && !img.inline && !imageDrawn.has(img.paraIndex)) { - if (img.local) { - const bin = this.parsed.binary[img.id]; - + const bin = this.parsed.binary[img.id]; + if (bin) { let imgH = img.lineCount*this.lineHeight; imgH = (imgH <= bin.h ? imgH : bin.h); let imgW = bin.w; @@ -118,9 +117,11 @@ export default class DrawHelper { const left = (this.w - imgW)/2; const top = ((img.lineCount*this.lineHeight - imgH)/2) + (i - img.imageLine)*this.lineHeight; - lineText += ``; - } else { - // + if (img.local) { + lineText += ``; + } else { + lineText += ``; + } } imageDrawn.add(img.paraIndex); } @@ -128,11 +129,13 @@ export default class DrawHelper { if (img && img.id && img.inline) { if (img.local) { const bin = this.parsed.binary[img.id]; - let resize = ''; - if (bin.h > this.fontSize) { - resize = `height: ${this.fontSize - 3}px`; + if (bin) { + let resize = ''; + if (bin.h > this.fontSize) { + resize = `height: ${this.fontSize - 3}px`; + } + lineText += ``; } - lineText += ``; } else { // } diff --git a/client/components/Reader/share/BookParser.js b/client/components/Reader/share/BookParser.js index 0548cb98..f7ca5507 100644 --- a/client/components/Reader/share/BookParser.js +++ b/client/components/Reader/share/BookParser.js @@ -85,6 +85,26 @@ export default class BookParser { }); }; + const getExternalImageDimensions = (src) => { + return new Promise (async(resolve, reject) => { + const i = new Image(); + let resolved = false; + i.onload = () => { + resolved = true; + this.binary[src] = { + w: i.width, + h: i.height, + }; + resolve(); + }; + + i.src = src; + await sleep(30*1000); + if (!resolved) + reject('Не удалось получить размер изображения'); + }); + }; + const newParagraph = (text, len, addIndex) => { paraIndex++; let p = { @@ -147,21 +167,26 @@ export default class BookParser { if (tag == 'binary') { let attrs = sax.getAttrsSync(tail); - binaryType = (attrs['content-type'].value ? attrs['content-type'].value : ''); + binaryType = (attrs['content-type'] && attrs['content-type'].value ? attrs['content-type'].value : ''); if (binaryType == 'image/jpeg' || binaryType == 'image/png') binaryId = (attrs.id.value ? attrs.id.value : ''); } if (tag == 'image') { let attrs = sax.getAttrsSync(tail); - if (attrs.href.value) { - if (inPara && !this.showInlineImagesInCenter) - growParagraph(``, 0); - else - newParagraph(`${' '.repeat(maxImageLineCount)}`, maxImageLineCount); - - if (inPara && this.showInlineImagesInCenter) - newParagraph(' ', 1); + if (attrs.href && attrs.href.value) { + const href = attrs.href.value; + if (href[0] == '#') {//local + if (inPara && !this.showInlineImagesInCenter) + growParagraph(``, 0); + else + newParagraph(`${' '.repeat(maxImageLineCount)}`, maxImageLineCount); + if (inPara && this.showInlineImagesInCenter) + newParagraph(' ', 1); + } else {//external + dimPromises.push(getExternalImageDimensions(href)); + newParagraph(`${' '.repeat(maxImageLineCount)}`, maxImageLineCount); + } } } @@ -409,14 +434,14 @@ export default class BookParser { break; case 'space': { let attrs = sax.getAttrsSync(tail); - if (attrs.w.value) + if (attrs.w && attrs.w.value) style.space = attrs.w.value; break; } case 'image': { let attrs = sax.getAttrsSync(tail); - let id = attrs.href.value; - if (id) { + if (attrs.href && attrs.href.value) { + let id = attrs.href.value; let local = false; if (id[0] == '#') { id = id.substr(1); @@ -428,8 +453,8 @@ export default class BookParser { } case 'image-inline': { let attrs = sax.getAttrsSync(tail); - let id = attrs.href.value; - if (id) { + if (attrs.href && attrs.href.value) { + let id = attrs.href.value; let local = false; if (id[0] == '#') { id = id.substr(1); @@ -617,6 +642,8 @@ export default class BookParser { if (part.image.id && !part.image.inline) { parsed.visible = this.showImages; const bin = this.binary[part.image.id]; + if (!bin) + continue; let lineCount = this.imageHeightLines; const c = Math.ceil(bin.h/this.lineHeight); @@ -648,10 +675,12 @@ export default class BookParser { if (part.image.id && part.image.inline && this.showImages) { const bin = this.binary[part.image.id]; - let imgH = (bin.h > this.fontSize ? this.fontSize : bin.h); - imgW += bin.w*imgH/bin.h; - line.parts.push({style, text: '', - image: {local: part.image.local, inline: true, id: part.image.id}}); + if (bin) { + let imgH = (bin.h > this.fontSize ? this.fontSize : bin.h); + imgW += bin.w*imgH/bin.h; + line.parts.push({style, text: '', + image: {local: part.image.local, inline: true, id: part.image.id}}); + } } let words = part.text.split(' '); diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js index 41dfd629..5642ec06 100644 --- a/server/core/BookConverter/index.js +++ b/server/core/BookConverter/index.js @@ -9,6 +9,7 @@ const textUtils = require('./textUtils'); const FileDetector = require('../FileDetector'); const repSpaces = (text) => text.replace(/ |[\t\n\r]/g, ' '); +const repSpaces2 = (text) => text.replace(/[\n\r]/g, ''); class BookConverter { constructor() { @@ -31,7 +32,7 @@ class BookConverter { if (parsedUrl.hostname == 'samlib.ru' || parsedUrl.hostname == 'budclub.ru' || parsedUrl.hostname == 'zhurnal.lib.ru') { - await fs.writeFile(outputFile, this.convertSamlib(data)); + await fs.writeFile(outputFile, this.convertSamlib(data, parsedUrl.hostname)); return; } @@ -216,7 +217,7 @@ class BookConverter { return this.formatFb2(fb2); } - convertSamlib(data) { + convertSamlib(data, hostname) { let titleInfo = {}; let desc = {_n: 'description', 'title-info': titleInfo}; let pars = []; @@ -225,6 +226,7 @@ class BookConverter { let inSubtitle = false; let inJustify = true; + let inImage = false; let path = ''; let tag = '';// eslint-disable-line no-unused-vars @@ -235,10 +237,10 @@ class BookConverter { let italic = false; let bold = false; - const openTag = (name) => { + const openTag = (name, attrs) => { if (name == 'p') inPara = true; - let n = {_n: name, _a: [], _p: node}; + let n = {_n: name, _attrs: attrs, _a: [], _p: node}; node._a.push(n); node = n; }; @@ -269,7 +271,7 @@ class BookConverter { path += '/' + elemName; tag = elemName; } else { - if (inPara && elemName != 'i' && elemName != 'b') + if (inPara && elemName != 'i' && elemName != 'b' && elemName != 'img') closeTag('p'); switch (elemName) { @@ -299,6 +301,17 @@ class BookConverter { } break; + case 'img': { + const attrs = sax.getAttrsSync(tail); + if (attrs.src && attrs.src.value) { + let href = attrs.src.value; + if (href[0] == '/') + href = `http://${hostname}${href}`; + openTag('image', {href}); + inImage = true; + } + break; + } } } }; @@ -346,6 +359,11 @@ class BookConverter { inJustify = false; } break; + case 'img': + if (inImage) + closeTag('image'); + inImage = false; + break; } } }; @@ -390,7 +408,7 @@ class BookConverter { growParagraph(`${tOpen}${text}${tClose}`); }; - sax.parseSync(repSpaces(this.decode(data).toString()), { + sax.parseSync(repSpaces(repSpaces2(this.decode(data).toString())), { onStartNode, onEndNode, onTextNode, onComment, innerCut: new Set(['head', 'script', 'style']) }); @@ -437,8 +455,15 @@ class BookConverter { if (node._n) name = node._n; + let attrs = ''; + if (node._attrs) { + for (let attrName in node._attrs) { + attrs += ` ${attrName}="${node._attrs[attrName]}"`; + } + } + if (name) - out += `<${name}>`; + out += `<${name}${attrs}>`; if (node.hasOwnProperty('_t')) out += repSpaces(node._t); From 06cdc6eb63fd8cc6bbb2f6387d9c6b914b6a2720 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Thu, 21 Feb 2019 20:26:56 +0700 Subject: [PATCH 2/5] =?UTF-8?q?=D0=A3=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D0=BF=D0=B0=D1=80=D1=81=D0=B8=D0=BD=D0=B3=D0=B0?= =?UTF-8?q?=20samlib?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/BookConverter/index.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js index 5642ec06..267b0e1d 100644 --- a/server/core/BookConverter/index.js +++ b/server/core/BookConverter/index.js @@ -281,12 +281,15 @@ class BookConverter { case 'h1': case 'h2': case 'h3': + case 'br': openTag('p'); break; case 'i': + case 'em': italic = true; break; case 'b': + case 'strong': bold = true; break; case 'div': @@ -343,9 +346,11 @@ class BookConverter { closeTag('p'); break; case 'i': + case 'em': italic = false; break; case 'b': + case 'strong': bold = false; break; case 'div': From e5384e27e5e3bd29559b2d98b9477de56f6d263f Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Thu, 21 Feb 2019 21:02:27 +0700 Subject: [PATCH 3/5] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B8=20=D0=B1=D0=B0=D0=B3=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client/components/Reader/share/BookParser.js | 13 +++++++++++-- server/core/BookConverter/index.js | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/client/components/Reader/share/BookParser.js b/client/components/Reader/share/BookParser.js index f7ca5507..b4c24a9a 100644 --- a/client/components/Reader/share/BookParser.js +++ b/client/components/Reader/share/BookParser.js @@ -78,6 +78,10 @@ export default class BookParser { resolve(); }; + i.onerror = (e) => { + reject(e); + }; + i.src = `data:${binaryType};base64,${data}`; await sleep(30*1000); if (!resolved) @@ -98,6 +102,10 @@ export default class BookParser { resolve(); }; + i.onerror = (e) => { + reject(e); + }; + i.src = src; await sleep(30*1000); if (!resolved) @@ -641,9 +649,9 @@ export default class BookParser { //изображения if (part.image.id && !part.image.inline) { parsed.visible = this.showImages; - const bin = this.binary[part.image.id]; + let bin = this.binary[part.image.id]; if (!bin) - continue; + bin = {h: 0, w: 0}; let lineCount = this.imageHeightLines; const c = Math.ceil(bin.h/this.lineHeight); @@ -670,6 +678,7 @@ export default class BookParser { line.last = true; line.parts.push({style, text: ' ', image: {local: part.image.local, inline: false, id: part.image.id, imageLine: i, lineCount, paraIndex}}); + continue; } diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js index 267b0e1d..109842e9 100644 --- a/server/core/BookConverter/index.js +++ b/server/core/BookConverter/index.js @@ -271,7 +271,7 @@ class BookConverter { path += '/' + elemName; tag = elemName; } else { - if (inPara && elemName != 'i' && elemName != 'b' && elemName != 'img') + if (inPara && elemName != 'i' && elemName != 'b' && elemName != 'em' && elemName != 'strong' && elemName != 'img') closeTag('p'); switch (elemName) { From ab29c80dab78fb1feeae150caca659374331864f Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Thu, 21 Feb 2019 21:36:17 +0700 Subject: [PATCH 4/5] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B8=20=D0=B1=D0=B0=D0=B3=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/BookConverter/index.js | 9 ++++++++- server/core/FileDownloader.js | 10 ++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js index 109842e9..93865f30 100644 --- a/server/core/BookConverter/index.js +++ b/server/core/BookConverter/index.js @@ -231,6 +231,7 @@ class BookConverter { let tag = '';// eslint-disable-line no-unused-vars let inText = false; + let textFound = false; let node = {_a: pars}; let inPara = false; @@ -374,8 +375,10 @@ class BookConverter { }; const onComment = (text) => {// eslint-disable-line no-unused-vars - if (text == '--------- Собственно произведение -------------') + if (text == '--------- Собственно произведение -------------') { inText = true; + textFound = true; + } if (text == '-----------------------------------------------') inText = false; }; @@ -418,6 +421,10 @@ class BookConverter { innerCut: new Set(['head', 'script', 'style']) }); + //текст не найден на странице, обрабатываем как html + if (!textFound) + return this.convertHtml(data); + const title = (titleInfo['book-title'] ? titleInfo['book-title'] : ''); let author = ''; if (titleInfo.author) { diff --git a/server/core/FileDownloader.js b/server/core/FileDownloader.js index d816b07c..78c0df3a 100644 --- a/server/core/FileDownloader.js +++ b/server/core/FileDownloader.js @@ -8,8 +8,14 @@ class FileDownloader { async load(url, callback) { let errMes = ''; + const options = { + encoding: null, + headers: { + 'user-agent': 'Mozilla/5.0 (X11; HasCodingOs 1.0; Linux x64) AppleWebKit/637.36 (KHTML, like Gecko) Chrome/70.0.3112.101 Safari/637.36 HasBrowser/5.0' + } + }; - const response = await got(url, {method: 'HEAD'}); + const response = await got(url, Object.assign({}, options, {method: 'HEAD'})); let estSize = 0; if (response.headers['content-length']) { @@ -17,7 +23,7 @@ class FileDownloader { } let prevProg = 0; - const request = got(url, {encoding: null}).on('downloadProgress', progress => { + const request = got(url, options).on('downloadProgress', progress => { if (progress.transferred > maxDownloadSize) { errMes = 'file too big'; request.cancel(); From d661150665316e9290981cb91b3787fcac21b789 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Thu, 21 Feb 2019 21:38:46 +0700 Subject: [PATCH 5/5] 0.4.2 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index df59f11e..17b901fe 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "Liberama", - "version": "0.4.1", + "version": "0.4.2", "engines": { "node": ">=10.0.0" },