Compare commits

...

8 Commits
0.4.1 ... 0.4.2

Author SHA1 Message Date
Book Pauk
3920b71613 Merge branch 'release/0.4.2' 2019-02-21 21:39:09 +07:00
Book Pauk
d661150665 0.4.2 2019-02-21 21:38:46 +07:00
Book Pauk
ab29c80dab Поправки багов 2019-02-21 21:36:17 +07:00
Book Pauk
e5384e27e5 Поправки багов 2019-02-21 21:02:27 +07:00
Book Pauk
06cdc6eb63 Улучшение парсинга samlib 2019-02-21 20:26:56 +07:00
Book Pauk
da284c793e Добавил загрузку внешних изображений 2019-02-21 20:22:25 +07:00
Book Pauk
c2cef91eb3 Merge tag '0.4.1' into develop
0.4.1
2019-02-20 21:40:10 +07:00
Book Pauk
e272308823 Merge tag '0.4.0' into develop
0.4.0
2019-02-20 21:08:53 +07:00
5 changed files with 124 additions and 40 deletions

View File

@@ -103,9 +103,8 @@ export default class DrawHelper {
//image: {local: Boolean, inline: Boolean, id: String, imageLine: Number, lineCount: Number, paraIndex: Number}, //image: {local: Boolean, inline: Boolean, id: String, imageLine: Number, lineCount: Number, paraIndex: Number},
const img = part.image; const img = part.image;
if (img && img.id && !img.inline && !imageDrawn.has(img.paraIndex)) { if (img && img.id && !img.inline && !imageDrawn.has(img.paraIndex)) {
if (img.local) {
const bin = this.parsed.binary[img.id]; const bin = this.parsed.binary[img.id];
if (bin) {
let imgH = img.lineCount*this.lineHeight; let imgH = img.lineCount*this.lineHeight;
imgH = (imgH <= bin.h ? imgH : bin.h); imgH = (imgH <= bin.h ? imgH : bin.h);
let imgW = bin.w; let imgW = bin.w;
@@ -118,9 +117,11 @@ export default class DrawHelper {
const left = (this.w - imgW)/2; const left = (this.w - imgW)/2;
const top = ((img.lineCount*this.lineHeight - imgH)/2) + (i - img.imageLine)*this.lineHeight; const top = ((img.lineCount*this.lineHeight - imgH)/2) + (i - img.imageLine)*this.lineHeight;
if (img.local) {
lineText += `<img src="data:${bin.type};base64,${bin.data}" style="position: absolute; left: ${left}px; top: ${top}px; ${resize}"/>`; lineText += `<img src="data:${bin.type};base64,${bin.data}" style="position: absolute; left: ${left}px; top: ${top}px; ${resize}"/>`;
} else { } else {
// lineText += `<img src="${img.id}" style="position: absolute; left: ${left}px; top: ${top}px; ${resize}"/>`;
}
} }
imageDrawn.add(img.paraIndex); imageDrawn.add(img.paraIndex);
} }
@@ -128,11 +129,13 @@ export default class DrawHelper {
if (img && img.id && img.inline) { if (img && img.id && img.inline) {
if (img.local) { if (img.local) {
const bin = this.parsed.binary[img.id]; const bin = this.parsed.binary[img.id];
if (bin) {
let resize = ''; let resize = '';
if (bin.h > this.fontSize) { if (bin.h > this.fontSize) {
resize = `height: ${this.fontSize - 3}px`; resize = `height: ${this.fontSize - 3}px`;
} }
lineText += `<img src="data:${bin.type};base64,${bin.data}" style="${resize}"/>`; lineText += `<img src="data:${bin.type};base64,${bin.data}" style="${resize}"/>`;
}
} else { } else {
// //
} }

View File

@@ -78,6 +78,10 @@ export default class BookParser {
resolve(); resolve();
}; };
i.onerror = (e) => {
reject(e);
};
i.src = `data:${binaryType};base64,${data}`; i.src = `data:${binaryType};base64,${data}`;
await sleep(30*1000); await sleep(30*1000);
if (!resolved) if (!resolved)
@@ -85,6 +89,30 @@ export default class BookParser {
}); });
}; };
/**
 * Loads the external image at `src` to learn its pixel size and records it
 * in `this.binary[src]` as `{w, h}`.
 *
 * @param {String} src - value assigned to the image element's `src`.
 * @returns {Promise} resolves (with no value) once the size has been stored.
 *   Rejects with the `onerror` argument if loading fails, with a message
 *   string if nothing has loaded when the `sleep(30*1000)` wait ends
 *   (presumably 30 s - `sleep` is defined elsewhere), or with the thrown
 *   exception if setting up the load throws.
 */
const getExternalImageDimensions = (src) => {
    // The executor is deliberately synchronous: an exception thrown inside an
    // `async` executor is lost and would leave this promise pending forever.
    return new Promise((resolve, reject) => {
        const i = new Image();
        let resolved = false;

        i.onload = () => {
            resolved = true;
            this.binary[src] = {
                w: i.width,
                h: i.height,
            };
            resolve();
        };

        i.onerror = (e) => {
            reject(e);
        };

        i.src = src;

        // Timeout guard. If the promise has already been settled by onload or
        // onerror, this late reject is ignored.
        sleep(30*1000).then(() => {
            if (!resolved)
                reject('Не удалось получить размер изображения');
        }, reject);
    });
};
const newParagraph = (text, len, addIndex) => { const newParagraph = (text, len, addIndex) => {
paraIndex++; paraIndex++;
let p = { let p = {
@@ -147,21 +175,26 @@ export default class BookParser {
if (tag == 'binary') { if (tag == 'binary') {
let attrs = sax.getAttrsSync(tail); let attrs = sax.getAttrsSync(tail);
binaryType = (attrs['content-type'].value ? attrs['content-type'].value : ''); binaryType = (attrs['content-type'] && attrs['content-type'].value ? attrs['content-type'].value : '');
if (binaryType == 'image/jpeg' || binaryType == 'image/png') if (binaryType == 'image/jpeg' || binaryType == 'image/png')
binaryId = (attrs.id.value ? attrs.id.value : ''); binaryId = (attrs.id.value ? attrs.id.value : '');
} }
if (tag == 'image') { if (tag == 'image') {
let attrs = sax.getAttrsSync(tail); let attrs = sax.getAttrsSync(tail);
if (attrs.href.value) { if (attrs.href && attrs.href.value) {
const href = attrs.href.value;
if (href[0] == '#') {//local
if (inPara && !this.showInlineImagesInCenter) if (inPara && !this.showInlineImagesInCenter)
growParagraph(`<image-inline href="${attrs.href.value}"></image-inline>`, 0); growParagraph(`<image-inline href="${href}"></image-inline>`, 0);
else else
newParagraph(`<image href="${attrs.href.value}">${' '.repeat(maxImageLineCount)}</image>`, maxImageLineCount); newParagraph(`<image href="${href}">${' '.repeat(maxImageLineCount)}</image>`, maxImageLineCount);
if (inPara && this.showInlineImagesInCenter) if (inPara && this.showInlineImagesInCenter)
newParagraph(' ', 1); newParagraph(' ', 1);
} else {//external
dimPromises.push(getExternalImageDimensions(href));
newParagraph(`<image href="${href}">${' '.repeat(maxImageLineCount)}</image>`, maxImageLineCount);
}
} }
} }
@@ -409,14 +442,14 @@ export default class BookParser {
break; break;
case 'space': { case 'space': {
let attrs = sax.getAttrsSync(tail); let attrs = sax.getAttrsSync(tail);
if (attrs.w.value) if (attrs.w && attrs.w.value)
style.space = attrs.w.value; style.space = attrs.w.value;
break; break;
} }
case 'image': { case 'image': {
let attrs = sax.getAttrsSync(tail); let attrs = sax.getAttrsSync(tail);
if (attrs.href && attrs.href.value) {
let id = attrs.href.value; let id = attrs.href.value;
if (id) {
let local = false; let local = false;
if (id[0] == '#') { if (id[0] == '#') {
id = id.substr(1); id = id.substr(1);
@@ -428,8 +461,8 @@ export default class BookParser {
} }
case 'image-inline': { case 'image-inline': {
let attrs = sax.getAttrsSync(tail); let attrs = sax.getAttrsSync(tail);
if (attrs.href && attrs.href.value) {
let id = attrs.href.value; let id = attrs.href.value;
if (id) {
let local = false; let local = false;
if (id[0] == '#') { if (id[0] == '#') {
id = id.substr(1); id = id.substr(1);
@@ -616,7 +649,9 @@ export default class BookParser {
//изображения //изображения
if (part.image.id && !part.image.inline) { if (part.image.id && !part.image.inline) {
parsed.visible = this.showImages; parsed.visible = this.showImages;
const bin = this.binary[part.image.id]; let bin = this.binary[part.image.id];
if (!bin)
bin = {h: 0, w: 0};
let lineCount = this.imageHeightLines; let lineCount = this.imageHeightLines;
const c = Math.ceil(bin.h/this.lineHeight); const c = Math.ceil(bin.h/this.lineHeight);
@@ -643,16 +678,19 @@ export default class BookParser {
line.last = true; line.last = true;
line.parts.push({style, text: ' ', line.parts.push({style, text: ' ',
image: {local: part.image.local, inline: false, id: part.image.id, imageLine: i, lineCount, paraIndex}}); image: {local: part.image.local, inline: false, id: part.image.id, imageLine: i, lineCount, paraIndex}});
continue; continue;
} }
if (part.image.id && part.image.inline && this.showImages) { if (part.image.id && part.image.inline && this.showImages) {
const bin = this.binary[part.image.id]; const bin = this.binary[part.image.id];
if (bin) {
let imgH = (bin.h > this.fontSize ? this.fontSize : bin.h); let imgH = (bin.h > this.fontSize ? this.fontSize : bin.h);
imgW += bin.w*imgH/bin.h; imgW += bin.w*imgH/bin.h;
line.parts.push({style, text: '', line.parts.push({style, text: '',
image: {local: part.image.local, inline: true, id: part.image.id}}); image: {local: part.image.local, inline: true, id: part.image.id}});
} }
}
let words = part.text.split(' '); let words = part.text.split(' ');

View File

@@ -1,6 +1,6 @@
{ {
"name": "Liberama", "name": "Liberama",
"version": "0.4.1", "version": "0.4.2",
"engines": { "engines": {
"node": ">=10.0.0" "node": ">=10.0.0"
}, },

View File

@@ -9,6 +9,7 @@ const textUtils = require('./textUtils');
const FileDetector = require('../FileDetector'); const FileDetector = require('../FileDetector');
const repSpaces = (text) => text.replace(/&nbsp;|[\t\n\r]/g, ' '); const repSpaces = (text) => text.replace(/&nbsp;|[\t\n\r]/g, ' ');
// Removes every line-feed and carriage-return character from `text`
// (they are deleted outright, not replaced with a space).
const repSpaces2 = (text) => {
    const lineBreaks = /[\n\r]/g;
    return text.replace(lineBreaks, '');
};
class BookConverter { class BookConverter {
constructor() { constructor() {
@@ -31,7 +32,7 @@ class BookConverter {
if (parsedUrl.hostname == 'samlib.ru' || if (parsedUrl.hostname == 'samlib.ru' ||
parsedUrl.hostname == 'budclub.ru' || parsedUrl.hostname == 'budclub.ru' ||
parsedUrl.hostname == 'zhurnal.lib.ru') { parsedUrl.hostname == 'zhurnal.lib.ru') {
await fs.writeFile(outputFile, this.convertSamlib(data)); await fs.writeFile(outputFile, this.convertSamlib(data, parsedUrl.hostname));
return; return;
} }
@@ -216,7 +217,7 @@ class BookConverter {
return this.formatFb2(fb2); return this.formatFb2(fb2);
} }
convertSamlib(data) { convertSamlib(data, hostname) {
let titleInfo = {}; let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo}; let desc = {_n: 'description', 'title-info': titleInfo};
let pars = []; let pars = [];
@@ -225,20 +226,22 @@ class BookConverter {
let inSubtitle = false; let inSubtitle = false;
let inJustify = true; let inJustify = true;
let inImage = false;
let path = ''; let path = '';
let tag = '';// eslint-disable-line no-unused-vars let tag = '';// eslint-disable-line no-unused-vars
let inText = false; let inText = false;
let textFound = false;
let node = {_a: pars}; let node = {_a: pars};
let inPara = false; let inPara = false;
let italic = false; let italic = false;
let bold = false; let bold = false;
const openTag = (name) => { const openTag = (name, attrs) => {
if (name == 'p') if (name == 'p')
inPara = true; inPara = true;
let n = {_n: name, _a: [], _p: node}; let n = {_n: name, _attrs: attrs, _a: [], _p: node};
node._a.push(n); node._a.push(n);
node = n; node = n;
}; };
@@ -269,7 +272,7 @@ class BookConverter {
path += '/' + elemName; path += '/' + elemName;
tag = elemName; tag = elemName;
} else { } else {
if (inPara && elemName != 'i' && elemName != 'b') if (inPara && elemName != 'i' && elemName != 'b' && elemName != 'em' && elemName != 'strong' && elemName != 'img')
closeTag('p'); closeTag('p');
switch (elemName) { switch (elemName) {
@@ -279,12 +282,15 @@ class BookConverter {
case 'h1': case 'h1':
case 'h2': case 'h2':
case 'h3': case 'h3':
case 'br':
openTag('p'); openTag('p');
break; break;
case 'i': case 'i':
case 'em':
italic = true; italic = true;
break; break;
case 'b': case 'b':
case 'strong':
bold = true; bold = true;
break; break;
case 'div': case 'div':
@@ -299,6 +305,17 @@ class BookConverter {
} }
break; break;
case 'img': {
const attrs = sax.getAttrsSync(tail);
if (attrs.src && attrs.src.value) {
let href = attrs.src.value;
if (href[0] == '/')
href = `http://${hostname}${href}`;
openTag('image', {href});
inImage = true;
}
break;
}
} }
} }
}; };
@@ -330,9 +347,11 @@ class BookConverter {
closeTag('p'); closeTag('p');
break; break;
case 'i': case 'i':
case 'em':
italic = false; italic = false;
break; break;
case 'b': case 'b':
case 'strong':
bold = false; bold = false;
break; break;
case 'div': case 'div':
@@ -346,13 +365,20 @@ class BookConverter {
inJustify = false; inJustify = false;
} }
break; break;
case 'img':
if (inImage)
closeTag('image');
inImage = false;
break;
} }
} }
}; };
const onComment = (text) => {// eslint-disable-line no-unused-vars const onComment = (text) => {// eslint-disable-line no-unused-vars
if (text == '--------- Собственно произведение -------------') if (text == '--------- Собственно произведение -------------') {
inText = true; inText = true;
textFound = true;
}
if (text == '-----------------------------------------------') if (text == '-----------------------------------------------')
inText = false; inText = false;
}; };
@@ -390,11 +416,15 @@ class BookConverter {
growParagraph(`${tOpen}${text}${tClose}`); growParagraph(`${tOpen}${text}${tClose}`);
}; };
sax.parseSync(repSpaces(this.decode(data).toString()), { sax.parseSync(repSpaces(repSpaces2(this.decode(data).toString())), {
onStartNode, onEndNode, onTextNode, onComment, onStartNode, onEndNode, onTextNode, onComment,
innerCut: new Set(['head', 'script', 'style']) innerCut: new Set(['head', 'script', 'style'])
}); });
//текст не найден на странице, обрабатываем как html
if (!textFound)
return this.convertHtml(data);
const title = (titleInfo['book-title'] ? titleInfo['book-title'] : ''); const title = (titleInfo['book-title'] ? titleInfo['book-title'] : '');
let author = ''; let author = '';
if (titleInfo.author) { if (titleInfo.author) {
@@ -437,8 +467,15 @@ class BookConverter {
if (node._n) if (node._n)
name = node._n; name = node._n;
let attrs = '';
if (node._attrs) {
for (let attrName in node._attrs) {
attrs += ` ${attrName}="${node._attrs[attrName]}"`;
}
}
if (name) if (name)
out += `<${name}>`; out += `<${name}${attrs}>`;
if (node.hasOwnProperty('_t')) if (node.hasOwnProperty('_t'))
out += repSpaces(node._t); out += repSpaces(node._t);

View File

@@ -8,8 +8,14 @@ class FileDownloader {
async load(url, callback) { async load(url, callback) {
let errMes = ''; let errMes = '';
const options = {
encoding: null,
headers: {
'user-agent': 'Mozilla/5.0 (X11; HasCodingOs 1.0; Linux x64) AppleWebKit/637.36 (KHTML, like Gecko) Chrome/70.0.3112.101 Safari/637.36 HasBrowser/5.0'
}
};
const response = await got(url, {method: 'HEAD'}); const response = await got(url, Object.assign({}, options, {method: 'HEAD'}));
let estSize = 0; let estSize = 0;
if (response.headers['content-length']) { if (response.headers['content-length']) {
@@ -17,7 +23,7 @@ class FileDownloader {
} }
let prevProg = 0; let prevProg = 0;
const request = got(url, {encoding: null}).on('downloadProgress', progress => { const request = got(url, options).on('downloadProgress', progress => {
if (progress.transferred > maxDownloadSize) { if (progress.transferred > maxDownloadSize) {
errMes = 'file too big'; errMes = 'file too big';
request.cancel(); request.cancel();