Compare commits

...

11 Commits
0.5.3 ... 0.5.4

Author SHA1 Message Date
Book Pauk
89bf907613 Merge branch 'release/0.5.4' 2019-03-04 22:57:38 +07:00
Book Pauk
641d0e45fd Версия 0.5.4 2019-03-04 22:57:10 +07:00
Book Pauk
b3e579d8b7 Улучшение парсинга pdf и html 2019-03-04 22:56:15 +07:00
Book Pauk
fcb61c89d5 Улучшение парсинга html 2019-03-04 22:42:54 +07:00
Book Pauk
3483d78c2c Улучшение парсинга pdf и текстов 2019-03-04 22:28:11 +07:00
Book Pauk
36b14d0b3a Мелкая поправка 2019-03-04 21:26:07 +07:00
Book Pauk
2f8b68ec62 Улучшение парсинга Pdf 2019-03-04 21:22:12 +07:00
Book Pauk
cb65cac333 Конвертер pdf - загружаем изображения 2019-03-04 20:00:51 +07:00
Book Pauk
d12ffc3d0d Поправил комментарий 2019-03-04 16:39:30 +07:00
Book Pauk
921744167e Поправка мелкого бага 2019-03-03 12:32:22 +07:00
Book Pauk
ebd96c4759 Merge tag '0.5.3' into develop
0.5.3
2019-03-01 21:30:38 +07:00
7 changed files with 185 additions and 17 deletions

View File

@@ -340,8 +340,8 @@ class Reader extends Vue {
this.setPositionActive = true; this.setPositionActive = true;
this.$nextTick(() => { this.$nextTick(() => {
this.$refs.setPositionPage.sliderMax = this.mostRecentBook().textLength - 1; const recent = this.mostRecentBook();
this.$refs.setPositionPage.sliderValue = this.mostRecentBook().bookPos; this.$refs.setPositionPage.init(recent.bookPos, recent.textLength - 1);
}); });
} else { } else {
this.setPositionActive = false; this.setPositionActive = false;

View File

@@ -18,7 +18,6 @@
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
import Vue from 'vue'; import Vue from 'vue';
import Component from 'vue-class-component'; import Component from 'vue-class-component';
import _ from 'lodash';
import Window from '../../share/Window.vue'; import Window from '../../share/Window.vue';
@@ -28,7 +27,8 @@ export default @Component({
}, },
watch: { watch: {
sliderValue: function(newValue) { sliderValue: function(newValue) {
this.$emit('book-pos-changed', {bookPos: newValue}); if (this.initialized)
this.$emit('book-pos-changed', {bookPos: newValue});
}, },
}, },
}) })
@@ -39,6 +39,13 @@ class SetPositionPage extends Vue {
created() { created() {
this.commit = this.$store.commit; this.commit = this.$store.commit;
this.reader = this.$store.state.reader; this.reader = this.$store.state.reader;
this.initialized = false;
}
init(sliderValue, sliderMax) {
this.sliderMax = sliderMax;
this.sliderValue = sliderValue;
this.initialized = true;
} }
formatTooltip(val) { formatTooltip(val) {

View File

@@ -25,6 +25,7 @@ class BookManager {
async init(settings) { async init(settings) {
this.settings = settings; this.settings = settings;
//bmCacheStore нужен только для ускорения загрузки читалки
this.booksCached = await bmCacheStore.getItem('books'); this.booksCached = await bmCacheStore.getItem('books');
if (!this.booksCached) if (!this.booksCached)
this.booksCached = {}; this.booksCached = {};
@@ -47,9 +48,9 @@ class BookManager {
} }
} }
//долгая загрузка из хранилища //долгая загрузка из хранилища,
//bmMetaStore и bmRecentStore в будущем можно будет убрать //хранение в отдельных записях дает относительно
//bmCacheStore достаточно //нормальное поведение при нескольких вкладках с читалкой в браузере
async loadMeta(immediate) { async loadMeta(immediate) {
if (!immediate) if (!immediate)
await utils.sleep(2000); await utils.sleep(2000);

View File

@@ -1,6 +1,6 @@
{ {
"name": "Liberama", "name": "Liberama",
"version": "0.5.3", "version": "0.5.4",
"engines": { "engines": {
"node": ">=10.0.0" "node": ">=10.0.0"
}, },

View File

@@ -34,10 +34,15 @@ class ConvertHtml extends ConvertBase {
let desc = {_n: 'description', 'title-info': titleInfo}; let desc = {_n: 'description', 'title-info': titleInfo};
let pars = []; let pars = [];
let body = {_n: 'body', section: {_a: []}}; let body = {_n: 'body', section: {_a: []}};
let fb2 = [desc, body]; let binary = [];
let fb2 = [desc, body, binary];
let title = ''; let title = '';
let inTitle = false; let inTitle = false;
let inImage = false;
let image = {};
let bold = false;
let italic = false;
let spaceCounter = []; let spaceCounter = [];
@@ -71,37 +76,93 @@ class ConvertHtml extends ConvertBase {
} }
}; };
const newPara = new Set(['tr', 'br', 'br/', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']); const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter && !(cutTitle && inTitle)) { if (!cutCounter && !(cutTitle && inTitle)) {
growParagraph(text); let tOpen = (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
let tClose = (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
growParagraph(`${tOpen}${text}${tClose}`);
} }
if (inTitle && !title) if (inTitle && !title)
title = text; title = text;
if (inImage) {
image._t = text;
binary.push(image);
pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
newParagraph();
}
}; };
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) { if (!cutCounter) {
if (newPara.has(tag)) if (newPara.has(tag))
newParagraph(); newParagraph();
switch (tag) {
case 'i':
case 'em':
italic = true;
break;
case 'b':
case 'strong':
case 'h1':
case 'h2':
case 'h3':
bold = true;
break;
}
} }
if (tag == 'title') if (tag == 'title')
inTitle = true; inTitle = true;
if (tag == 'fb2-image') {
inImage = true;
const attrs = sax.getAttrsSync(tail);
image = {_n: 'binary', _attrs: {id: attrs.name.value, 'content-type': attrs.type.value}, _t: ''};
}
}; };
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) {
if (newPara.has('/' + tag))
newParagraph();
switch (tag) {
case 'i':
case 'em':
italic = false;
break;
case 'b':
case 'strong':
case 'h1':
case 'h2':
case 'h3':
bold = false;
break;
}
}
if (tag == 'title') if (tag == 'title')
inTitle = false; inTitle = false;
if (tag == 'fb2-image')
inImage = false;
}; };
let buf = this.decode(data).toString(); let buf = this.decode(data).toString();
sax.parseSync(buf, { sax.parseSync(buf, {
onStartNode, onEndNode, onTextNode, onStartNode, onEndNode, onTextNode,
innerCut: new Set(['head', 'script', 'style', 'binary']) innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image'])
}); });
titleInfo['book-title'] = title; titleInfo['book-title'] = title;
@@ -148,10 +209,16 @@ class ConvertHtml extends ConvertBase {
i = 0; i = 0;
for (const par of pars) { for (const par of pars) {
if (par._n != 'p') {
newPars.push(par);
continue;
}
if (i > 0) if (i > 0)
newPar(); newPar();
i++; i++;
let j = 0;
const lines = par._t.split('\n'); const lines = par._t.split('\n');
for (let line of lines) { for (let line of lines) {
line = repCrLfTab(line); line = repCrLfTab(line);
@@ -161,8 +228,11 @@ class ConvertHtml extends ConvertBase {
l++; l++;
} }
if (l >= parIndent) if (l >= parIndent) {
newPar(); if (j > 0)
newPar();
j++;
}
growPar(line.trim() + ' '); growPar(line.trim() + ' ');
} }
} }
@@ -173,6 +243,7 @@ class ConvertHtml extends ConvertBase {
} }
//убираем лишнее //убираем лишнее
pars = body.section._a[0];
for (let i = 0; i < pars.length; i++) for (let i = 0; i < pars.length; i++)
pars[i]._t = this.repSpaces(pars[i]._t).trim(); pars[i]._t = this.repSpaces(pars[i]._t).trim();

View File

@@ -1,4 +1,5 @@
const fs = require('fs-extra'); const fs = require('fs-extra');
const path = require('path');
const sax = require('./sax'); const sax = require('./sax');
const utils = require('../utils'); const utils = require('../utils');
@@ -34,14 +35,47 @@ class ConvertPdf extends ConvertHtml {
//парсим xml //парсим xml
let lines = []; let lines = [];
let images = [];
let loading = [];
let inText = false; let inText = false;
let bold = false;
let italic = false;
let title = ''; let title = '';
let prevTop = 0; let prevTop = 0;
let i = -1; let i = -1;
const loadImage = async(image) => {
const src = path.parse(image.src);
let type = 'unknown';
switch (src.ext) {
case '.jpg': type = 'image/jpeg'; break;
case '.png': type = 'image/png'; break;
}
if (type != 'unknown') {
image.data = (await fs.readFile(image.src)).toString('base64');
image.type = type;
image.name = src.base;
}
}
const putImage = (curTop) => {
if (!isNaN(curTop) && images.length) {
while (images.length && images[0].top < curTop) {
i++;
lines[i] = images[0];
images.shift();
}
}
}
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter && inText) { if (!cutCounter && inText) {
lines[i].text += text + ' '; let tOpen = (bold ? '<b>' : '');
tOpen += (italic ? '<i>' : '');
let tClose = (italic ? '</i>' : '');
tClose += (bold ? '</b>' : '');
lines[i].text += `${tOpen}${text}${tClose} `;
if (i < 2) if (i < 2)
title += text + ' '; title += text + ' ';
} }
@@ -49,6 +83,17 @@ class ConvertPdf extends ConvertHtml {
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) { if (!cutCounter) {
if (inText) {
switch (tag) {
case 'i':
italic = true;
break;
case 'b':
bold = true;
break;
}
}
if (tag == 'text' && !inText) { if (tag == 'text' && !inText) {
let attrs = sax.getAttrsSync(tail); let attrs = sax.getAttrsSync(tail);
const line = { const line = {
@@ -59,19 +104,52 @@ class ConvertPdf extends ConvertHtml {
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10), height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
}; };
if (line.width !== '0' || line.height !== '0') { if (line.width != 0 || line.height != 0) {
inText = true; inText = true;
if (isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3)) { if (isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3)) {
putImage(line.top);
i++; i++;
lines[i] = line; lines[i] = line;
} }
prevTop = line.top; prevTop = line.top;
} }
} }
if (tag == 'image') {
const attrs = sax.getAttrsSync(tail);
const src = (attrs.src && attrs.src.value ? attrs.src.value : '');
if (src) {
const image = {
isImage: true,
src,
data: '',
type: '',
top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0,
};
loading.push(loadImage(image));
images.push(image);
images.sort((a, b) => a.top - b.top)
}
}
if (tag == 'page') {
putImage(100000);
}
} }
}; };
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (inText) {
switch (tag) {
case 'i':
italic = false;
break;
case 'b':
bold = false;
break;
}
}
if (tag == 'text') if (tag == 'text')
inText = false; inText = false;
}; };
@@ -81,9 +159,15 @@ class ConvertPdf extends ConvertHtml {
onStartNode, onEndNode, onTextNode onStartNode, onEndNode, onTextNode
}); });
putImage(100000);
await Promise.all(loading);
//найдем параграфы и отступы //найдем параграфы и отступы
const indents = []; const indents = [];
for (const line of lines) { for (const line of lines) {
if (line.isImage)
continue;
if (!isNaN(line.left)) { if (!isNaN(line.left)) {
indents[line.left] = 1; indents[line.left] = 1;
} }
@@ -103,6 +187,11 @@ class ConvertPdf extends ConvertHtml {
let concat = ''; let concat = '';
let sp = ''; let sp = '';
for (const line of lines) { for (const line of lines) {
if (line.isImage) {
text += `<fb2-image type="${line.type}" name="${line.name}">${line.data}</fb2-image>`;
continue;
}
if (concat == '') { if (concat == '') {
const left = line.left || 0; const left = line.left || 0;
sp = ' '.repeat(indents[left]); sp = ' '.repeat(indents[left]);

View File

@@ -135,7 +135,7 @@ class ConvertSamlib extends ConvertBase {
let href = attrs.src.value; let href = attrs.src.value;
if (href[0] == '/') if (href[0] == '/')
href = `http://${hostname}${href}`; href = `http://${hostname}${href}`;
openTag('image', {href}); openTag('image', {'l:href': href});
inImage = true; inImage = true;
} }
break; break;