Compare commits

..

38 Commits

Author SHA1 Message Date
Book Pauk
7fa891b4fc Merge branch 'release/0.9.11' 2020-12-09 22:31:33 +07:00
Book Pauk
6cb7412cf3 Версия 0.9.11 2020-12-09 22:30:58 +07:00
Book Pauk
157322834b Небольшая поправка 2020-12-09 22:30:19 +07:00
Book Pauk
1a13a0fee1 Работа над конвертером pdf 2020-12-09 22:19:14 +07:00
Book Pauk
37256255bf Добавлена поддержка тегов 'sup' и 'sub' 2020-12-09 20:35:52 +07:00
Book Pauk
75e01c899e Работа над конвертером pdf 2020-12-09 20:08:17 +07:00
Book Pauk
ef0d6eab89 Работа над конвертером Pdf 2020-12-09 19:05:09 +07:00
Book Pauk
5d54b1b0f4 Работа над конвертером pdf 2020-12-09 03:52:24 +07:00
Book Pauk
522f953b4f Работа над конвертером pdf 2020-12-09 03:06:15 +07:00
Book Pauk
15f02c7115 Работа над конвертером pdf 2020-12-09 01:29:58 +07:00
Book Pauk
174c877eee Рефакторинг, плюс небольшие доработки 2020-12-09 01:29:09 +07:00
Book Pauk
fd9ec736d7 Рефакторинг 2020-12-08 19:36:53 +07:00
Book Pauk
2c94025ba3 Поправлен баг 2020-12-08 19:31:00 +07:00
Book Pauk
bfadf35c40 Закончена работа над xmlParser, оттестировано 2020-12-08 18:48:55 +07:00
Book Pauk
f3b69caa12 Работа над модулем xmlParser 2020-12-08 16:17:36 +07:00
Book Pauk
18a83a5b0b Поправки настроек сжатия 2020-12-08 14:26:49 +07:00
Book Pauk
bd9669b782 Поправка цели dev 2020-12-08 14:26:25 +07:00
Book Pauk
e05713aa7f Работа над конвертером pdf 2020-12-08 14:15:17 +07:00
Book Pauk
bc3e1f0a6f Мелкий рефакторинг 2020-12-07 22:13:14 +07:00
Book Pauk
063d01b5ca Перевод pdf-конвертера на использование pdfalto 2020-12-07 22:05:01 +07:00
Book Pauk
81c38d7749 Мелкий рефакторинг 2020-12-07 20:13:32 +07:00
Book Pauk
a29842b084 Поправка readme 2020-12-07 20:12:37 +07:00
Book Pauk
bb5adcdaf6 Рефакторинг 2020-12-07 01:30:10 +07:00
Book Pauk
537e17a219 Merge tag '0.9.10-5' into develop
0.9.10-5
2020-12-05 13:42:45 +07:00
Book Pauk
03ce50153e Merge branch 'release/0.9.10-5' 2020-12-05 13:42:39 +07:00
Book Pauk
15d01ad7fc Коррекция таймаутов очереди ожидания 2020-12-05 13:41:42 +07:00
Book Pauk
e2b29e2c2f Merge tag '0.9.10-4' into develop
0.9.10-4
2020-12-05 13:25:10 +07:00
Book Pauk
ce7ae84e0f Merge branch 'release/0.9.10-4' 2020-12-05 13:25:06 +07:00
Book Pauk
01eb545f15 Улучшение работы с очередью, поправка багов 2020-12-05 13:24:04 +07:00
Book Pauk
706738c7f1 Merge tag '0.9.10-3' into develop
0.9.10-3
2020-12-05 01:40:37 +07:00
Book Pauk
6afa78cde9 Merge branch 'release/0.9.10-3' 2020-12-05 01:40:30 +07:00
Book Pauk
71f5710bba Увеличен лимит количества файлов для распаковки 2020-12-05 01:12:29 +07:00
Book Pauk
0d87043f91 Поправлен неверный вызов reject 2020-12-05 01:11:31 +07:00
Book Pauk
e25375fb7a Поправка багов 2020-12-05 00:31:53 +07:00
Book Pauk
41822999c8 Небольшие поправки 2020-12-05 00:06:54 +07:00
Book Pauk
07444bc7c2 Добавлена подсказка в сообщение об ошибке 2020-12-04 23:25:34 +07:00
Book Pauk
ec48e5b0b7 Мелкая поправка 2020-12-04 20:14:53 +07:00
Book Pauk
e8e2e9297f Merge tag '0.9.10-2' into develop
0.9.10-2
2020-12-04 20:00:40 +07:00
24 changed files with 609 additions and 251 deletions

View File

@@ -66,7 +66,7 @@ class Reader {
await utils.sleep(refreshPause); await utils.sleep(refreshPause);
i++; i++;
if (i > 120*1000/refreshPause) {//2 мин ждем телодвижений воркера if (i > 180*1000/refreshPause) {//3 мин ждем телодвижений воркера
throw new Error('Слишком долгое время ожидания'); throw new Error('Слишком долгое время ожидания');
} }
//проверка воркера //проверка воркера

View File

@@ -593,12 +593,6 @@ class Reader extends Vue {
} }
} }
refreshBookSplitToPara() {
if (this.mostRecentBook()) {
this.loadBook({url: this.mostRecentBook().url, skipCheck: true, isText: true, force: true});
}
}
recentBooksClose() { recentBooksClose() {
this.recentBooksActive = false; this.recentBooksActive = false;
} }
@@ -688,9 +682,14 @@ class Reader extends Vue {
} }
} }
refreshBook() { refreshBook(mode) {
if (this.mostRecentBook()) { const mrb = this.mostRecentBook();
this.loadBook({url: this.mostRecentBook().url, force: true}); if (mrb) {
if (mode && mode == 'split') {
this.loadBook({url: mrb.url, uploadFileName: mrb.uploadFileName, skipCheck: true, isText: true, force: true});
} else {
this.loadBook({url: mrb.url, uploadFileName: mrb.uploadFileName, force: true});
}
} }
} }
@@ -882,6 +881,7 @@ class Reader extends Vue {
wasOpened = (wasOpened ? wasOpened : {}); wasOpened = (wasOpened ? wasOpened : {});
const bookPos = (opts.bookPos !== undefined ? opts.bookPos : wasOpened.bookPos); const bookPos = (opts.bookPos !== undefined ? opts.bookPos : wasOpened.bookPos);
const bookPosSeen = (opts.bookPos !== undefined ? opts.bookPos : wasOpened.bookPosSeen); const bookPosSeen = (opts.bookPos !== undefined ? opts.bookPos : wasOpened.bookPosSeen);
const uploadFileName = (opts.uploadFileName ? opts.uploadFileName : '');
let book = null; let book = null;
@@ -929,7 +929,7 @@ class Reader extends Vue {
skipCheck: (opts.skipCheck ? true : false), skipCheck: (opts.skipCheck ? true : false),
isText: (opts.isText ? true : false), isText: (opts.isText ? true : false),
enableSitesFilter: this.enableSitesFilter, enableSitesFilter: this.enableSitesFilter,
uploadFileName: (opts.uploadFileName ? opts.uploadFileName : ''), uploadFileName
}, },
(state) => { (state) => {
progress.setState(state); progress.setState(state);
@@ -945,7 +945,7 @@ class Reader extends Vue {
}); });
// добавляем в историю // добавляем в историю
await bookManager.setRecentBook(Object.assign({bookPos, bookPosSeen}, addedBook)); await bookManager.setRecentBook(Object.assign({bookPos, bookPosSeen, uploadFileName}, addedBook));
this.mostRecentBook(); this.mostRecentBook();
this.addAction(bookPos); this.addAction(bookPos);
this.updateRoute(true); this.updateRoute(true);
@@ -982,7 +982,7 @@ class Reader extends Vue {
progress.hide(); this.progressActive = false; progress.hide(); this.progressActive = false;
await this.loadBook({url, uploadFileName: opts.file.name}); await this.loadBook({url, uploadFileName: opts.file.name, force: true});
} catch (e) { } catch (e) {
progress.hide(); this.progressActive = false; progress.hide(); this.progressActive = false;
this.loaderActive = true; this.loaderActive = true;
@@ -1054,7 +1054,7 @@ class Reader extends Vue {
this.copyTextToggle(); this.copyTextToggle();
break; break;
case 'splitToPara': case 'splitToPara':
this.refreshBookSplitToPara(); this.refreshBook('split');
break; break;
case 'refresh': case 'refresh':
this.refreshBook(); this.refreshBook();

View File

@@ -216,8 +216,15 @@ class ServerStorage extends Vue {
} }
error(message) { error(message) {
if (this.showServerStorageMessages && !this.offlineModeActive) if (this.showServerStorageMessages && !this.offlineModeActive) {
this.$root.notify.error(message); this.errorMessageCounter = (this.errorMessageCounter ? this.errorMessageCounter + 1 : 1);
const hint = (this.errorMessageCounter < 2 ? '' :
'<div><br>Надоело это сообщение? Добавьте в настройках кнопку "Автономный режим" ' +
'<i class="la la-unlink" style="font-size: 20px; color: white"></i> на панель инструментов и активируйте ее.</div>'
);
this.$root.notify.error(message + hint);
}
} }
async loadSettings(force = false, doNotifySuccess = true) { async loadSettings(force = false, doNotifySuccess = true) {

View File

@@ -77,9 +77,15 @@ export default class DrawHelper {
let j = 0; let j = 0;
//формируем строку //формируем строку
for (const part of line.parts) { for (const part of line.parts) {
let tOpen = (part.style.bold ? '<b>' : ''); let tOpen = '';
tOpen += (part.style.bold ? '<b>' : '');
tOpen += (part.style.italic ? '<i>' : ''); tOpen += (part.style.italic ? '<i>' : '');
let tClose = (part.style.italic ? '</i>' : ''); tOpen += (part.style.sup ? '<span style="vertical-align: baseline; position: relative; line-height: 0; top: -0.3em">' : '');
tOpen += (part.style.sub ? '<span style="vertical-align: baseline; position: relative; line-height: 0; top: 0.3em">' : '');
let tClose = '';
tClose += (part.style.sub ? '</span>' : '');
tClose += (part.style.sup ? '</span>' : '');
tClose += (part.style.italic ? '</i>' : '');
tClose += (part.style.bold ? '</b>' : ''); tClose += (part.style.bold ? '</b>' : '');
let text = ''; let text = '';

View File

@@ -285,7 +285,7 @@ export default class BookParser {
sectionLevel++; sectionLevel++;
} }
if (tag == 'emphasis' || tag == 'strong') { if (tag == 'emphasis' || tag == 'strong' || tag == 'sup' || tag == 'sub') {
growParagraph(`<${tag}>`, 0); growParagraph(`<${tag}>`, 0);
} }
@@ -343,7 +343,7 @@ export default class BookParser {
sectionLevel--; sectionLevel--;
} }
if (tag == 'emphasis' || tag == 'strong') { if (tag == 'emphasis' || tag == 'strong' || tag == 'sup' || tag == 'sub') {
growParagraph(`</${tag}>`, 0); growParagraph(`</${tag}>`, 0);
} }
@@ -507,7 +507,7 @@ export default class BookParser {
splitToStyle(s) { splitToStyle(s) {
let result = [];/*array of { let result = [];/*array of {
style: {bold: Boolean, italic: Boolean, center: Boolean, space: Number}, style: {bold: Boolean, italic: Boolean, sup: Boolean, sub: Boolean, center: Boolean, space: Number},
image: {local: Boolean, inline: Boolean, id: String}, image: {local: Boolean, inline: Boolean, id: String},
text: String, text: String,
}*/ }*/
@@ -530,6 +530,12 @@ export default class BookParser {
case 'emphasis': case 'emphasis':
style.italic = true; style.italic = true;
break; break;
case 'sup':
style.sup = true;
break;
case 'sub':
style.sub = true;
break;
case 'center': case 'center':
style.center = true; style.center = true;
break; break;
@@ -580,6 +586,12 @@ export default class BookParser {
case 'emphasis': case 'emphasis':
style.italic = false; style.italic = false;
break; break;
case 'sup':
style.sup = false;
break;
case 'sub':
style.sub = false;
break;
case 'center': case 'center':
style.center = false; style.center = false;
break; break;

View File

@@ -169,7 +169,7 @@ class BookManager {
} }
async deflateWithProgress(data, callback) { async deflateWithProgress(data, callback) {
const chunkSize = 128*1024; const chunkSize = 512*1024;
const deflator = new utils.pako.Deflate({level: 5}); const deflator = new utils.pako.Deflate({level: 5});
let chunkTotal = 1 + Math.floor(data.length/chunkSize); let chunkTotal = 1 + Math.floor(data.length/chunkSize);
@@ -203,7 +203,7 @@ class BookManager {
} }
async inflateWithProgress(data, callback) { async inflateWithProgress(data, callback) {
const chunkSize = 64*1024; const chunkSize = 512*1024;
const inflator = new utils.pako.Inflate({to: 'string'}); const inflator = new utils.pako.Inflate({to: 'string'});
let chunkTotal = 1 + Math.floor(data.length/chunkSize); let chunkTotal = 1 + Math.floor(data.length/chunkSize);

View File

@@ -1,4 +1,15 @@
export const versionHistory = [ export const versionHistory = [
{
showUntil: '2020-12-08',
header: '0.9.11 (2020-12-09)',
content:
`
<ul>
<li>оптимизации, улучшения работы конвертеров</li>
</ul>
`
},
{ {
showUntil: '2020-12-10', showUntil: '2020-12-10',
header: '0.9.10 (2020-12-03)', header: '0.9.10 (2020-12-03)',

View File

@@ -32,11 +32,23 @@ sudo -u www-data mkdir -p /home/liberama/data/calibre
sudo -u www-data tar xvf calibre-5.5.0-x86_64.txz -C /home/liberama/data/calibre sudo -u www-data tar xvf calibre-5.5.0-x86_64.txz -C /home/liberama/data/calibre
``` ```
### external converter `pdfalto`, github https://github.com/kermitt2/pdfalto
```
git clone https://github.com/kermitt2/pdfalto
cd pdfalto
git submodule update --init --recursive
cmake ./
добавить в начало CMakeLists.txt строчку: set(CMAKE_EXE_LINKER_FLAGS "-no-pie")
make
sudo -u www-data mkdir -p /home/liberama/data/pdfalto
sudo -u www-data cp pdfalto /home/liberama/data/pdfalto
```
### external converters ### external converters
``` ```
sudo apt install rar sudo apt install rar
sudo apt install libreoffice sudo apt install libreoffice
sudo apt install poppler-utils
sudo apt install djvulibre-bin sudo apt install djvulibre-bin
sudo apt install libtiff-tools sudo apt install libtiff-tools
sudo apt install graphicsmagick-imagemagick-compat sudo apt install graphicsmagick-imagemagick-compat

View File

@@ -1,4 +1,4 @@
#!/bin/bash #!/bin/bash
sudo -H -u www-data bash -c "cd /var/www; /home/liberama/liberama" & sudo -H -u www-data bash -c "cd /var/www; /home/liberama/liberama" & disown
sudo service cron start sudo service cron start

2
package-lock.json generated
View File

@@ -1,6 +1,6 @@
{ {
"name": "Liberama", "name": "Liberama",
"version": "0.9.10", "version": "0.9.11",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {

View File

@@ -1,6 +1,6 @@
{ {
"name": "Liberama", "name": "Liberama",
"version": "0.9.10", "version": "0.9.11",
"author": "Book Pauk <bookpauk@gmail.com>", "author": "Book Pauk <bookpauk@gmail.com>",
"license": "CC0-1.0", "license": "CC0-1.0",
"repository": "bookpauk/liberama", "repository": "bookpauk/liberama",
@@ -8,7 +8,7 @@
"node": ">=10.0.0" "node": ">=10.0.0"
}, },
"scripts": { "scripts": {
"dev": "nodemon --inspect --ignore server/public --ignore server/data --exec 'node server'", "dev": "nodemon --inspect --ignore server/public --ignore server/data --ignore client --exec 'node server'",
"build:client": "webpack --config build/webpack.prod.config.js", "build:client": "webpack --config build/webpack.prod.config.js",
"build:linux": "npm run build:client && node build/linux && pkg -t latest-linux-x64 -o dist/linux/liberama .", "build:linux": "npm run build:client && node build/linux && pkg -t latest-linux-x64 -o dist/linux/liberama .",
"build:win": "npm run build:client && node build/win && pkg -t latest-win-x64 -o dist/win/liberama .", "build:win": "npm run build:client && node build/win && pkg -t latest-win-x64 -o dist/win/liberama .",

View File

@@ -136,7 +136,7 @@ class WebSocketController {
break; break;
i++; i++;
if (i > 2*60*1000/refreshPause) {//2 мин ждем телодвижений воркера if (i > 3*60*1000/refreshPause) {//3 мин ждем телодвижений воркера
this.send({state: 'error', error: 'Время ожидания процесса истекло'}, req, ws); this.send({state: 'error', error: 'Время ожидания процесса истекло'}, req, ws);
break; break;
} }

View File

@@ -135,7 +135,7 @@ class FileDecompressor {
try { try {
return await zip.unpack(filename, outputDir, { return await zip.unpack(filename, outputDir, {
limitFileSize: this.limitFileSize, limitFileSize: this.limitFileSize,
limitFileCount: 1000, limitFileCount: 10000,
decodeEntryNameCallback: (nameRaw) => { decodeEntryNameCallback: (nameRaw) => {
return utils.bufferRemoveZeroes(nameRaw); return utils.bufferRemoveZeroes(nameRaw);
} }
@@ -144,7 +144,7 @@ class FileDecompressor {
fs.emptyDir(outputDir); fs.emptyDir(outputDir);
return await zip.unpack(filename, outputDir, { return await zip.unpack(filename, outputDir, {
limitFileSize: this.limitFileSize, limitFileSize: this.limitFileSize,
limitFileCount: 1000, limitFileCount: 10000,
decodeEntryNameCallback: (nameRaw) => { decodeEntryNameCallback: (nameRaw) => {
nameRaw = utils.bufferRemoveZeroes(nameRaw); nameRaw = utils.bufferRemoveZeroes(nameRaw);
const enc = textUtils.getEncodingLite(nameRaw); const enc = textUtils.getEncodingLite(nameRaw);
@@ -171,7 +171,7 @@ class FileDecompressor {
if (this.limitFileSize) { if (this.limitFileSize) {
if ((await fs.stat(filename)).size > this.limitFileSize) { if ((await fs.stat(filename)).size > this.limitFileSize) {
reject('Файл слишком большой'); reject(new Error('Файл слишком большой'));
return; return;
} }
} }

View File

@@ -28,7 +28,7 @@ class LimitedQueue {
get(onPlaceChange) { get(onPlaceChange) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
if (this.destroyed) if (this.destroyed)
reject('destroyed'); reject(new Error('destroyed'));
const take = () => { const take = () => {
if (this.freed <= 0) if (this.freed <= 0)
@@ -73,7 +73,7 @@ class LimitedQueue {
if (onPlaceChange) if (onPlaceChange)
onPlaceChange(this.listeners.length); onPlaceChange(this.listeners.length);
} else { } else {
reject('Превышен размер очереди ожидания'); reject(new Error('Превышен размер очереди ожидания'));
} }
} }
}); });

View File

@@ -5,8 +5,9 @@ const he = require('he');
const LimitedQueue = require('../../LimitedQueue'); const LimitedQueue = require('../../LimitedQueue');
const textUtils = require('./textUtils'); const textUtils = require('./textUtils');
const utils = require('../../utils'); const utils = require('../../utils');
const xmlParser = require('../../xmlParser');
const queue = new LimitedQueue(3, 20, 3*60*1000);//3 минуты ожидание подвижек const queue = new LimitedQueue(3, 20, 2*60*1000);//2 минуты ожидание подвижек
class ConvertBase { class ConvertBase {
constructor(config) { constructor(config) {
@@ -14,7 +15,6 @@ class ConvertBase {
this.calibrePath = `${config.dataDir}/calibre/ebook-convert`; this.calibrePath = `${config.dataDir}/calibre/ebook-convert`;
this.sofficePath = '/usr/bin/soffice'; this.sofficePath = '/usr/bin/soffice';
this.pdfToHtmlPath = '/usr/bin/pdftohtml';
} }
async run(data, opts) {// eslint-disable-line no-unused-vars async run(data, opts) {// eslint-disable-line no-unused-vars
@@ -27,9 +27,6 @@ class ConvertBase {
if (!await fs.pathExists(this.sofficePath)) if (!await fs.pathExists(this.sofficePath))
throw new Error('Внешний конвертер LibreOffice не найден'); throw new Error('Внешний конвертер LibreOffice не найден');
if (!await fs.pathExists(this.pdfToHtmlPath))
throw new Error('Внешний конвертер pdftohtml не найден');
} }
async execConverter(path, args, onData, abort) { async execConverter(path, args, onData, abort) {
@@ -42,21 +39,32 @@ class ConvertBase {
throw new Error('Слишком большая очередь конвертирования. Пожалуйста, попробуйте позже.'); throw new Error('Слишком большая очередь конвертирования. Пожалуйста, попробуйте позже.');
} }
abort = (abort ? abort : () => false);
const myAbort = () => {
return q.abort() || abort();
}
try { try {
if (myAbort())
throw new Error('abort');
const result = await utils.spawnProcess(path, { const result = await utils.spawnProcess(path, {
killAfter: 3600,//1 час killAfter: 3600,//1 час
args, args,
onData: (data) => { onData: (data) => {
q.resetTimeout(); if (queue.freed > 0)
q.resetTimeout();
onData(data); onData(data);
}, },
//будем периодически проверять работу конвертера и если очереди нет, то разрешаем работу пинком onData //будем периодически проверять работу конвертера и если очереди нет, то разрешаем работу пинком onData
onUsage: (stats) => { onUsage: (stats) => {
if (queue.freed > 1 && stats.cpu >= 10) if (queue.freed > 0 && stats.cpu >= 10) {
q.resetTimeout();
onData('.'); onData('.');
}
}, },
onUsageInterval: 10, onUsageInterval: 10,
abort abort: myAbort
}); });
if (result.code != 0) { if (result.code != 0) {
const error = `${result.code}|FORLOG|, exec: ${path}, args: ${args.join(' ')}, stdout: ${result.stdout}, stderr: ${result.stderr}`; const error = `${result.code}|FORLOG|, exec: ${path}, args: ${args.join(' ')}, stdout: ${result.stdout}, stderr: ${result.stderr}`;
@@ -95,61 +103,14 @@ class ConvertBase {
} }
formatFb2(fb2) { formatFb2(fb2) {
let out = '<?xml version="1.0" encoding="utf-8"?>'; const out = xmlParser.formatXml({
out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">'; FictionBook: {
out += this.formatFb2Node(fb2); _attrs: {xmlns: 'http://www.gribuser.ru/xml/fictionbook/2.0', 'xmlns:l': 'http://www.w3.org/1999/xlink'},
out += '</FictionBook>'; _a: [fb2],
return out;
}
formatFb2Node(node, name) {
let out = '';
if (Array.isArray(node)) {
for (const n of node) {
out += this.formatFb2Node(n);
} }
} else if (typeof node == 'string') { }, 'utf-8', this.repSpaces);
if (name)
out += `<${name}>${this.repSpaces(node)}</${name}>`;
else
out += this.repSpaces(node);
} else {
if (node._n)
name = node._n;
let attrs = ''; return out.replace(/<p>\s*?<\/p>/g, '<empty-line/>');
if (node._attrs) {
for (let attrName in node._attrs) {
attrs += ` ${attrName}="${node._attrs[attrName]}"`;
}
}
let tOpen = '';
let tBody = '';
let tClose = '';
if (name)
tOpen += `<${name}${attrs}>`;
if (node.hasOwnProperty('_t'))
tBody += this.repSpaces(node._t);
for (let nodeName in node) {
if (nodeName && nodeName[0] == '_' && nodeName != '_a')
continue;
const n = node[nodeName];
tBody += this.formatFb2Node(n, nodeName);
}
if (name)
tClose += `</${name}>`;
if (attrs == '' && name == 'p' && tBody.trim() == '')
out += '<empty-line/>'
else
out += `${tOpen}${tBody}${tClose}`;
}
return out;
} }
} }

View File

@@ -2,9 +2,9 @@ const fs = require('fs-extra');
const path = require('path'); const path = require('path');
const utils = require('../../utils'); const utils = require('../../utils');
const ConvertHtml = require('./ConvertHtml'); const ConvertBase = require('./ConvertBase');
class ConvertDjvu extends ConvertHtml { class ConvertDjvu extends ConvertBase {
check(data, opts) { check(data, opts) {
const {inputFiles} = opts; const {inputFiles} = opts;
@@ -59,9 +59,17 @@ class ConvertDjvu extends ConvertHtml {
}, abort); }, abort);
//читаем изображения //читаем изображения
limitSize = 2*this.config.maxUploadFileSize;
let imagesSize = 0;
const loadImage = async(image) => { const loadImage = async(image) => {
image.data = (await fs.readFile(image.file)).toString('base64'); image.data = (await fs.readFile(image.file)).toString('base64');
image.name = path.basename(image.file); image.name = path.basename(image.file);
imagesSize += image.data.length;
if (imagesSize > limitSize) {
throw new Error(`Файл для конвертирования слишком большой|FORLOG| imagesSize: ${imagesSize} > ${limitSize}`);
}
} }
let files = []; let files = [];
@@ -82,20 +90,29 @@ class ConvertDjvu extends ConvertHtml {
await Promise.all(loading); await Promise.all(loading);
//формируем текст //формируем fb2
limitSize = 2*this.config.maxUploadFileSize; let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo};
let pars = [];
let body = {_n: 'body', section: {_a: [pars]}};
let binary = [];
let fb2 = [desc, body, binary];
let title = ''; let title = '';
if (uploadFileName) if (uploadFileName)
title = uploadFileName; title = uploadFileName;
let text = `<title>${title}</title>`;
for (const image of images) {
text += `<fb2-image type="image/jpeg" name="${image.name}">${image.data}</fb2-image>`;
if (text.length > limitSize) { titleInfo['book-title'] = title;
throw new Error(`Файл для конвертирования слишком большой|FORLOG| text.length: ${text.length} > ${limitSize}`);
} for (const image of images) {
const img = {_n: 'binary', _attrs: {id: image.name, 'content-type': 'image/jpeg'}, _t: image.data};
binary.push(img);
pars.push({_n: 'p', _t: ''});
pars.push({_n: 'image', _attrs: {'l:href': `#${image.name}`}});
} }
return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true});
return this.formatFb2(fb2);
} }
} }

View File

@@ -2,7 +2,7 @@ const fs = require('fs-extra');
const ConvertHtml = require('./ConvertHtml'); const ConvertHtml = require('./ConvertHtml');
class ConvertDocX extends ConvertHtml { class ConvertFb3 extends ConvertHtml {
async check(data, opts) { async check(data, opts) {
const {inputFiles} = opts; const {inputFiles} = opts;
if (this.config.useExternalBookConverter && if (this.config.useExternalBookConverter &&
@@ -39,13 +39,14 @@ class ConvertDocX extends ConvertHtml {
const title = this.getTitle(text) const title = this.getTitle(text)
.replace(/<\/?p>/g, '') .replace(/<\/?p>/g, '')
; ;
text = `<title>${title}</title>` + text text = `<fb2-title>${title}</fb2-title>` + text
.replace(/<title>/g, '<br><b>') .replace(/<title>/g, '<br><b>')
.replace(/<\/title>/g, '</b><br>') .replace(/<\/title>/g, '</b><br>')
.replace(/<subtitle>/g, '<br><br><subtitle>') .replace(/<subtitle>/g, '<br><br><fb2-subtitle>')
.replace(/<\/subtitle>/g, '</fb2-subtitle>')
; ;
return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true}); return await super.run(Buffer.from(text), {skipCheck: true});
} }
} }
module.exports = ConvertDocX; module.exports = ConvertFb3;

View File

@@ -34,7 +34,6 @@ class ConvertHtml extends ConvertBase {
} else { } else {
isText = opts.isText; isText = opts.isText;
} }
let {cutTitle} = opts;
let titleInfo = {}; let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo}; let desc = {_n: 'description', 'title-info': titleInfo};
@@ -44,12 +43,17 @@ class ConvertHtml extends ConvertBase {
let fb2 = [desc, body, binary]; let fb2 = [desc, body, binary];
let title = ''; let title = '';
let author = '';
let inTitle = false; let inTitle = false;
let inSectionTitle = false;
let inAuthor = false;
let inSubTitle = false; let inSubTitle = false;
let inImage = false; let inImage = false;
let image = {}; let image = {};
let bold = false; let bold = false;
let italic = false; let italic = false;
let superscript = false;
let subscript = false;
let begining = true; let begining = true;
let spaceCounter = []; let spaceCounter = [];
@@ -62,7 +66,7 @@ class ConvertHtml extends ConvertBase {
}; };
const growParagraph = (text) => { const growParagraph = (text) => {
if (!pars.length) if (!pars.length || pars[pars.length - 1]._n != 'p')
newParagraph(); newParagraph();
const l = pars.length; const l = pars.length;
@@ -94,12 +98,16 @@ class ConvertHtml extends ConvertBase {
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
text = this.escapeEntities(text); text = this.escapeEntities(text);
if (!cutCounter && !(cutTitle && inTitle)) { if (!(cutCounter || inTitle || inSectionTitle || inSubTitle)) {
let tOpen = ''; let tOpen = '';
tOpen += (inSubTitle ? '<subtitle>' : ''); tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : ''); tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : ''); tOpen += (italic ? '<emphasis>' : '');
tOpen += (superscript ? '<sup>' : '');
tOpen += (subscript ? '<sub>' : '');
let tClose = '' let tClose = ''
tClose += (subscript ? '</sub>' : '');
tClose += (superscript ? '</sup>' : '');
tClose += (italic ? '</emphasis>' : ''); tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : ''); tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : ''); tClose += (inSubTitle ? '</subtitle>' : '');
@@ -110,12 +118,22 @@ class ConvertHtml extends ConvertBase {
if (inTitle && !title) if (inTitle && !title)
title = text; title = text;
if (inAuthor && !author)
author = text;
if (inSectionTitle) {
pars.unshift({_n: 'title', _t: text});
}
if (inSubTitle) {
pars.push({_n: 'subtitle', _t: text});
}
if (inImage) { if (inImage) {
image._t = text; image._t = text;
binary.push(image); binary.push(image);
pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''}); pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
newParagraph();
} }
}; };
@@ -140,15 +158,27 @@ class ConvertHtml extends ConvertBase {
bold = true; bold = true;
break; break;
} }
if (tag == 'sup')
superscript = true;
if (tag == 'sub')
subscript = true;
} }
if (tag == 'title' || tag == 'cut-title') { if (tag == 'title' || tag == 'fb2-title') {
inTitle = true; inTitle = true;
if (tag == 'cut-title')
cutTitle = true;
} }
if (tag == 'subtitle') { if (tag == 'fb2-author') {
inAuthor = true;
}
if (tag == 'fb2-section-title') {
inSectionTitle = true;
}
if (tag == 'fb2-subtitle') {
inSubTitle = true; inSubTitle = true;
} }
@@ -156,7 +186,7 @@ class ConvertHtml extends ConvertBase {
inImage = true; inImage = true;
const attrs = sax.getAttrsSync(tail); const attrs = sax.getAttrsSync(tail);
image = {_n: 'binary', _attrs: {id: attrs.name.value, 'content-type': attrs.type.value}, _t: ''}; image = {_n: 'binary', _attrs: {id: attrs.name.value, 'content-type': attrs.type.value}, _t: ''};
} }
}; };
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
@@ -179,12 +209,26 @@ class ConvertHtml extends ConvertBase {
bold = false; bold = false;
break; break;
} }
if (tag == 'sup')
superscript = false;
if (tag == 'sub')
subscript = false;
} }
if (tag == 'title' || tag == 'cut-title') if (tag == 'title' || tag == 'fb2-title')
inTitle = false; inTitle = false;
if (tag == 'subtitle') if (tag == 'fb2-author') {
inAuthor = false;
}
if (tag == 'fb2-section-title') {
inSectionTitle = false;
}
if (tag == 'fb2-subtitle')
inSubTitle = false; inSubTitle = false;
if (tag == 'fb2-image') if (tag == 'fb2-image')
@@ -195,10 +239,17 @@ class ConvertHtml extends ConvertBase {
sax.parseSync(buf, { sax.parseSync(buf, {
onStartNode, onEndNode, onTextNode, onStartNode, onEndNode, onTextNode,
innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image']) innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image', 'fb2-title', 'fb2-author'])
}); });
titleInfo['book-title'] = title; titleInfo['book-title'] = title;
if (author)
titleInfo.author = {'last-name': author};
body.section._a[0] = pars;
//console.log(JSON.stringify(fb2, null, 2));
//подозрение на чистый текст, надо разбить на параграфы //подозрение на чистый текст, надо разбить на параграфы
if (isText || (buf.length > 30*1024 && pars.length < buf.length/2000)) { if (isText || (buf.length > 30*1024 && pars.length < buf.length/2000)) {
let total = 0; let total = 0;
@@ -228,56 +279,49 @@ class ConvertHtml extends ConvertBase {
if (parIndent > 2) parIndent--; if (parIndent > 2) parIndent--;
let newPars = []; let newPars = [];
let curPar = {};
const newPar = () => { const newPar = () => {
newPars.push({_n: 'p', _t: ''}); curPar = {_n: 'p', _t: ''};
newPars.push(curPar);
}; };
const growPar = (text) => {
if (!newPars.length)
newPar();
const l = newPars.length;
newPars[l - 1]._t += text;
}
i = 0;
for (const par of pars) { for (const par of pars) {
if (par._n != 'p') { if (par._n != 'p') {
newPars.push(par); newPars.push(par);
continue; continue;
} }
if (i > 0) newPar();
newPar();
i++;
let j = 0;
const lines = par._t.split('\n'); const lines = par._t.split('\n');
for (let line of lines) { for (let j = 0; j < lines.length; j++) {
line = repCrLfTab(line); const line = repCrLfTab(lines[j]);
let l = 0; let l = 0;
while (l < line.length && line[l] == ' ') { while (l < line.length && line[l] == ' ') {
l++; l++;
} }
if (l >= parIndent || line == '') { if (j > 0 &&
if (j > 0) (l >= parIndent ||
newPar(); (j < lines.length - 1 && line == '')
j++; )
) {
newPar();
} }
growPar(line.trim() + ' ');
curPar._t += line.trim() + ' ';
} }
} }
body.section._a[0] = newPars; body.section._a[0] = newPars;
} else {
body.section._a[0] = pars;
} }
//убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги //убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги
bold = false; bold = false;
italic = false; italic = false;
superscript = false;
subscript = false;
inSubTitle = false; inSubTitle = false;
pars = body.section._a[0]; pars = body.section._a[0];
for (let i = 0; i < pars.length; i++) { for (let i = 0; i < pars.length; i++) {
@@ -297,7 +341,11 @@ class ConvertHtml extends ConvertBase {
tOpen += (inSubTitle ? '<subtitle>' : ''); tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : ''); tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : ''); tOpen += (italic ? '<emphasis>' : '');
tOpen += (superscript ? '<sup>' : '');
tOpen += (subscript ? '<sub>' : '');
let tClose = '' let tClose = ''
tClose += (subscript ? '</sub>' : '');
tClose += (superscript ? '</sup>' : '');
tClose += (italic ? '</emphasis>' : ''); tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : ''); tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : ''); tClose += (inSubTitle ? '</subtitle>' : '');
@@ -313,6 +361,10 @@ class ConvertHtml extends ConvertBase {
bold = true; bold = true;
if (tag == 'emphasis') if (tag == 'emphasis')
italic = true; italic = true;
if (tag == 'sup')
superscript = true;
if (tag == 'sub')
subscript = true;
if (tag == 'subtitle') if (tag == 'subtitle')
inSubTitle = true; inSubTitle = true;
} }
@@ -322,6 +374,10 @@ class ConvertHtml extends ConvertBase {
bold = false; bold = false;
if (tag == 'emphasis') if (tag == 'emphasis')
italic = false; italic = false;
if (tag == 'sup')
superscript = false;
if (tag == 'sub')
subscript = false;
if (tag == 'subtitle') if (tag == 'subtitle')
inSubTitle = false; inSubTitle = false;
} }

View File

@@ -1,9 +1,11 @@
//const _ = require('lodash');
const fs = require('fs-extra'); const fs = require('fs-extra');
const path = require('path'); const path = require('path');
const sax = require('../../sax'); const sax = require('../../sax');
const utils = require('../../utils'); const utils = require('../../utils');
const ConvertHtml = require('./ConvertHtml'); const ConvertHtml = require('./ConvertHtml');
const xmlParser = require('../../xmlParser');
class ConvertPdf extends ConvertHtml { class ConvertPdf extends ConvertHtml {
check(data, opts) { check(data, opts) {
@@ -14,20 +16,26 @@ class ConvertPdf extends ConvertHtml {
} }
async run(notUsed, opts) { async run(notUsed, opts) {
if (!opts.skipCheck) { if (!this.check(notUsed, opts))
if (!this.check(notUsed, opts)) return false;
return false;
}
await this.checkExternalConverterPresent(); await this.checkExternalConverterPresent();
const {inputFiles, callback, abort, uploadFileName} = opts; const {inputFiles, callback, abort, uploadFileName} = opts;
const inpFile = (opts.pdfFile ? opts.pdfFile : inputFiles.sourceFile); const inpFile = inputFiles.sourceFile;
const outFile = `${inputFiles.filesDir}/${utils.randomHexString(10)}.xml`; const outBasename = `${inputFiles.filesDir}/${utils.randomHexString(10)}`;
const outFile = `${outBasename}.xml`;
const metaFile = `${outBasename}_metadata.xml`;
const pdfaltoPath = `${this.config.dataDir}/pdfalto/pdfalto`;
if (!await fs.pathExists(pdfaltoPath))
throw new Error('Внешний конвертер pdfalto не найден');
//конвертируем в xml //конвертируем в xml
let perc = 0; let perc = 0;
await this.execConverter(this.pdfToHtmlPath, ['-nodrm', '-c', '-s', '-xml', inpFile, outFile], () => { await this.execConverter(pdfaltoPath, [inpFile, outFile], () => {
perc = (perc < 80 ? perc + 10 : 40); perc = (perc < 80 ? perc + 10 : 40);
callback(perc); callback(perc);
}, abort); }, abort);
@@ -36,17 +44,22 @@ class ConvertPdf extends ConvertHtml {
const data = await fs.readFile(outFile); const data = await fs.readFile(outFile);
callback(90); callback(90);
await utils.sleep(100);
//парсим xml //парсим xml
let lines = []; let lines = [];
let pagelines = [];
let line = {text: ''};
let page = {};
let fonts = {};
let sectionTitleFound = false;
let images = []; let images = [];
let loading = []; let loading = [];
let inText = false;
let bold = false;
let italic = false;
let title = ''; let title = '';
let prevTop = 0; let author = '';
let i = -1; let i = -1;
let titleCount = 0;
const loadImage = async(image) => { const loadImage = async(image) => {
const src = path.parse(image.src); const src = path.parse(image.src);
@@ -60,7 +73,7 @@ class ConvertPdf extends ConvertHtml {
image.type = type; image.type = type;
image.name = src.base; image.name = src.base;
} }
} };
const putImage = (curTop) => { const putImage = (curTop) => {
if (!isNaN(curTop) && images.length) { if (!isNaN(curTop) && images.length) {
@@ -70,104 +83,180 @@ class ConvertPdf extends ConvertHtml {
images.shift(); images.shift();
} }
} }
} };
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const putPageLines = () => {
if (!cutCounter && inText) { pagelines.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left))
let tOpen = (bold ? '<b>' : '');
tOpen += (italic ? '<i>' : ''); //объединяем в одну строку равные по высоте
let tClose = (italic ? '</i>' : ''); const pl = [];
tClose += (bold ? '</b>' : ''); let pt = 0;
let j = -1;
pagelines.forEach(line => {
//добавим закрывающий тег стиля
line.text += line.tClose;
lines[i].text += `${tOpen}${text}${tClose} `; //проверим, возможно это заголовок
if (titleCount < 2 && text.trim() != '') { if (line.fonts.length == 1 && line.pageWidth) {
title += text + (titleCount ? '' : ' - '); const f = (line.fonts.length ? fonts[line.fonts[0]] : null);
titleCount++; const centerLeft = (line.pageWidth - line.width)/2;
if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
if (!sectionTitleFound) {
line.isSectionTitle = true;
sectionTitleFound = true;
} else {
line.isSubtitle = true;
}
}
} }
}
//объединяем
if (pt == 0 || Math.abs(pt - line.top) > 3) {
j++;
pl[j] = line;
} else {
pl[j].text += ` ${line.text}`;
}
pt = line.top;
});
//заполняем lines
const lastIndex = i;
pl.forEach(line => {
putImage(line.top);
//добавим пустую строку, если надо
const prevLine = (i > lastIndex ? lines[i] : {fonts: [], top: 0});
if (prevLine && !prevLine.isImage) {
const f = (prevLine.fonts.length ? fonts[prevLine.fonts[0]] : (line.fonts.length ? fonts[line.fonts[0]] : null));
if (f && f.fontSize && !line.isImage && line.top - prevLine.top > f.fontSize*1.8) {
i++;
lines[i] = {text: '<br>'};
}
}
i++;
lines[i] = line;
});
pagelines = [];
putImage(100000);
}; };
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) { if (tag == 'textstyle') {
if (inText) { const attrs = sax.getAttrsSync(tail);
switch (tag) { const fontId = (attrs.id && attrs.id.value ? attrs.id.value : '');
case 'i': const fontStyle = (attrs.fontstyle && attrs.fontstyle.value ? attrs.fontstyle.value : '');
italic = true; const fontSize = (attrs.fontsize && attrs.fontsize.value ? attrs.fontsize.value : '');
break;
case 'b': if (fontId) {
bold = true; const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
break; const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false, fontSize};
if (fontStyle) {
const styles = fontStyle.split(' ');
styles.forEach(style => {
const s = styleTags[style];
if (s) {
f.tOpen += `<${s}>`;
f.tClose = `</${s}>${f.tClose}`;
if (s == 'b')
f.isBold = true;
}
});
} }
} }
}
if (tag == 'text' && !inText) { if (tag == 'page') {
let attrs = sax.getAttrsSync(tail); const attrs = sax.getAttrsSync(tail);
const line = { page = {
text: '', width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10), };
left: parseInt((attrs.left && attrs.left.value ? attrs.left.value : null), 10),
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
};
if (line.width != 0 || line.height != 0) { putPageLines();
inText = true; }
if (isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3)) {
putImage(line.top); if (tag == 'textline') {
i++; const attrs = sax.getAttrsSync(tail);
lines[i] = line; line = {
} text: '',
prevTop = line.top; top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10),
} left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10),
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
tOpen: '',
tClose: '',
isSectionTitle: false,
isSubtitle: false,
pageWidth: page.width,
fonts: [],
};
if (line.width != 0 || line.height != 0) {
pagelines.push(line);
} }
}
if (tag == 'image') { if (tag == 'string') {
const attrs = sax.getAttrsSync(tail); const attrs = sax.getAttrsSync(tail);
const src = (attrs.src && attrs.src.value ? attrs.src.value : ''); if (attrs.content && attrs.content.value) {
let tOpen = '';
let tClose = '';
const fontId = (attrs.stylerefs && attrs.stylerefs.value ? attrs.stylerefs.value : '');
if (fontId && fonts[fontId]) {
tOpen = fonts[fontId].tOpen;
tClose = fonts[fontId].tClose;
if (!line.fonts.length || line.fonts[0] != fontId)
line.fonts.push(fontId);
}
if (line.tOpen != tOpen) {
line.text += line.tClose + tOpen;
line.tOpen = tOpen;
line.tClose = tClose;
}
line.text += `${line.text.length ? ' ' : ''}${attrs.content.value}`;
}
}
if (tag == 'illustration') {
const attrs = sax.getAttrsSync(tail);
if (attrs.type && attrs.type.value == 'image') {
let src = (attrs.fileid && attrs.fileid.value ? attrs.fileid.value : '');
if (src) { if (src) {
const image = { const image = {
isImage: true, isImage: true,
src, src,
data: '', data: '',
type: '', type: '',
top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0, top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10) || 0,
left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10) || 0,
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10) || 0,
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10) || 0,
}; };
loading.push(loadImage(image)); const exists = images.filter(img => (img.top == image.top && img.left == image.left && img.width == image.width && img.height == image.height));
images.push(image); if (!exists.length) {
images.sort((a, b) => a.top - b.top) loading.push(loadImage(image));
images.push(image);
images.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left));
}
} }
} }
if (tag == 'page') {
putImage(100000);
}
} }
}; };
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (inText) {
switch (tag) {
case 'i':
italic = false;
break;
case 'b':
bold = false;
break;
}
}
if (tag == 'text')
inText = false;
};
let buf = this.decode(data).toString(); let buf = this.decode(data).toString();
sax.parseSync(buf, { sax.parseSync(buf, {
onStartNode, onEndNode, onTextNode onStartNode
}); });
putImage(100000); putPageLines();
await Promise.all(loading); await Promise.all(loading);
await utils.sleep(100);
//найдем параграфы и отступы //найдем параграфы и отступы
const indents = []; const indents = [];
@@ -188,18 +277,51 @@ class ConvertPdf extends ConvertHtml {
} }
indents[0] = 0; indents[0] = 0;
//формируем текст //title
if (fs.pathExists(metaFile)) {
const metaXmlString = (await fs.readFile(metaFile)).toString();
let metaXmlParsed = xmlParser.parseXml(metaXmlString);
metaXmlParsed = xmlParser.simplifyXmlParsed(metaXmlParsed);
if (metaXmlParsed.metadata) {
title = (metaXmlParsed.metadata.title ? metaXmlParsed.metadata.title._t : '');
author = (metaXmlParsed.metadata.author ? metaXmlParsed.metadata.author._t : '');
}
}
if (!title && uploadFileName) if (!title && uploadFileName)
title = uploadFileName; title = uploadFileName;
let text = `<title>${title}</title>`;
//console.log(JSON.stringify(lines, null, 2));
//формируем текст
const limitSize = 2*this.config.maxUploadFileSize;
let text = '';
if (title)
text += `<fb2-title>${title}</fb2-title>`;
if (author)
text += `<fb2-author>${author}</fb2-author>`;
let concat = ''; let concat = '';
let sp = ''; let sp = '';
for (const line of lines) { for (const line of lines) {
if (text.length > limitSize) {
throw new Error(`Файл для конвертирования слишком большой|FORLOG| text.length: ${text.length} > ${limitSize}`);
}
if (line.isImage) { if (line.isImage) {
text += `<fb2-image type="${line.type}" name="${line.name}">${line.data}</fb2-image>`; text += `<fb2-image type="${line.type}" name="${line.name}">${line.data}</fb2-image>`;
continue; continue;
} }
if (line.isSectionTitle) {
text += `<fb2-section-title>${line.text.trim()}</fb2-section-title>`;
continue;
}
if (line.isSubtitle) {
text += `<br><fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
continue;
}
if (concat == '') { if (concat == '') {
const left = line.left || 0; const left = line.left || 0;
sp = ' '.repeat(indents[left]); sp = ' '.repeat(indents[left]);
@@ -217,7 +339,9 @@ class ConvertPdf extends ConvertHtml {
if (concat) if (concat)
text += sp + concat + "\n"; text += sp + concat + "\n";
return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true}); //console.log(text);
await utils.sleep(100);
return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
} }
} }

View File

@@ -48,7 +48,7 @@ class ConvertSites extends ConvertHtml {
if (text === false) if (text === false)
return false; return false;
return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true}); return await super.run(Buffer.from(text), {skipCheck: true});
} }
getTitle(text) { getTitle(text) {
@@ -79,7 +79,7 @@ class ConvertSites extends ConvertHtml {
let book = this.getTitle(text); let book = this.getTitle(text);
book = book.replace(' (fb2) | Флибуста', ''); book = book.replace(' (fb2) | Флибуста', '');
const title = `<title>${author}${(author ? ' - ' : '')}${book}</title>`; const title = `<fb2-title>${author}${(author ? ' - ' : '')}${book}</fb2-title>`;
let begin = '<h3 class="book">'; let begin = '<h3 class="book">';
if (text.indexOf(begin) <= 0) if (text.indexOf(begin) <= 0)
@@ -95,12 +95,12 @@ class ConvertSites extends ConvertHtml {
return text.substring(l, r) return text.substring(l, r)
.replace(/blockquote class="?book"?/g, 'p') .replace(/blockquote class="?book"?/g, 'p')
.replace(/<br\/?>\s*<\/h3>/g, '</h3>') .replace(/<br\/?>\s*<\/h3>/g, '</h3>')
.replace(/<h3 class="?book"?>/g, '<br><br><subtitle>') .replace(/<h3 class="?book"?>/g, '<br><br><fb2-subtitle>')
.replace(/<h5 class="?book"?>/g, '<br><br><subtitle>') .replace(/<h5 class="?book"?>/g, '<br><br><fb2-subtitle>')
.replace(/<h3>/g, '<br><br><subtitle>') .replace(/<h3>/g, '<br><br><fb2-subtitle>')
.replace(/<h5>/g, '<br><br><subtitle>') .replace(/<h5>/g, '<br><br><fb2-subtitle>')
.replace(/<\/h3>/g, '</subtitle><br>') .replace(/<\/h3>/g, '</fb2-subtitle><br>')
.replace(/<\/h5>/g, '</subtitle><br>') .replace(/<\/h5>/g, '</fb2-subtitle><br>')
.replace(/<div class="?stanza"?>/g, '<br>') .replace(/<div class="?stanza"?>/g, '<br>')
.replace(/<div>/g, '<br>') .replace(/<div>/g, '<br>')
+ title; + title;

View File

@@ -12,7 +12,7 @@ const utils = require('../utils');
const log = new (require('../AppLogger'))().log;//singleton const log = new (require('../AppLogger'))().log;//singleton
const cleanDirPeriod = 60*60*1000;//1 раз в час const cleanDirPeriod = 60*60*1000;//1 раз в час
const queue = new LimitedQueue(5, 100, 4*60*1000);//4 минуты ожидание подвижек const queue = new LimitedQueue(5, 100, 2*60*1000 + 15000);//2 минуты ожидание подвижек
let instance = null; let instance = null;
@@ -130,7 +130,8 @@ class ReaderWorker {
convertFilename = `${this.config.tempDownloadDir}/${tempFilename2}`; convertFilename = `${this.config.tempDownloadDir}/${tempFilename2}`;
await this.bookConverter.convertToFb2(decompFiles, convertFilename, opts, progress => { await this.bookConverter.convertToFb2(decompFiles, convertFilename, opts, progress => {
wState.set({progress}); wState.set({progress});
q.resetTimeout(); if (queue.freed > 0)
q.resetTimeout();
}, q.abort); }, q.abort);
//сжимаем файл в tmp, если там уже нет с тем же именем-sha256 //сжимаем файл в tmp, если там уже нет с тем же именем-sha256

View File

@@ -76,13 +76,13 @@ class ZipStreamer {
if (limitFileCount || limitFileSize || decodeEntryNameCallback) { if (limitFileCount || limitFileSize || decodeEntryNameCallback) {
const entries = Object.values(unzip.entries()); const entries = Object.values(unzip.entries());
if (limitFileCount && entries.length > limitFileCount) { if (limitFileCount && entries.length > limitFileCount) {
reject('Слишком много файлов'); reject(new Error('Слишком много файлов'));
return; return;
} }
for (const entry of entries) { for (const entry of entries) {
if (limitFileSize && !entry.isDirectory && entry.size > limitFileSize) { if (limitFileSize && !entry.isDirectory && entry.size > limitFileSize) {
reject('Файл слишком большой'); reject(new Error('Файл слишком большой'));
return; return;
} }

View File

@@ -6,7 +6,8 @@ function parseSync(xstr, options) {
onCdata: _onCdata = dummy, onCdata: _onCdata = dummy,
onComment: _onComment = dummy, onComment: _onComment = dummy,
onProgress: _onProgress = dummy, onProgress: _onProgress = dummy,
innerCut = new Set() innerCut = new Set(),
lowerCase = true,
} = options; } = options;
let i = 0; let i = 0;
@@ -91,7 +92,8 @@ function parseSync(xstr, options) {
} else { } else {
tag = tagData; tag = tagData;
} }
tag = tag.toLowerCase(); if (lowerCase)
tag = tag.toLowerCase();
if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) { if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
if (!cutCounter) if (!cutCounter)
@@ -146,7 +148,8 @@ async function parse(xstr, options) {
onCdata: _onCdata = dummy, onCdata: _onCdata = dummy,
onComment: _onComment = dummy, onComment: _onComment = dummy,
onProgress: _onProgress = dummy, onProgress: _onProgress = dummy,
innerCut = new Set() innerCut = new Set(),
lowerCase = true,
} = options; } = options;
let i = 0; let i = 0;
@@ -231,7 +234,8 @@ async function parse(xstr, options) {
} else { } else {
tag = tagData; tag = tagData;
} }
tag = tag.toLowerCase(); if (lowerCase)
tag = tag.toLowerCase();
if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) { if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
if (!cutCounter) if (!cutCounter)
@@ -276,7 +280,7 @@ async function parse(xstr, options) {
await _onProgress(100); await _onProgress(100);
} }
function getAttrsSync(tail) { function getAttrsSync(tail, lowerCase = true) {
let result = {}; let result = {};
let name = ''; let name = '';
let value = ''; let value = '';
@@ -287,13 +291,16 @@ function getAttrsSync(tail) {
let waitEq = false; let waitEq = false;
const pushResult = () => { const pushResult = () => {
if (lowerCase)
name = name.toLowerCase();
if (name != '') { if (name != '') {
const fn = name;
let ns = ''; let ns = '';
if (name.indexOf(':') >= 0) { if (fn.indexOf(':') >= 0) {
[ns, name] = name.split(':'); [ns, name] = fn.split(':');
} }
result[name] = {value, ns}; result[name] = {value, ns, fn};
} }
name = ''; name = '';
value = ''; value = '';

143
server/core/xmlParser.js Normal file
View File

@@ -0,0 +1,143 @@
const sax = require('./sax');
/**
 * Serializes a parsed XML tree into a complete XML document string.
 *
 * @param {object|Array|string} xmlParsed - node tree accepted by formatXmlNode
 * @param {string} [encoding='utf-8'] - value written into the XML declaration
 * @param {function(string): string} [textFilterFunc] - optional filter applied to text content
 * @returns {string} XML declaration immediately followed by the serialized tree
 */
function formatXml(xmlParsed, encoding = 'utf-8', textFilterFunc) {
    const declaration = `<?xml version="1.0" encoding="${encoding}"?>`;
    return declaration + formatXmlNode(xmlParsed, textFilterFunc);
}
/**
 * Serializes a node tree into an XML string (without the XML declaration).
 *
 * Accepted node shapes:
 *  - array: every element is serialized in order, without an inherited tag name;
 *  - string / number / boolean / bigint: emitted as text, wrapped in <name>…</name>
 *    when a tag name is known from the parent key;
 *  - object: `_n` (if truthy) overrides the tag name, `_attrs` is written as
 *    attributes, own `_t` is written as leading text, `_a` holds ordered children,
 *    every other key not starting with '_' is serialized as a child element named
 *    after the key;
 *  - null / undefined: produce no output.
 *
 * Attribute values are written verbatim (they are not passed through textFilterFunc).
 *
 * @param {*} node - root of the tree to serialize
 * @param {function(string): string} [textFilterFunc] - filter applied to every text value
 * @returns {string} serialized XML fragment
 */
function formatXmlNode(node, textFilterFunc) {
    const filterText = (textFilterFunc ? textFilterFunc : text => text);
    // Called through the prototype so nodes created with Object.create(null) work too.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const isPrimitiveText = (value) => {
        const type = typeof value;
        return type == 'string' || type == 'number' || type == 'boolean' || type == 'bigint';
    };

    const formatNode = (node, name) => {
        // typeof null is 'object', so without this guard `node._n` below would throw.
        if (node === null || node === undefined)
            return '';

        if (Array.isArray(node)) {
            let out = '';
            for (const n of node)
                out += formatNode(n);
            return out;
        }

        // Non-string primitives used to fall into the object branch and lose their value.
        if (isPrimitiveText(node)) {
            const text = filterText(String(node));
            return (name ? `<${name}>${text}</${name}>` : text);
        }

        if (node._n)
            name = node._n;

        let attrs = '';
        if (node._attrs) {
            for (const attrName of Object.keys(node._attrs))
                attrs += ` ${attrName}="${node._attrs[attrName]}"`;
        }

        let tBody = '';
        if (hasOwn(node, '_t'))
            tBody += filterText(node._t);

        for (const nodeName of Object.keys(node)) {
            // '_'-prefixed keys are service fields (_n, _t, _attrs, _p, …); only '_a' carries children.
            if (nodeName && nodeName[0] == '_' && nodeName != '_a')
                continue;
            tBody += formatNode(node[nodeName], nodeName);
        }

        return (name ? `<${name}${attrs}>${tBody}</${name}>` : tBody);
    };

    return formatNode(node);
}
/**
 * Parses an XML string into a tree of plain-object nodes using the sax module.
 *
 * Each element becomes {_n: tagName, _p: parentNode} with optional fields:
 *  - _attrs: map of full attribute name (`fn` from sax.getAttrsSync) to value,
 *  - _a: array of child element nodes in document order,
 *  - _t: the most recent text chunk reported while the element was current.
 * The '?xml' declaration tag is ignored.
 *
 * @param {string} xmlString - XML source text
 * @param {boolean} [lowerCase=true] - forwarded to sax.parseSync and sax.getAttrsSync
 * @returns {object} the first top-level element node, or an empty object when
 *     no element was encountered
 */
function parseXml(xmlString, lowerCase = true) {
    const root = {};
    let current = root;

    const onTextNode = (text) => {
        current._t = text;
    };

    const onStartNode = (tag, tail) => {
        if (tag == '?xml')
            return;

        const created = {_n: tag, _p: current};

        if (tail) {
            const parsed = Object.values(sax.getAttrsSync(tail, lowerCase));
            if (parsed.length) {
                const attrs = {};
                for (const {fn, value} of parsed)
                    attrs[fn] = value;
                created._attrs = attrs;
            }
        }

        if (!current._a)
            current._a = [];
        current._a.push(created);

        // descend: subsequent text and children belong to the new element
        current = created;
    };

    const onEndNode = (tag) => {
        // climb back only when the closing tag matches the element being built
        if (current._p && current._n == tag)
            current = current._p;
    };

    sax.parseSync(xmlString, {
        onStartNode, onEndNode, onTextNode, lowerCase
    });

    return (root._a ? root._a[0] : root);
}
/**
 * Collapses a node tree (as built by parseXml) into nested objects keyed by tag name.
 *
 * For every list of sibling nodes only the first node with a given `_n` is kept;
 * nodes without `_n` are skipped. Each kept node becomes an object holding its
 * collapsed children plus, when truthy on the source node, `_t` and `_attrs`.
 *
 * @param {object} node - root node of the parsed tree
 * @returns {object} object with a single key (the root tag name) when the root
 *     has `_n`, otherwise an empty object
 */
function simplifyXmlParsed(node) {
    const collapse = (children) => {
        const byName = {};

        for (const child of children) {
            const tagName = child._n;
            if (!tagName || byName[tagName])
                continue;

            const entry = (child._a ? collapse(child._a) : {});
            if (child._t)
                entry._t = child._t;
            if (child._attrs)
                entry._attrs = child._attrs;

            byName[tagName] = entry;
        }

        return byName;
    };

    return collapse([node]);
}
// Public API of the module: the two serializers (formatXml, formatXmlNode),
// the parser (parseXml) and the tree simplifier (simplifyXmlParsed).
module.exports = {
formatXml,
formatXmlNode,
parseXml,
simplifyXmlParsed
}