Compare commits

...

30 Commits

Author SHA1 Message Date
Book Pauk
7fa891b4fc Merge branch 'release/0.9.11' 2020-12-09 22:31:33 +07:00
Book Pauk
6cb7412cf3 Версия 0.9.11 2020-12-09 22:30:58 +07:00
Book Pauk
157322834b Небольшая поправка 2020-12-09 22:30:19 +07:00
Book Pauk
1a13a0fee1 Работа над конвертером pdf 2020-12-09 22:19:14 +07:00
Book Pauk
37256255bf Добавлена поддержка тегов 'sup' и 'sub' 2020-12-09 20:35:52 +07:00
Book Pauk
75e01c899e Работа над конвертером pdf 2020-12-09 20:08:17 +07:00
Book Pauk
ef0d6eab89 Работа над конвертером Pdf 2020-12-09 19:05:09 +07:00
Book Pauk
5d54b1b0f4 Работа над конвертером pdf 2020-12-09 03:52:24 +07:00
Book Pauk
522f953b4f Работа над конвертером pdf 2020-12-09 03:06:15 +07:00
Book Pauk
15f02c7115 Работа над конвертером pdf 2020-12-09 01:29:58 +07:00
Book Pauk
174c877eee Рефакторинг, плюс небольшие доработки 2020-12-09 01:29:09 +07:00
Book Pauk
fd9ec736d7 Рефакторинг 2020-12-08 19:36:53 +07:00
Book Pauk
2c94025ba3 Поправлен баг 2020-12-08 19:31:00 +07:00
Book Pauk
bfadf35c40 Закончена работа над xmlParser, оттестировано 2020-12-08 18:48:55 +07:00
Book Pauk
f3b69caa12 Работа над модулем xmlParser 2020-12-08 16:17:36 +07:00
Book Pauk
18a83a5b0b Поправки настроек сжатия 2020-12-08 14:26:49 +07:00
Book Pauk
bd9669b782 Поправка цели dev 2020-12-08 14:26:25 +07:00
Book Pauk
e05713aa7f Работа над конвертером pdf 2020-12-08 14:15:17 +07:00
Book Pauk
bc3e1f0a6f Мелкий рефакторинг 2020-12-07 22:13:14 +07:00
Book Pauk
063d01b5ca Перевод pdf-конвертера на использование pdfalto 2020-12-07 22:05:01 +07:00
Book Pauk
81c38d7749 Мелкий рефакторинг 2020-12-07 20:13:32 +07:00
Book Pauk
a29842b084 Поправка readme 2020-12-07 20:12:37 +07:00
Book Pauk
bb5adcdaf6 Рефакторинг 2020-12-07 01:30:10 +07:00
Book Pauk
537e17a219 Merge tag '0.9.10-5' into develop
0.9.10-5
2020-12-05 13:42:45 +07:00
Book Pauk
03ce50153e Merge branch 'release/0.9.10-5' 2020-12-05 13:42:39 +07:00
Book Pauk
15d01ad7fc Коррекция таймаутов очереди ожидания 2020-12-05 13:41:42 +07:00
Book Pauk
e2b29e2c2f Merge tag '0.9.10-4' into develop
0.9.10-4
2020-12-05 13:25:10 +07:00
Book Pauk
ce7ae84e0f Merge branch 'release/0.9.10-4' 2020-12-05 13:25:06 +07:00
Book Pauk
01eb545f15 Улучшение работы с очередью, поправка багов 2020-12-05 13:24:04 +07:00
Book Pauk
706738c7f1 Merge tag '0.9.10-3' into develop
0.9.10-3
2020-12-05 01:40:37 +07:00
18 changed files with 571 additions and 224 deletions

View File

@@ -66,7 +66,7 @@ class Reader {
await utils.sleep(refreshPause); await utils.sleep(refreshPause);
i++; i++;
if (i > 120*1000/refreshPause) {//2 мин ждем телодвижений воркера if (i > 180*1000/refreshPause) {//3 мин ждем телодвижений воркера
throw new Error('Слишком долгое время ожидания'); throw new Error('Слишком долгое время ожидания');
} }
//проверка воркера //проверка воркера

View File

@@ -77,9 +77,15 @@ export default class DrawHelper {
let j = 0; let j = 0;
//формируем строку //формируем строку
for (const part of line.parts) { for (const part of line.parts) {
let tOpen = (part.style.bold ? '<b>' : ''); let tOpen = '';
tOpen += (part.style.bold ? '<b>' : '');
tOpen += (part.style.italic ? '<i>' : ''); tOpen += (part.style.italic ? '<i>' : '');
let tClose = (part.style.italic ? '</i>' : ''); tOpen += (part.style.sup ? '<span style="vertical-align: baseline; position: relative; line-height: 0; top: -0.3em">' : '');
tOpen += (part.style.sub ? '<span style="vertical-align: baseline; position: relative; line-height: 0; top: 0.3em">' : '');
let tClose = '';
tClose += (part.style.sub ? '</span>' : '');
tClose += (part.style.sup ? '</span>' : '');
tClose += (part.style.italic ? '</i>' : '');
tClose += (part.style.bold ? '</b>' : ''); tClose += (part.style.bold ? '</b>' : '');
let text = ''; let text = '';

View File

@@ -285,7 +285,7 @@ export default class BookParser {
sectionLevel++; sectionLevel++;
} }
if (tag == 'emphasis' || tag == 'strong') { if (tag == 'emphasis' || tag == 'strong' || tag == 'sup' || tag == 'sub') {
growParagraph(`<${tag}>`, 0); growParagraph(`<${tag}>`, 0);
} }
@@ -343,7 +343,7 @@ export default class BookParser {
sectionLevel--; sectionLevel--;
} }
if (tag == 'emphasis' || tag == 'strong') { if (tag == 'emphasis' || tag == 'strong' || tag == 'sup' || tag == 'sub') {
growParagraph(`</${tag}>`, 0); growParagraph(`</${tag}>`, 0);
} }
@@ -507,7 +507,7 @@ export default class BookParser {
splitToStyle(s) { splitToStyle(s) {
let result = [];/*array of { let result = [];/*array of {
style: {bold: Boolean, italic: Boolean, center: Boolean, space: Number}, style: {bold: Boolean, italic: Boolean, sup: Boolean, sub: Boolean, center: Boolean, space: Number},
image: {local: Boolean, inline: Boolean, id: String}, image: {local: Boolean, inline: Boolean, id: String},
text: String, text: String,
}*/ }*/
@@ -530,6 +530,12 @@ export default class BookParser {
case 'emphasis': case 'emphasis':
style.italic = true; style.italic = true;
break; break;
case 'sup':
style.sup = true;
break;
case 'sub':
style.sub = true;
break;
case 'center': case 'center':
style.center = true; style.center = true;
break; break;
@@ -580,6 +586,12 @@ export default class BookParser {
case 'emphasis': case 'emphasis':
style.italic = false; style.italic = false;
break; break;
case 'sup':
style.sup = false;
break;
case 'sub':
style.sub = false;
break;
case 'center': case 'center':
style.center = false; style.center = false;
break; break;

View File

@@ -169,7 +169,7 @@ class BookManager {
} }
async deflateWithProgress(data, callback) { async deflateWithProgress(data, callback) {
const chunkSize = 128*1024; const chunkSize = 512*1024;
const deflator = new utils.pako.Deflate({level: 5}); const deflator = new utils.pako.Deflate({level: 5});
let chunkTotal = 1 + Math.floor(data.length/chunkSize); let chunkTotal = 1 + Math.floor(data.length/chunkSize);
@@ -203,7 +203,7 @@ class BookManager {
} }
async inflateWithProgress(data, callback) { async inflateWithProgress(data, callback) {
const chunkSize = 64*1024; const chunkSize = 512*1024;
const inflator = new utils.pako.Inflate({to: 'string'}); const inflator = new utils.pako.Inflate({to: 'string'});
let chunkTotal = 1 + Math.floor(data.length/chunkSize); let chunkTotal = 1 + Math.floor(data.length/chunkSize);

View File

@@ -1,4 +1,15 @@
export const versionHistory = [ export const versionHistory = [
{
showUntil: '2020-12-08',
header: '0.9.11 (2020-12-09)',
content:
`
<ul>
<li>оптимизации, улучшения работы конвертеров</li>
</ul>
`
},
{ {
showUntil: '2020-12-10', showUntil: '2020-12-10',
header: '0.9.10 (2020-12-03)', header: '0.9.10 (2020-12-03)',

View File

@@ -32,11 +32,23 @@ sudo -u www-data mkdir -p /home/liberama/data/calibre
sudo -u www-data tar xvf calibre-5.5.0-x86_64.txz -C /home/liberama/data/calibre sudo -u www-data tar xvf calibre-5.5.0-x86_64.txz -C /home/liberama/data/calibre
``` ```
### external converter `pdfalto`, github https://github.com/kermitt2/pdfalto
```
git clone https://github.com/kermitt2/pdfalto
cd pdfalto
git submodule update --init --recursive
cmake ./
добавить в начало CMakeLists.txt строчку: set(CMAKE_EXE_LINKER_FLAGS "-no-pie")
make
sudo -u www-data mkdir -p /home/liberama/data/pdfalto
sudo -u www-data cp pdfalto /home/liberama/data/pdfalto
```
### external converters ### external converters
``` ```
sudo apt install rar sudo apt install rar
sudo apt install libreoffice sudo apt install libreoffice
sudo apt install poppler-utils
sudo apt install djvulibre-bin sudo apt install djvulibre-bin
sudo apt install libtiff-tools sudo apt install libtiff-tools
sudo apt install graphicsmagick-imagemagick-compat sudo apt install graphicsmagick-imagemagick-compat

2
package-lock.json generated
View File

@@ -1,6 +1,6 @@
{ {
"name": "Liberama", "name": "Liberama",
"version": "0.9.10", "version": "0.9.11",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {

View File

@@ -1,6 +1,6 @@
{ {
"name": "Liberama", "name": "Liberama",
"version": "0.9.10", "version": "0.9.11",
"author": "Book Pauk <bookpauk@gmail.com>", "author": "Book Pauk <bookpauk@gmail.com>",
"license": "CC0-1.0", "license": "CC0-1.0",
"repository": "bookpauk/liberama", "repository": "bookpauk/liberama",
@@ -8,7 +8,7 @@
"node": ">=10.0.0" "node": ">=10.0.0"
}, },
"scripts": { "scripts": {
"dev": "nodemon --inspect --ignore server/public --ignore server/data --exec 'node server'", "dev": "nodemon --inspect --ignore server/public --ignore server/data --ignore client --exec 'node server'",
"build:client": "webpack --config build/webpack.prod.config.js", "build:client": "webpack --config build/webpack.prod.config.js",
"build:linux": "npm run build:client && node build/linux && pkg -t latest-linux-x64 -o dist/linux/liberama .", "build:linux": "npm run build:client && node build/linux && pkg -t latest-linux-x64 -o dist/linux/liberama .",
"build:win": "npm run build:client && node build/win && pkg -t latest-win-x64 -o dist/win/liberama .", "build:win": "npm run build:client && node build/win && pkg -t latest-win-x64 -o dist/win/liberama .",

View File

@@ -136,7 +136,7 @@ class WebSocketController {
break; break;
i++; i++;
if (i > 2*60*1000/refreshPause) {//2 мин ждем телодвижений воркера if (i > 3*60*1000/refreshPause) {//3 мин ждем телодвижений воркера
this.send({state: 'error', error: 'Время ожидания процесса истекло'}, req, ws); this.send({state: 'error', error: 'Время ожидания процесса истекло'}, req, ws);
break; break;
} }

View File

@@ -5,8 +5,9 @@ const he = require('he');
const LimitedQueue = require('../../LimitedQueue'); const LimitedQueue = require('../../LimitedQueue');
const textUtils = require('./textUtils'); const textUtils = require('./textUtils');
const utils = require('../../utils'); const utils = require('../../utils');
const xmlParser = require('../../xmlParser');
const queue = new LimitedQueue(3, 20, 3*60*1000);//3 минуты ожидание подвижек const queue = new LimitedQueue(3, 20, 2*60*1000);//2 минуты ожидание подвижек
class ConvertBase { class ConvertBase {
constructor(config) { constructor(config) {
@@ -14,7 +15,6 @@ class ConvertBase {
this.calibrePath = `${config.dataDir}/calibre/ebook-convert`; this.calibrePath = `${config.dataDir}/calibre/ebook-convert`;
this.sofficePath = '/usr/bin/soffice'; this.sofficePath = '/usr/bin/soffice';
this.pdfToHtmlPath = '/usr/bin/pdftohtml';
} }
async run(data, opts) {// eslint-disable-line no-unused-vars async run(data, opts) {// eslint-disable-line no-unused-vars
@@ -27,9 +27,6 @@ class ConvertBase {
if (!await fs.pathExists(this.sofficePath)) if (!await fs.pathExists(this.sofficePath))
throw new Error('Внешний конвертер LibreOffice не найден'); throw new Error('Внешний конвертер LibreOffice не найден');
if (!await fs.pathExists(this.pdfToHtmlPath))
throw new Error('Внешний конвертер pdftohtml не найден');
} }
async execConverter(path, args, onData, abort) { async execConverter(path, args, onData, abort) {
@@ -42,21 +39,32 @@ class ConvertBase {
throw new Error('Слишком большая очередь конвертирования. Пожалуйста, попробуйте позже.'); throw new Error('Слишком большая очередь конвертирования. Пожалуйста, попробуйте позже.');
} }
abort = (abort ? abort : () => false);
const myAbort = () => {
return q.abort() || abort();
}
try { try {
if (myAbort())
throw new Error('abort');
const result = await utils.spawnProcess(path, { const result = await utils.spawnProcess(path, {
killAfter: 3600,//1 час killAfter: 3600,//1 час
args, args,
onData: (data) => { onData: (data) => {
q.resetTimeout(); if (queue.freed > 0)
q.resetTimeout();
onData(data); onData(data);
}, },
//будем периодически проверять работу конвертера и если очереди нет, то разрешаем работу пинком onData //будем периодически проверять работу конвертера и если очереди нет, то разрешаем работу пинком onData
onUsage: (stats) => { onUsage: (stats) => {
if (queue.freed > 1 && stats.cpu >= 10) if (queue.freed > 0 && stats.cpu >= 10) {
q.resetTimeout();
onData('.'); onData('.');
}
}, },
onUsageInterval: 10, onUsageInterval: 10,
abort abort: myAbort
}); });
if (result.code != 0) { if (result.code != 0) {
const error = `${result.code}|FORLOG|, exec: ${path}, args: ${args.join(' ')}, stdout: ${result.stdout}, stderr: ${result.stderr}`; const error = `${result.code}|FORLOG|, exec: ${path}, args: ${args.join(' ')}, stdout: ${result.stdout}, stderr: ${result.stderr}`;
@@ -95,61 +103,14 @@ class ConvertBase {
} }
formatFb2(fb2) { formatFb2(fb2) {
let out = '<?xml version="1.0" encoding="utf-8"?>'; const out = xmlParser.formatXml({
out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">'; FictionBook: {
out += this.formatFb2Node(fb2); _attrs: {xmlns: 'http://www.gribuser.ru/xml/fictionbook/2.0', 'xmlns:l': 'http://www.w3.org/1999/xlink'},
out += '</FictionBook>'; _a: [fb2],
return out;
}
formatFb2Node(node, name) {
let out = '';
if (Array.isArray(node)) {
for (const n of node) {
out += this.formatFb2Node(n);
} }
} else if (typeof node == 'string') { }, 'utf-8', this.repSpaces);
if (name)
out += `<${name}>${this.repSpaces(node)}</${name}>`;
else
out += this.repSpaces(node);
} else {
if (node._n)
name = node._n;
let attrs = ''; return out.replace(/<p>\s*?<\/p>/g, '<empty-line/>');
if (node._attrs) {
for (let attrName in node._attrs) {
attrs += ` ${attrName}="${node._attrs[attrName]}"`;
}
}
let tOpen = '';
let tBody = '';
let tClose = '';
if (name)
tOpen += `<${name}${attrs}>`;
if (node.hasOwnProperty('_t'))
tBody += this.repSpaces(node._t);
for (let nodeName in node) {
if (nodeName && nodeName[0] == '_' && nodeName != '_a')
continue;
const n = node[nodeName];
tBody += this.formatFb2Node(n, nodeName);
}
if (name)
tClose += `</${name}>`;
if (attrs == '' && name == 'p' && tBody.trim() == '')
out += '<empty-line/>'
else
out += `${tOpen}${tBody}${tClose}`;
}
return out;
} }
} }

View File

@@ -2,9 +2,9 @@ const fs = require('fs-extra');
const path = require('path'); const path = require('path');
const utils = require('../../utils'); const utils = require('../../utils');
const ConvertHtml = require('./ConvertHtml'); const ConvertBase = require('./ConvertBase');
class ConvertDjvu extends ConvertHtml { class ConvertDjvu extends ConvertBase {
check(data, opts) { check(data, opts) {
const {inputFiles} = opts; const {inputFiles} = opts;
@@ -59,9 +59,17 @@ class ConvertDjvu extends ConvertHtml {
}, abort); }, abort);
//читаем изображения //читаем изображения
limitSize = 2*this.config.maxUploadFileSize;
let imagesSize = 0;
const loadImage = async(image) => { const loadImage = async(image) => {
image.data = (await fs.readFile(image.file)).toString('base64'); image.data = (await fs.readFile(image.file)).toString('base64');
image.name = path.basename(image.file); image.name = path.basename(image.file);
imagesSize += image.data.length;
if (imagesSize > limitSize) {
throw new Error(`Файл для конвертирования слишком большой|FORLOG| imagesSize: ${imagesSize} > ${limitSize}`);
}
} }
let files = []; let files = [];
@@ -82,20 +90,29 @@ class ConvertDjvu extends ConvertHtml {
await Promise.all(loading); await Promise.all(loading);
//формируем текст //формируем fb2
limitSize = 2*this.config.maxUploadFileSize; let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo};
let pars = [];
let body = {_n: 'body', section: {_a: [pars]}};
let binary = [];
let fb2 = [desc, body, binary];
let title = ''; let title = '';
if (uploadFileName) if (uploadFileName)
title = uploadFileName; title = uploadFileName;
let text = `<title>${title}</title>`;
for (const image of images) {
text += `<fb2-image type="image/jpeg" name="${image.name}">${image.data}</fb2-image>`;
if (text.length > limitSize) { titleInfo['book-title'] = title;
throw new Error(`Файл для конвертирования слишком большой|FORLOG| text.length: ${text.length} > ${limitSize}`);
} for (const image of images) {
const img = {_n: 'binary', _attrs: {id: image.name, 'content-type': 'image/jpeg'}, _t: image.data};
binary.push(img);
pars.push({_n: 'p', _t: ''});
pars.push({_n: 'image', _attrs: {'l:href': `#${image.name}`}});
} }
return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true});
return this.formatFb2(fb2);
} }
} }

View File

@@ -2,7 +2,7 @@ const fs = require('fs-extra');
const ConvertHtml = require('./ConvertHtml'); const ConvertHtml = require('./ConvertHtml');
class ConvertDocX extends ConvertHtml { class ConvertFb3 extends ConvertHtml {
async check(data, opts) { async check(data, opts) {
const {inputFiles} = opts; const {inputFiles} = opts;
if (this.config.useExternalBookConverter && if (this.config.useExternalBookConverter &&
@@ -39,13 +39,14 @@ class ConvertDocX extends ConvertHtml {
const title = this.getTitle(text) const title = this.getTitle(text)
.replace(/<\/?p>/g, '') .replace(/<\/?p>/g, '')
; ;
text = `<title>${title}</title>` + text text = `<fb2-title>${title}</fb2-title>` + text
.replace(/<title>/g, '<br><b>') .replace(/<title>/g, '<br><b>')
.replace(/<\/title>/g, '</b><br>') .replace(/<\/title>/g, '</b><br>')
.replace(/<subtitle>/g, '<br><br><subtitle>') .replace(/<subtitle>/g, '<br><br><fb2-subtitle>')
.replace(/<\/subtitle>/g, '</fb2-subtitle>')
; ;
return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true}); return await super.run(Buffer.from(text), {skipCheck: true});
} }
} }
module.exports = ConvertDocX; module.exports = ConvertFb3;

View File

@@ -34,7 +34,6 @@ class ConvertHtml extends ConvertBase {
} else { } else {
isText = opts.isText; isText = opts.isText;
} }
let {cutTitle} = opts;
let titleInfo = {}; let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo}; let desc = {_n: 'description', 'title-info': titleInfo};
@@ -44,12 +43,17 @@ class ConvertHtml extends ConvertBase {
let fb2 = [desc, body, binary]; let fb2 = [desc, body, binary];
let title = ''; let title = '';
let author = '';
let inTitle = false; let inTitle = false;
let inSectionTitle = false;
let inAuthor = false;
let inSubTitle = false; let inSubTitle = false;
let inImage = false; let inImage = false;
let image = {}; let image = {};
let bold = false; let bold = false;
let italic = false; let italic = false;
let superscript = false;
let subscript = false;
let begining = true; let begining = true;
let spaceCounter = []; let spaceCounter = [];
@@ -62,7 +66,7 @@ class ConvertHtml extends ConvertBase {
}; };
const growParagraph = (text) => { const growParagraph = (text) => {
if (!pars.length) if (!pars.length || pars[pars.length - 1]._n != 'p')
newParagraph(); newParagraph();
const l = pars.length; const l = pars.length;
@@ -94,12 +98,16 @@ class ConvertHtml extends ConvertBase {
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
text = this.escapeEntities(text); text = this.escapeEntities(text);
if (!cutCounter && !(cutTitle && inTitle)) { if (!(cutCounter || inTitle || inSectionTitle || inSubTitle)) {
let tOpen = ''; let tOpen = '';
tOpen += (inSubTitle ? '<subtitle>' : ''); tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : ''); tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : ''); tOpen += (italic ? '<emphasis>' : '');
tOpen += (superscript ? '<sup>' : '');
tOpen += (subscript ? '<sub>' : '');
let tClose = '' let tClose = ''
tClose += (subscript ? '</sub>' : '');
tClose += (superscript ? '</sup>' : '');
tClose += (italic ? '</emphasis>' : ''); tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : ''); tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : ''); tClose += (inSubTitle ? '</subtitle>' : '');
@@ -110,12 +118,22 @@ class ConvertHtml extends ConvertBase {
if (inTitle && !title) if (inTitle && !title)
title = text; title = text;
if (inAuthor && !author)
author = text;
if (inSectionTitle) {
pars.unshift({_n: 'title', _t: text});
}
if (inSubTitle) {
pars.push({_n: 'subtitle', _t: text});
}
if (inImage) { if (inImage) {
image._t = text; image._t = text;
binary.push(image); binary.push(image);
pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''}); pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
newParagraph();
} }
}; };
@@ -140,15 +158,27 @@ class ConvertHtml extends ConvertBase {
bold = true; bold = true;
break; break;
} }
if (tag == 'sup')
superscript = true;
if (tag == 'sub')
subscript = true;
} }
if (tag == 'title' || tag == 'cut-title') { if (tag == 'title' || tag == 'fb2-title') {
inTitle = true; inTitle = true;
if (tag == 'cut-title')
cutTitle = true;
} }
if (tag == 'subtitle') { if (tag == 'fb2-author') {
inAuthor = true;
}
if (tag == 'fb2-section-title') {
inSectionTitle = true;
}
if (tag == 'fb2-subtitle') {
inSubTitle = true; inSubTitle = true;
} }
@@ -179,12 +209,26 @@ class ConvertHtml extends ConvertBase {
bold = false; bold = false;
break; break;
} }
if (tag == 'sup')
superscript = false;
if (tag == 'sub')
subscript = false;
} }
if (tag == 'title' || tag == 'cut-title') if (tag == 'title' || tag == 'fb2-title')
inTitle = false; inTitle = false;
if (tag == 'subtitle') if (tag == 'fb2-author') {
inAuthor = false;
}
if (tag == 'fb2-section-title') {
inSectionTitle = false;
}
if (tag == 'fb2-subtitle')
inSubTitle = false; inSubTitle = false;
if (tag == 'fb2-image') if (tag == 'fb2-image')
@@ -195,10 +239,17 @@ class ConvertHtml extends ConvertBase {
sax.parseSync(buf, { sax.parseSync(buf, {
onStartNode, onEndNode, onTextNode, onStartNode, onEndNode, onTextNode,
innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image']) innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image', 'fb2-title', 'fb2-author'])
}); });
titleInfo['book-title'] = title; titleInfo['book-title'] = title;
if (author)
titleInfo.author = {'last-name': author};
body.section._a[0] = pars;
//console.log(JSON.stringify(fb2, null, 2));
//подозрение на чистый текст, надо разбить на параграфы //подозрение на чистый текст, надо разбить на параграфы
if (isText || (buf.length > 30*1024 && pars.length < buf.length/2000)) { if (isText || (buf.length > 30*1024 && pars.length < buf.length/2000)) {
let total = 0; let total = 0;
@@ -228,56 +279,49 @@ class ConvertHtml extends ConvertBase {
if (parIndent > 2) parIndent--; if (parIndent > 2) parIndent--;
let newPars = []; let newPars = [];
let curPar = {};
const newPar = () => { const newPar = () => {
newPars.push({_n: 'p', _t: ''}); curPar = {_n: 'p', _t: ''};
newPars.push(curPar);
}; };
const growPar = (text) => {
if (!newPars.length)
newPar();
const l = newPars.length;
newPars[l - 1]._t += text;
}
i = 0;
for (const par of pars) { for (const par of pars) {
if (par._n != 'p') { if (par._n != 'p') {
newPars.push(par); newPars.push(par);
continue; continue;
} }
if (i > 0) newPar();
newPar();
i++;
let j = 0;
const lines = par._t.split('\n'); const lines = par._t.split('\n');
for (let line of lines) { for (let j = 0; j < lines.length; j++) {
line = repCrLfTab(line); const line = repCrLfTab(lines[j]);
let l = 0; let l = 0;
while (l < line.length && line[l] == ' ') { while (l < line.length && line[l] == ' ') {
l++; l++;
} }
if (l >= parIndent || line == '') { if (j > 0 &&
if (j > 0) (l >= parIndent ||
newPar(); (j < lines.length - 1 && line == '')
j++; )
) {
newPar();
} }
growPar(line.trim() + ' ');
curPar._t += line.trim() + ' ';
} }
} }
body.section._a[0] = newPars; body.section._a[0] = newPars;
} else {
body.section._a[0] = pars;
} }
//убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги //убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги
bold = false; bold = false;
italic = false; italic = false;
superscript = false;
subscript = false;
inSubTitle = false; inSubTitle = false;
pars = body.section._a[0]; pars = body.section._a[0];
for (let i = 0; i < pars.length; i++) { for (let i = 0; i < pars.length; i++) {
@@ -297,7 +341,11 @@ class ConvertHtml extends ConvertBase {
tOpen += (inSubTitle ? '<subtitle>' : ''); tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : ''); tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : ''); tOpen += (italic ? '<emphasis>' : '');
tOpen += (superscript ? '<sup>' : '');
tOpen += (subscript ? '<sub>' : '');
let tClose = '' let tClose = ''
tClose += (subscript ? '</sub>' : '');
tClose += (superscript ? '</sup>' : '');
tClose += (italic ? '</emphasis>' : ''); tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : ''); tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : ''); tClose += (inSubTitle ? '</subtitle>' : '');
@@ -313,6 +361,10 @@ class ConvertHtml extends ConvertBase {
bold = true; bold = true;
if (tag == 'emphasis') if (tag == 'emphasis')
italic = true; italic = true;
if (tag == 'sup')
superscript = true;
if (tag == 'sub')
subscript = true;
if (tag == 'subtitle') if (tag == 'subtitle')
inSubTitle = true; inSubTitle = true;
} }
@@ -322,6 +374,10 @@ class ConvertHtml extends ConvertBase {
bold = false; bold = false;
if (tag == 'emphasis') if (tag == 'emphasis')
italic = false; italic = false;
if (tag == 'sup')
superscript = false;
if (tag == 'sub')
subscript = false;
if (tag == 'subtitle') if (tag == 'subtitle')
inSubTitle = false; inSubTitle = false;
} }

View File

@@ -1,9 +1,11 @@
//const _ = require('lodash');
const fs = require('fs-extra'); const fs = require('fs-extra');
const path = require('path'); const path = require('path');
const sax = require('../../sax'); const sax = require('../../sax');
const utils = require('../../utils'); const utils = require('../../utils');
const ConvertHtml = require('./ConvertHtml'); const ConvertHtml = require('./ConvertHtml');
const xmlParser = require('../../xmlParser');
class ConvertPdf extends ConvertHtml { class ConvertPdf extends ConvertHtml {
check(data, opts) { check(data, opts) {
@@ -22,11 +24,18 @@ class ConvertPdf extends ConvertHtml {
const {inputFiles, callback, abort, uploadFileName} = opts; const {inputFiles, callback, abort, uploadFileName} = opts;
const inpFile = inputFiles.sourceFile; const inpFile = inputFiles.sourceFile;
const outFile = `${inputFiles.filesDir}/${utils.randomHexString(10)}.xml`; const outBasename = `${inputFiles.filesDir}/${utils.randomHexString(10)}`;
const outFile = `${outBasename}.xml`;
const metaFile = `${outBasename}_metadata.xml`;
const pdfaltoPath = `${this.config.dataDir}/pdfalto/pdfalto`;
if (!await fs.pathExists(pdfaltoPath))
throw new Error('Внешний конвертер pdfalto не найден');
//конвертируем в xml //конвертируем в xml
let perc = 0; let perc = 0;
await this.execConverter(this.pdfToHtmlPath, ['-nodrm', '-c', '-s', '-xml', inpFile, outFile], () => { await this.execConverter(pdfaltoPath, [inpFile, outFile], () => {
perc = (perc < 80 ? perc + 10 : 40); perc = (perc < 80 ? perc + 10 : 40);
callback(perc); callback(perc);
}, abort); }, abort);
@@ -35,17 +44,22 @@ class ConvertPdf extends ConvertHtml {
const data = await fs.readFile(outFile); const data = await fs.readFile(outFile);
callback(90); callback(90);
await utils.sleep(100);
//парсим xml //парсим xml
let lines = []; let lines = [];
let pagelines = [];
let line = {text: ''};
let page = {};
let fonts = {};
let sectionTitleFound = false;
let images = []; let images = [];
let loading = []; let loading = [];
let inText = false;
let bold = false;
let italic = false;
let title = ''; let title = '';
let prevTop = 0; let author = '';
let i = -1; let i = -1;
let titleCount = 0;
const loadImage = async(image) => { const loadImage = async(image) => {
const src = path.parse(image.src); const src = path.parse(image.src);
@@ -59,7 +73,7 @@ class ConvertPdf extends ConvertHtml {
image.type = type; image.type = type;
image.name = src.base; image.name = src.base;
} }
} };
const putImage = (curTop) => { const putImage = (curTop) => {
if (!isNaN(curTop) && images.length) { if (!isNaN(curTop) && images.length) {
@@ -69,104 +83,180 @@ class ConvertPdf extends ConvertHtml {
images.shift(); images.shift();
} }
} }
} };
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const putPageLines = () => {
if (!cutCounter && inText) { pagelines.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left))
let tOpen = (bold ? '<b>' : '');
tOpen += (italic ? '<i>' : '');
let tClose = (italic ? '</i>' : '');
tClose += (bold ? '</b>' : '');
lines[i].text += `${tOpen}${text}${tClose} `; //объединяем в одну строку равные по высоте
if (titleCount < 2 && text.trim() != '') { const pl = [];
title += text + (titleCount ? '' : ' - '); let pt = 0;
titleCount++; let j = -1;
pagelines.forEach(line => {
//добавим закрывающий тег стиля
line.text += line.tClose;
//проверим, возможно это заголовок
if (line.fonts.length == 1 && line.pageWidth) {
const f = (line.fonts.length ? fonts[line.fonts[0]] : null);
const centerLeft = (line.pageWidth - line.width)/2;
if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
if (!sectionTitleFound) {
line.isSectionTitle = true;
sectionTitleFound = true;
} else {
line.isSubtitle = true;
}
}
} }
}
//объединяем
if (pt == 0 || Math.abs(pt - line.top) > 3) {
j++;
pl[j] = line;
} else {
pl[j].text += ` ${line.text}`;
}
pt = line.top;
});
//заполняем lines
const lastIndex = i;
pl.forEach(line => {
putImage(line.top);
//добавим пустую строку, если надо
const prevLine = (i > lastIndex ? lines[i] : {fonts: [], top: 0});
if (prevLine && !prevLine.isImage) {
const f = (prevLine.fonts.length ? fonts[prevLine.fonts[0]] : (line.fonts.length ? fonts[line.fonts[0]] : null));
if (f && f.fontSize && !line.isImage && line.top - prevLine.top > f.fontSize*1.8) {
i++;
lines[i] = {text: '<br>'};
}
}
i++;
lines[i] = line;
});
pagelines = [];
putImage(100000);
}; };
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) { if (tag == 'textstyle') {
if (inText) { const attrs = sax.getAttrsSync(tail);
switch (tag) { const fontId = (attrs.id && attrs.id.value ? attrs.id.value : '');
case 'i': const fontStyle = (attrs.fontstyle && attrs.fontstyle.value ? attrs.fontstyle.value : '');
italic = true; const fontSize = (attrs.fontsize && attrs.fontsize.value ? attrs.fontsize.value : '');
break;
case 'b': if (fontId) {
bold = true; const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
break; const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false, fontSize};
if (fontStyle) {
const styles = fontStyle.split(' ');
styles.forEach(style => {
const s = styleTags[style];
if (s) {
f.tOpen += `<${s}>`;
f.tClose = `</${s}>${f.tClose}`;
if (s == 'b')
f.isBold = true;
}
});
} }
} }
}
if (tag == 'text' && !inText) { if (tag == 'page') {
let attrs = sax.getAttrsSync(tail); const attrs = sax.getAttrsSync(tail);
const line = { page = {
text: '', width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10), };
left: parseInt((attrs.left && attrs.left.value ? attrs.left.value : null), 10),
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
};
if (line.width != 0 || line.height != 0) { putPageLines();
inText = true; }
if (isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3)) {
putImage(line.top); if (tag == 'textline') {
i++; const attrs = sax.getAttrsSync(tail);
lines[i] = line; line = {
} text: '',
prevTop = line.top; top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10),
} left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10),
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
tOpen: '',
tClose: '',
isSectionTitle: false,
isSubtitle: false,
pageWidth: page.width,
fonts: [],
};
if (line.width != 0 || line.height != 0) {
pagelines.push(line);
} }
}
if (tag == 'image') { if (tag == 'string') {
const attrs = sax.getAttrsSync(tail); const attrs = sax.getAttrsSync(tail);
const src = (attrs.src && attrs.src.value ? attrs.src.value : ''); if (attrs.content && attrs.content.value) {
let tOpen = '';
let tClose = '';
const fontId = (attrs.stylerefs && attrs.stylerefs.value ? attrs.stylerefs.value : '');
if (fontId && fonts[fontId]) {
tOpen = fonts[fontId].tOpen;
tClose = fonts[fontId].tClose;
if (!line.fonts.length || line.fonts[0] != fontId)
line.fonts.push(fontId);
}
if (line.tOpen != tOpen) {
line.text += line.tClose + tOpen;
line.tOpen = tOpen;
line.tClose = tClose;
}
line.text += `${line.text.length ? ' ' : ''}${attrs.content.value}`;
}
}
if (tag == 'illustration') {
const attrs = sax.getAttrsSync(tail);
if (attrs.type && attrs.type.value == 'image') {
let src = (attrs.fileid && attrs.fileid.value ? attrs.fileid.value : '');
if (src) { if (src) {
const image = { const image = {
isImage: true, isImage: true,
src, src,
data: '', data: '',
type: '', type: '',
top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0, top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10) || 0,
left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10) || 0,
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10) || 0,
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10) || 0,
}; };
loading.push(loadImage(image)); const exists = images.filter(img => (img.top == image.top && img.left == image.left && img.width == image.width && img.height == image.height));
images.push(image); if (!exists.length) {
images.sort((a, b) => a.top - b.top) loading.push(loadImage(image));
images.push(image);
images.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left));
}
} }
} }
if (tag == 'page') {
putImage(100000);
}
} }
}; };
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (inText) {
switch (tag) {
case 'i':
italic = false;
break;
case 'b':
bold = false;
break;
}
}
if (tag == 'text')
inText = false;
};
let buf = this.decode(data).toString(); let buf = this.decode(data).toString();
sax.parseSync(buf, { sax.parseSync(buf, {
onStartNode, onEndNode, onTextNode onStartNode
}); });
putImage(100000); putPageLines();
await Promise.all(loading); await Promise.all(loading);
await utils.sleep(100);
//найдем параграфы и отступы //найдем параграфы и отступы
const indents = []; const indents = [];
@@ -187,11 +277,29 @@ class ConvertPdf extends ConvertHtml {
} }
indents[0] = 0; indents[0] = 0;
//формируем текст //title
const limitSize = 2*this.config.maxUploadFileSize; if (fs.pathExists(metaFile)) {
const metaXmlString = (await fs.readFile(metaFile)).toString();
let metaXmlParsed = xmlParser.parseXml(metaXmlString);
metaXmlParsed = xmlParser.simplifyXmlParsed(metaXmlParsed);
if (metaXmlParsed.metadata) {
title = (metaXmlParsed.metadata.title ? metaXmlParsed.metadata.title._t : '');
author = (metaXmlParsed.metadata.author ? metaXmlParsed.metadata.author._t : '');
}
}
if (!title && uploadFileName) if (!title && uploadFileName)
title = uploadFileName; title = uploadFileName;
let text = `<title>${title}</title>`;
//console.log(JSON.stringify(lines, null, 2));
//формируем текст
const limitSize = 2*this.config.maxUploadFileSize;
let text = '';
if (title)
text += `<fb2-title>${title}</fb2-title>`;
if (author)
text += `<fb2-author>${author}</fb2-author>`;
let concat = ''; let concat = '';
let sp = ''; let sp = '';
for (const line of lines) { for (const line of lines) {
@@ -204,6 +312,16 @@ class ConvertPdf extends ConvertHtml {
continue; continue;
} }
if (line.isSectionTitle) {
text += `<fb2-section-title>${line.text.trim()}</fb2-section-title>`;
continue;
}
if (line.isSubtitle) {
text += `<br><fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
continue;
}
if (concat == '') { if (concat == '') {
const left = line.left || 0; const left = line.left || 0;
sp = ' '.repeat(indents[left]); sp = ' '.repeat(indents[left]);
@@ -221,7 +339,9 @@ class ConvertPdf extends ConvertHtml {
if (concat) if (concat)
text += sp + concat + "\n"; text += sp + concat + "\n";
return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true}); //console.log(text);
await utils.sleep(100);
return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
} }
} }

View File

@@ -48,7 +48,7 @@ class ConvertSites extends ConvertHtml {
if (text === false) if (text === false)
return false; return false;
return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true}); return await super.run(Buffer.from(text), {skipCheck: true});
} }
getTitle(text) { getTitle(text) {
@@ -79,7 +79,7 @@ class ConvertSites extends ConvertHtml {
let book = this.getTitle(text); let book = this.getTitle(text);
book = book.replace(' (fb2) | Флибуста', ''); book = book.replace(' (fb2) | Флибуста', '');
const title = `<title>${author}${(author ? ' - ' : '')}${book}</title>`; const title = `<fb2-title>${author}${(author ? ' - ' : '')}${book}</fb2-title>`;
let begin = '<h3 class="book">'; let begin = '<h3 class="book">';
if (text.indexOf(begin) <= 0) if (text.indexOf(begin) <= 0)
@@ -95,12 +95,12 @@ class ConvertSites extends ConvertHtml {
return text.substring(l, r) return text.substring(l, r)
.replace(/blockquote class="?book"?/g, 'p') .replace(/blockquote class="?book"?/g, 'p')
.replace(/<br\/?>\s*<\/h3>/g, '</h3>') .replace(/<br\/?>\s*<\/h3>/g, '</h3>')
.replace(/<h3 class="?book"?>/g, '<br><br><subtitle>') .replace(/<h3 class="?book"?>/g, '<br><br><fb2-subtitle>')
.replace(/<h5 class="?book"?>/g, '<br><br><subtitle>') .replace(/<h5 class="?book"?>/g, '<br><br><fb2-subtitle>')
.replace(/<h3>/g, '<br><br><subtitle>') .replace(/<h3>/g, '<br><br><fb2-subtitle>')
.replace(/<h5>/g, '<br><br><subtitle>') .replace(/<h5>/g, '<br><br><fb2-subtitle>')
.replace(/<\/h3>/g, '</subtitle><br>') .replace(/<\/h3>/g, '</fb2-subtitle><br>')
.replace(/<\/h5>/g, '</subtitle><br>') .replace(/<\/h5>/g, '</fb2-subtitle><br>')
.replace(/<div class="?stanza"?>/g, '<br>') .replace(/<div class="?stanza"?>/g, '<br>')
.replace(/<div>/g, '<br>') .replace(/<div>/g, '<br>')
+ title; + title;

View File

@@ -12,7 +12,7 @@ const utils = require('../utils');
const log = new (require('../AppLogger'))().log;//singleton const log = new (require('../AppLogger'))().log;//singleton
const cleanDirPeriod = 60*60*1000;//1 раз в час const cleanDirPeriod = 60*60*1000;//1 раз в час
const queue = new LimitedQueue(5, 100, 4*60*1000);//4 минуты ожидание подвижек const queue = new LimitedQueue(5, 100, 2*60*1000 + 15000);//2 минуты ожидание подвижек
let instance = null; let instance = null;
@@ -130,7 +130,8 @@ class ReaderWorker {
convertFilename = `${this.config.tempDownloadDir}/${tempFilename2}`; convertFilename = `${this.config.tempDownloadDir}/${tempFilename2}`;
await this.bookConverter.convertToFb2(decompFiles, convertFilename, opts, progress => { await this.bookConverter.convertToFb2(decompFiles, convertFilename, opts, progress => {
wState.set({progress}); wState.set({progress});
q.resetTimeout(); if (queue.freed > 0)
q.resetTimeout();
}, q.abort); }, q.abort);
//сжимаем файл в tmp, если там уже нет с тем же именем-sha256 //сжимаем файл в tmp, если там уже нет с тем же именем-sha256

View File

@@ -6,7 +6,8 @@ function parseSync(xstr, options) {
onCdata: _onCdata = dummy, onCdata: _onCdata = dummy,
onComment: _onComment = dummy, onComment: _onComment = dummy,
onProgress: _onProgress = dummy, onProgress: _onProgress = dummy,
innerCut = new Set() innerCut = new Set(),
lowerCase = true,
} = options; } = options;
let i = 0; let i = 0;
@@ -91,7 +92,8 @@ function parseSync(xstr, options) {
} else { } else {
tag = tagData; tag = tagData;
} }
tag = tag.toLowerCase(); if (lowerCase)
tag = tag.toLowerCase();
if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) { if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
if (!cutCounter) if (!cutCounter)
@@ -146,7 +148,8 @@ async function parse(xstr, options) {
onCdata: _onCdata = dummy, onCdata: _onCdata = dummy,
onComment: _onComment = dummy, onComment: _onComment = dummy,
onProgress: _onProgress = dummy, onProgress: _onProgress = dummy,
innerCut = new Set() innerCut = new Set(),
lowerCase = true,
} = options; } = options;
let i = 0; let i = 0;
@@ -231,7 +234,8 @@ async function parse(xstr, options) {
} else { } else {
tag = tagData; tag = tagData;
} }
tag = tag.toLowerCase(); if (lowerCase)
tag = tag.toLowerCase();
if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) { if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
if (!cutCounter) if (!cutCounter)
@@ -276,7 +280,7 @@ async function parse(xstr, options) {
await _onProgress(100); await _onProgress(100);
} }
function getAttrsSync(tail) { function getAttrsSync(tail, lowerCase = true) {
let result = {}; let result = {};
let name = ''; let name = '';
let value = ''; let value = '';
@@ -287,13 +291,16 @@ function getAttrsSync(tail) {
let waitEq = false; let waitEq = false;
const pushResult = () => { const pushResult = () => {
if (lowerCase)
name = name.toLowerCase();
if (name != '') { if (name != '') {
const fn = name;
let ns = ''; let ns = '';
if (name.indexOf(':') >= 0) { if (fn.indexOf(':') >= 0) {
[ns, name] = name.split(':'); [ns, name] = fn.split(':');
} }
result[name] = {value, ns}; result[name] = {value, ns, fn};
} }
name = ''; name = '';
value = ''; value = '';

143
server/core/xmlParser.js Normal file
View File

@@ -0,0 +1,143 @@
const sax = require('./sax');
/**
 * Builds a complete XML document string: an XML declaration followed by the
 * serialized node tree.
 *
 * The encoding is only written into the declaration; no conversion of the
 * resulting string is performed here.
 *
 * @param {*} xmlParsed - node tree, passed as-is to formatXmlNode
 * @param {string} [encoding='utf-8'] - value of the declaration's `encoding` attribute
 * @param {function} [textFilterFunc] - text filter, passed as-is to formatXmlNode
 * @returns {string} declaration + serialized tree
 */
function formatXml(xmlParsed, encoding = 'utf-8', textFilterFunc) {
    const declaration = `<?xml version="1.0" encoding="${encoding}"?>`;
    const body = formatXmlNode(xmlParsed, textFilterFunc);

    return declaration + body;
}
/**
 * Serializes a node tree into XML markup (without an XML declaration).
 *
 * How each kind of node is written:
 *  - null / undefined: nothing is emitted;
 *  - array: every item is serialized in order, each without an inherited name;
 *  - string / number / boolean: emitted as text, wrapped into `<name>…</name>`
 *    when a name is known (i.e. the value was stored under a key of its parent);
 *  - object: `_n` overrides the tag name, `_attrs` is a map of attributes,
 *    `_t` is the text placed first in the body, `_a` is an array of children.
 *    Every other key that does not start with `_` is serialized as a child
 *    whose tag name is the key. Keys starting with `_` (except `_a`) are skipped.
 *
 * Attribute values are written verbatim; no escaping is applied to them.
 *
 * @param {*} node - node tree to serialize
 * @param {function} [textFilterFunc] - applied to every text value before it
 *      is written; defaults to the identity function
 * @returns {string} serialized markup
 */
function formatXmlNode(node, textFilterFunc) {
    const filterText = (textFilterFunc ? textFilterFunc : text => text);
    // works for prototype-less objects too, unlike node.hasOwnProperty()
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    const formatNode = (node, name) => {
        let out = '';

        // typeof null == 'object', so it must be filtered out before the object branch
        if (node === null || node === undefined)
            return out;

        const nodeType = typeof node;

        if (Array.isArray(node)) {
            for (const n of node) {
                out += formatNode(n);
            }
        } else if (nodeType == 'string' || nodeType == 'number' || nodeType == 'boolean') {
            // numbers and booleans are written as their string form instead of being lost
            const text = (nodeType == 'string' ? node : String(node));
            if (name)
                out += `<${name}>${filterText(text)}</${name}>`;
            else
                out += filterText(text);
        } else {
            if (node._n)
                name = node._n;

            let attrs = '';
            if (node._attrs) {
                for (let attrName in node._attrs) {
                    attrs += ` ${attrName}="${node._attrs[attrName]}"`;
                }
            }

            let tOpen = '';
            let tBody = '';
            let tClose = '';

            if (name)
                tOpen += `<${name}${attrs}>`;

            if (hasOwn(node, '_t'))
                tBody += filterText(node._t);

            for (let nodeName in node) {
                // service fields (_n, _t, _attrs, _p, ...) are not children; _a is the child list
                if (nodeName && nodeName[0] == '_' && nodeName != '_a')
                    continue;

                const n = node[nodeName];
                tBody += formatNode(n, nodeName);
            }

            if (name)
                tClose += `</${name}>`;

            out += `${tOpen}${tBody}${tClose}`;
        }

        return out;
    };

    return formatNode(node);
}
/**
 * Parses an XML string into a tree of plain objects using the sax module.
 *
 * Every element becomes an object with:
 *  - `_n`: tag name as reported by sax,
 *  - `_p`: reference to the parent node,
 *  - `_attrs`: map "full attribute name -> value" (present only when the tag has attributes),
 *  - `_a`: array of child nodes (present only when there are children),
 *  - `_t`: the most recent text reported by sax while this node was current.
 *
 * The `?xml` declaration tag is ignored.
 *
 * @param {string} xmlString - XML source
 * @param {boolean} [lowerCase=true] - forwarded to sax.parseSync and sax.getAttrsSync
 * @returns {object} the first top-level node, or an empty object when nothing was parsed
 */
function parseXml(xmlString, lowerCase = true) {
    const root = {};
    // node that is currently being filled by the sax callbacks
    let current = root;

    // builds the "full name -> value" map for a tag tail, or null when there are no attributes
    const collectAttrs = (tail) => {
        const parsed = sax.getAttrsSync(tail, lowerCase);
        const keys = Object.keys(parsed);
        if (!keys.length)
            return null;

        const attrs = {};
        for (const key of keys) {
            attrs[parsed[key].fn] = parsed[key].value;
        }
        return attrs;
    };

    const handlers = {
        onStartNode: (tag, tail) => {
            if (tag == '?xml')
                return;

            const child = {_n: tag, _p: current};

            const attrs = (tail ? collectAttrs(tail) : null);
            if (attrs)
                child._attrs = attrs;

            if (!current._a)
                current._a = [];
            current._a.push(child);

            // descend into the freshly opened element
            current = child;
        },

        onEndNode: (tag) => {
            // climb up only when the closing tag matches the element being filled
            if (current._p && current._n == tag)
                current = current._p;
        },

        onTextNode: (text) => {
            current._t = text;
        },

        lowerCase,
    };

    sax.parseSync(xmlString, handlers);

    return (root._a ? root._a[0] : root);
}
/**
 * Converts a parsed node tree (objects with `_n`, `_a`, `_t`, `_attrs`) into
 * a nested object keyed by tag names, convenient for lookups like
 * `result.metadata.title._t`.
 *
 * Rules:
 *  - the result for a list of nodes is an object "tag name -> simplified node";
 *  - when several siblings share a tag name, only the first one is kept;
 *  - a simplified node contains its simplified children plus `_t` (when the
 *    text is non-empty) and `_attrs` (same object as in the source node);
 *  - nodes without `_n` and null/undefined entries are ignored.
 *
 * @param {object} node - root node of the parsed tree
 * @returns {object} object with a single key (the root tag name), or an empty
 *      object when the root has no name
 */
function simplifyXmlParsed(node) {
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    const simplifyNodeArray = (a) => {
        const result = {};

        for (let i = 0; i < a.length; i++) {
            const child = a[i];
            if (!child || !child._n)
                continue;

            const name = child._n;

            // '__proto__' cannot be stored by plain assignment (it would replace
            // the prototype of result), so such a tag stays ignored
            if (name == '__proto__')
                continue;

            // only own keys count as "already seen": tags named like Object.prototype
            // members (constructor, valueOf, ...) must not be dropped
            if (hasOwn(result, name))
                continue;

            const simplified = (child._a ? simplifyNodeArray(child._a) : {});
            if (child._t)
                simplified._t = child._t;
            if (child._attrs)
                simplified._attrs = child._attrs;

            result[name] = simplified;
        }

        return result;
    };

    return simplifyNodeArray([node]);
}
// Functions exported by this module:
// formatXml / formatXmlNode serialize a node tree to markup,
// parseXml builds a node tree from an XML string,
// simplifyXmlParsed turns a node tree into an object keyed by tag names.
module.exports = {
formatXml,
formatXmlNode,
parseXml,
simplifyXmlParsed
}