Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7fa891b4fc | ||
|
|
6cb7412cf3 | ||
|
|
157322834b | ||
|
|
1a13a0fee1 | ||
|
|
37256255bf | ||
|
|
75e01c899e | ||
|
|
ef0d6eab89 | ||
|
|
5d54b1b0f4 | ||
|
|
522f953b4f | ||
|
|
15f02c7115 | ||
|
|
174c877eee | ||
|
|
fd9ec736d7 | ||
|
|
2c94025ba3 | ||
|
|
bfadf35c40 | ||
|
|
f3b69caa12 | ||
|
|
18a83a5b0b | ||
|
|
bd9669b782 | ||
|
|
e05713aa7f | ||
|
|
bc3e1f0a6f | ||
|
|
063d01b5ca | ||
|
|
81c38d7749 | ||
|
|
a29842b084 | ||
|
|
bb5adcdaf6 | ||
|
|
537e17a219 | ||
|
|
03ce50153e | ||
|
|
15d01ad7fc | ||
|
|
e2b29e2c2f |
@@ -66,7 +66,7 @@ class Reader {
|
||||
await utils.sleep(refreshPause);
|
||||
|
||||
i++;
|
||||
if (i > 120*1000/refreshPause) {//2 мин ждем телодвижений воркера
|
||||
if (i > 180*1000/refreshPause) {//3 мин ждем телодвижений воркера
|
||||
throw new Error('Слишком долгое время ожидания');
|
||||
}
|
||||
//проверка воркера
|
||||
|
||||
@@ -77,9 +77,15 @@ export default class DrawHelper {
|
||||
let j = 0;
|
||||
//формируем строку
|
||||
for (const part of line.parts) {
|
||||
let tOpen = (part.style.bold ? '<b>' : '');
|
||||
let tOpen = '';
|
||||
tOpen += (part.style.bold ? '<b>' : '');
|
||||
tOpen += (part.style.italic ? '<i>' : '');
|
||||
let tClose = (part.style.italic ? '</i>' : '');
|
||||
tOpen += (part.style.sup ? '<span style="vertical-align: baseline; position: relative; line-height: 0; top: -0.3em">' : '');
|
||||
tOpen += (part.style.sub ? '<span style="vertical-align: baseline; position: relative; line-height: 0; top: 0.3em">' : '');
|
||||
let tClose = '';
|
||||
tClose += (part.style.sub ? '</span>' : '');
|
||||
tClose += (part.style.sup ? '</span>' : '');
|
||||
tClose += (part.style.italic ? '</i>' : '');
|
||||
tClose += (part.style.bold ? '</b>' : '');
|
||||
|
||||
let text = '';
|
||||
|
||||
@@ -285,7 +285,7 @@ export default class BookParser {
|
||||
sectionLevel++;
|
||||
}
|
||||
|
||||
if (tag == 'emphasis' || tag == 'strong') {
|
||||
if (tag == 'emphasis' || tag == 'strong' || tag == 'sup' || tag == 'sub') {
|
||||
growParagraph(`<${tag}>`, 0);
|
||||
}
|
||||
|
||||
@@ -343,7 +343,7 @@ export default class BookParser {
|
||||
sectionLevel--;
|
||||
}
|
||||
|
||||
if (tag == 'emphasis' || tag == 'strong') {
|
||||
if (tag == 'emphasis' || tag == 'strong' || tag == 'sup' || tag == 'sub') {
|
||||
growParagraph(`</${tag}>`, 0);
|
||||
}
|
||||
|
||||
@@ -507,7 +507,7 @@ export default class BookParser {
|
||||
|
||||
splitToStyle(s) {
|
||||
let result = [];/*array of {
|
||||
style: {bold: Boolean, italic: Boolean, center: Boolean, space: Number},
|
||||
style: {bold: Boolean, italic: Boolean, sup: Boolean, sub: Boolean, center: Boolean, space: Number},
|
||||
image: {local: Boolean, inline: Boolean, id: String},
|
||||
text: String,
|
||||
}*/
|
||||
@@ -530,6 +530,12 @@ export default class BookParser {
|
||||
case 'emphasis':
|
||||
style.italic = true;
|
||||
break;
|
||||
case 'sup':
|
||||
style.sup = true;
|
||||
break;
|
||||
case 'sub':
|
||||
style.sub = true;
|
||||
break;
|
||||
case 'center':
|
||||
style.center = true;
|
||||
break;
|
||||
@@ -580,6 +586,12 @@ export default class BookParser {
|
||||
case 'emphasis':
|
||||
style.italic = false;
|
||||
break;
|
||||
case 'sup':
|
||||
style.sup = false;
|
||||
break;
|
||||
case 'sub':
|
||||
style.sub = false;
|
||||
break;
|
||||
case 'center':
|
||||
style.center = false;
|
||||
break;
|
||||
|
||||
@@ -169,7 +169,7 @@ class BookManager {
|
||||
}
|
||||
|
||||
async deflateWithProgress(data, callback) {
|
||||
const chunkSize = 128*1024;
|
||||
const chunkSize = 512*1024;
|
||||
const deflator = new utils.pako.Deflate({level: 5});
|
||||
|
||||
let chunkTotal = 1 + Math.floor(data.length/chunkSize);
|
||||
@@ -203,7 +203,7 @@ class BookManager {
|
||||
}
|
||||
|
||||
async inflateWithProgress(data, callback) {
|
||||
const chunkSize = 64*1024;
|
||||
const chunkSize = 512*1024;
|
||||
const inflator = new utils.pako.Inflate({to: 'string'});
|
||||
|
||||
let chunkTotal = 1 + Math.floor(data.length/chunkSize);
|
||||
|
||||
@@ -1,4 +1,15 @@
|
||||
export const versionHistory = [
|
||||
{
|
||||
showUntil: '2020-12-08',
|
||||
header: '0.9.11 (2020-12-09)',
|
||||
content:
|
||||
`
|
||||
<ul>
|
||||
<li>оптимизации, улучшения работы конвертеров</li>
|
||||
</ul>
|
||||
`
|
||||
},
|
||||
|
||||
{
|
||||
showUntil: '2020-12-10',
|
||||
header: '0.9.10 (2020-12-03)',
|
||||
|
||||
@@ -32,11 +32,23 @@ sudo -u www-data mkdir -p /home/liberama/data/calibre
|
||||
sudo -u www-data tar xvf calibre-5.5.0-x86_64.txz -C /home/liberama/data/calibre
|
||||
```
|
||||
|
||||
### external converter `pdfalto`, github https://github.com/kermitt2/pdfalto
|
||||
```
|
||||
git clone https://github.com/kermitt2/pdfalto
|
||||
cd pdfalto
|
||||
git submodule update --init --recursive
|
||||
cmake ./
|
||||
добавить в начало CMakeLists.txt строчку: set(CMAKE_EXE_LINKER_FLAGS "-no-pie")
|
||||
make
|
||||
|
||||
sudo -u www-data mkdir -p /home/liberama/data/pdfalto
|
||||
sudo -u www-data cp pdfalto /home/liberama/data/pdfalto
|
||||
```
|
||||
|
||||
### external converters
|
||||
```
|
||||
sudo apt install rar
|
||||
sudo apt install libreoffice
|
||||
sudo apt install poppler-utils
|
||||
sudo apt install djvulibre-bin
|
||||
sudo apt install libtiff-tools
|
||||
sudo apt install graphicsmagick-imagemagick-compat
|
||||
|
||||
2
package-lock.json
generated
2
package-lock.json
generated
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "Liberama",
|
||||
"version": "0.9.10",
|
||||
"version": "0.9.11",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "Liberama",
|
||||
"version": "0.9.10",
|
||||
"version": "0.9.11",
|
||||
"author": "Book Pauk <bookpauk@gmail.com>",
|
||||
"license": "CC0-1.0",
|
||||
"repository": "bookpauk/liberama",
|
||||
@@ -8,7 +8,7 @@
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"scripts": {
|
||||
"dev": "nodemon --inspect --ignore server/public --ignore server/data --exec 'node server'",
|
||||
"dev": "nodemon --inspect --ignore server/public --ignore server/data --ignore client --exec 'node server'",
|
||||
"build:client": "webpack --config build/webpack.prod.config.js",
|
||||
"build:linux": "npm run build:client && node build/linux && pkg -t latest-linux-x64 -o dist/linux/liberama .",
|
||||
"build:win": "npm run build:client && node build/win && pkg -t latest-win-x64 -o dist/win/liberama .",
|
||||
|
||||
@@ -136,7 +136,7 @@ class WebSocketController {
|
||||
break;
|
||||
|
||||
i++;
|
||||
if (i > 2*60*1000/refreshPause) {//2 мин ждем телодвижений воркера
|
||||
if (i > 3*60*1000/refreshPause) {//3 мин ждем телодвижений воркера
|
||||
this.send({state: 'error', error: 'Время ожидания процесса истекло'}, req, ws);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -5,8 +5,9 @@ const he = require('he');
|
||||
const LimitedQueue = require('../../LimitedQueue');
|
||||
const textUtils = require('./textUtils');
|
||||
const utils = require('../../utils');
|
||||
const xmlParser = require('../../xmlParser');
|
||||
|
||||
const queue = new LimitedQueue(3, 20, 3*60*1000);//3 минуты ожидание подвижек
|
||||
const queue = new LimitedQueue(3, 20, 2*60*1000);//2 минуты ожидание подвижек
|
||||
|
||||
class ConvertBase {
|
||||
constructor(config) {
|
||||
@@ -14,7 +15,6 @@ class ConvertBase {
|
||||
|
||||
this.calibrePath = `${config.dataDir}/calibre/ebook-convert`;
|
||||
this.sofficePath = '/usr/bin/soffice';
|
||||
this.pdfToHtmlPath = '/usr/bin/pdftohtml';
|
||||
}
|
||||
|
||||
async run(data, opts) {// eslint-disable-line no-unused-vars
|
||||
@@ -27,9 +27,6 @@ class ConvertBase {
|
||||
|
||||
if (!await fs.pathExists(this.sofficePath))
|
||||
throw new Error('Внешний конвертер LibreOffice не найден');
|
||||
|
||||
if (!await fs.pathExists(this.pdfToHtmlPath))
|
||||
throw new Error('Внешний конвертер pdftohtml не найден');
|
||||
}
|
||||
|
||||
async execConverter(path, args, onData, abort) {
|
||||
@@ -106,61 +103,14 @@ class ConvertBase {
|
||||
}
|
||||
|
||||
formatFb2(fb2) {
|
||||
let out = '<?xml version="1.0" encoding="utf-8"?>';
|
||||
out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">';
|
||||
out += this.formatFb2Node(fb2);
|
||||
out += '</FictionBook>';
|
||||
return out;
|
||||
}
|
||||
|
||||
formatFb2Node(node, name) {
|
||||
let out = '';
|
||||
|
||||
if (Array.isArray(node)) {
|
||||
for (const n of node) {
|
||||
out += this.formatFb2Node(n);
|
||||
const out = xmlParser.formatXml({
|
||||
FictionBook: {
|
||||
_attrs: {xmlns: 'http://www.gribuser.ru/xml/fictionbook/2.0', 'xmlns:l': 'http://www.w3.org/1999/xlink'},
|
||||
_a: [fb2],
|
||||
}
|
||||
} else if (typeof node == 'string') {
|
||||
if (name)
|
||||
out += `<${name}>${this.repSpaces(node)}</${name}>`;
|
||||
else
|
||||
out += this.repSpaces(node);
|
||||
} else {
|
||||
if (node._n)
|
||||
name = node._n;
|
||||
}, 'utf-8', this.repSpaces);
|
||||
|
||||
let attrs = '';
|
||||
if (node._attrs) {
|
||||
for (let attrName in node._attrs) {
|
||||
attrs += ` ${attrName}="${node._attrs[attrName]}"`;
|
||||
}
|
||||
}
|
||||
|
||||
let tOpen = '';
|
||||
let tBody = '';
|
||||
let tClose = '';
|
||||
if (name)
|
||||
tOpen += `<${name}${attrs}>`;
|
||||
if (node.hasOwnProperty('_t'))
|
||||
tBody += this.repSpaces(node._t);
|
||||
|
||||
for (let nodeName in node) {
|
||||
if (nodeName && nodeName[0] == '_' && nodeName != '_a')
|
||||
continue;
|
||||
|
||||
const n = node[nodeName];
|
||||
tBody += this.formatFb2Node(n, nodeName);
|
||||
}
|
||||
|
||||
if (name)
|
||||
tClose += `</${name}>`;
|
||||
|
||||
if (attrs == '' && name == 'p' && tBody.trim() == '')
|
||||
out += '<empty-line/>'
|
||||
else
|
||||
out += `${tOpen}${tBody}${tClose}`;
|
||||
}
|
||||
return out;
|
||||
return out.replace(/<p>\s*?<\/p>/g, '<empty-line/>');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
const utils = require('../../utils');
|
||||
|
||||
const ConvertHtml = require('./ConvertHtml');
|
||||
const ConvertBase = require('./ConvertBase');
|
||||
|
||||
class ConvertDjvu extends ConvertHtml {
|
||||
class ConvertDjvu extends ConvertBase {
|
||||
check(data, opts) {
|
||||
const {inputFiles} = opts;
|
||||
|
||||
@@ -59,9 +59,17 @@ class ConvertDjvu extends ConvertHtml {
|
||||
}, abort);
|
||||
|
||||
//читаем изображения
|
||||
limitSize = 2*this.config.maxUploadFileSize;
|
||||
let imagesSize = 0;
|
||||
|
||||
const loadImage = async(image) => {
|
||||
image.data = (await fs.readFile(image.file)).toString('base64');
|
||||
image.name = path.basename(image.file);
|
||||
|
||||
imagesSize += image.data.length;
|
||||
if (imagesSize > limitSize) {
|
||||
throw new Error(`Файл для конвертирования слишком большой|FORLOG| imagesSize: ${imagesSize} > ${limitSize}`);
|
||||
}
|
||||
}
|
||||
|
||||
let files = [];
|
||||
@@ -82,20 +90,29 @@ class ConvertDjvu extends ConvertHtml {
|
||||
|
||||
await Promise.all(loading);
|
||||
|
||||
//формируем текст
|
||||
limitSize = 2*this.config.maxUploadFileSize;
|
||||
//формируем fb2
|
||||
let titleInfo = {};
|
||||
let desc = {_n: 'description', 'title-info': titleInfo};
|
||||
let pars = [];
|
||||
let body = {_n: 'body', section: {_a: [pars]}};
|
||||
let binary = [];
|
||||
let fb2 = [desc, body, binary];
|
||||
|
||||
let title = '';
|
||||
if (uploadFileName)
|
||||
title = uploadFileName;
|
||||
let text = `<title>${title}</title>`;
|
||||
for (const image of images) {
|
||||
text += `<fb2-image type="image/jpeg" name="${image.name}">${image.data}</fb2-image>`;
|
||||
|
||||
if (text.length > limitSize) {
|
||||
throw new Error(`Файл для конвертирования слишком большой|FORLOG| text.length: ${text.length} > ${limitSize}`);
|
||||
}
|
||||
titleInfo['book-title'] = title;
|
||||
|
||||
for (const image of images) {
|
||||
const img = {_n: 'binary', _attrs: {id: image.name, 'content-type': 'image/jpeg'}, _t: image.data};
|
||||
binary.push(img);
|
||||
|
||||
pars.push({_n: 'p', _t: ''});
|
||||
pars.push({_n: 'image', _attrs: {'l:href': `#${image.name}`}});
|
||||
}
|
||||
return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true});
|
||||
|
||||
return this.formatFb2(fb2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ const fs = require('fs-extra');
|
||||
|
||||
const ConvertHtml = require('./ConvertHtml');
|
||||
|
||||
class ConvertDocX extends ConvertHtml {
|
||||
class ConvertFb3 extends ConvertHtml {
|
||||
async check(data, opts) {
|
||||
const {inputFiles} = opts;
|
||||
if (this.config.useExternalBookConverter &&
|
||||
@@ -39,13 +39,14 @@ class ConvertDocX extends ConvertHtml {
|
||||
const title = this.getTitle(text)
|
||||
.replace(/<\/?p>/g, '')
|
||||
;
|
||||
text = `<title>${title}</title>` + text
|
||||
text = `<fb2-title>${title}</fb2-title>` + text
|
||||
.replace(/<title>/g, '<br><b>')
|
||||
.replace(/<\/title>/g, '</b><br>')
|
||||
.replace(/<subtitle>/g, '<br><br><subtitle>')
|
||||
.replace(/<subtitle>/g, '<br><br><fb2-subtitle>')
|
||||
.replace(/<\/subtitle>/g, '</fb2-subtitle>')
|
||||
;
|
||||
return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true});
|
||||
return await super.run(Buffer.from(text), {skipCheck: true});
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ConvertDocX;
|
||||
module.exports = ConvertFb3;
|
||||
|
||||
@@ -34,7 +34,6 @@ class ConvertHtml extends ConvertBase {
|
||||
} else {
|
||||
isText = opts.isText;
|
||||
}
|
||||
let {cutTitle} = opts;
|
||||
|
||||
let titleInfo = {};
|
||||
let desc = {_n: 'description', 'title-info': titleInfo};
|
||||
@@ -44,12 +43,17 @@ class ConvertHtml extends ConvertBase {
|
||||
let fb2 = [desc, body, binary];
|
||||
|
||||
let title = '';
|
||||
let author = '';
|
||||
let inTitle = false;
|
||||
let inSectionTitle = false;
|
||||
let inAuthor = false;
|
||||
let inSubTitle = false;
|
||||
let inImage = false;
|
||||
let image = {};
|
||||
let bold = false;
|
||||
let italic = false;
|
||||
let superscript = false;
|
||||
let subscript = false;
|
||||
let begining = true;
|
||||
|
||||
let spaceCounter = [];
|
||||
@@ -62,7 +66,7 @@ class ConvertHtml extends ConvertBase {
|
||||
};
|
||||
|
||||
const growParagraph = (text) => {
|
||||
if (!pars.length)
|
||||
if (!pars.length || pars[pars.length - 1]._n != 'p')
|
||||
newParagraph();
|
||||
|
||||
const l = pars.length;
|
||||
@@ -94,12 +98,16 @@ class ConvertHtml extends ConvertBase {
|
||||
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
text = this.escapeEntities(text);
|
||||
|
||||
if (!cutCounter && !(cutTitle && inTitle)) {
|
||||
if (!(cutCounter || inTitle || inSectionTitle || inSubTitle)) {
|
||||
let tOpen = '';
|
||||
tOpen += (inSubTitle ? '<subtitle>' : '');
|
||||
tOpen += (bold ? '<strong>' : '');
|
||||
tOpen += (italic ? '<emphasis>' : '');
|
||||
tOpen += (superscript ? '<sup>' : '');
|
||||
tOpen += (subscript ? '<sub>' : '');
|
||||
let tClose = ''
|
||||
tClose += (subscript ? '</sub>' : '');
|
||||
tClose += (superscript ? '</sup>' : '');
|
||||
tClose += (italic ? '</emphasis>' : '');
|
||||
tClose += (bold ? '</strong>' : '');
|
||||
tClose += (inSubTitle ? '</subtitle>' : '');
|
||||
@@ -110,12 +118,22 @@ class ConvertHtml extends ConvertBase {
|
||||
if (inTitle && !title)
|
||||
title = text;
|
||||
|
||||
if (inAuthor && !author)
|
||||
author = text;
|
||||
|
||||
if (inSectionTitle) {
|
||||
pars.unshift({_n: 'title', _t: text});
|
||||
}
|
||||
|
||||
if (inSubTitle) {
|
||||
pars.push({_n: 'subtitle', _t: text});
|
||||
}
|
||||
|
||||
if (inImage) {
|
||||
image._t = text;
|
||||
binary.push(image);
|
||||
|
||||
pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
|
||||
newParagraph();
|
||||
}
|
||||
|
||||
};
|
||||
@@ -140,15 +158,27 @@ class ConvertHtml extends ConvertBase {
|
||||
bold = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (tag == 'sup')
|
||||
superscript = true;
|
||||
|
||||
if (tag == 'sub')
|
||||
subscript = true;
|
||||
}
|
||||
|
||||
if (tag == 'title' || tag == 'cut-title') {
|
||||
if (tag == 'title' || tag == 'fb2-title') {
|
||||
inTitle = true;
|
||||
if (tag == 'cut-title')
|
||||
cutTitle = true;
|
||||
}
|
||||
|
||||
if (tag == 'subtitle') {
|
||||
if (tag == 'fb2-author') {
|
||||
inAuthor = true;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-section-title') {
|
||||
inSectionTitle = true;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-subtitle') {
|
||||
inSubTitle = true;
|
||||
}
|
||||
|
||||
@@ -156,7 +186,7 @@ class ConvertHtml extends ConvertBase {
|
||||
inImage = true;
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
image = {_n: 'binary', _attrs: {id: attrs.name.value, 'content-type': attrs.type.value}, _t: ''};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
@@ -179,12 +209,26 @@ class ConvertHtml extends ConvertBase {
|
||||
bold = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (tag == 'sup')
|
||||
superscript = false;
|
||||
|
||||
if (tag == 'sub')
|
||||
subscript = false;
|
||||
}
|
||||
|
||||
if (tag == 'title' || tag == 'cut-title')
|
||||
if (tag == 'title' || tag == 'fb2-title')
|
||||
inTitle = false;
|
||||
|
||||
if (tag == 'subtitle')
|
||||
if (tag == 'fb2-author') {
|
||||
inAuthor = false;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-section-title') {
|
||||
inSectionTitle = false;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-subtitle')
|
||||
inSubTitle = false;
|
||||
|
||||
if (tag == 'fb2-image')
|
||||
@@ -195,10 +239,17 @@ class ConvertHtml extends ConvertBase {
|
||||
|
||||
sax.parseSync(buf, {
|
||||
onStartNode, onEndNode, onTextNode,
|
||||
innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image'])
|
||||
innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image', 'fb2-title', 'fb2-author'])
|
||||
});
|
||||
|
||||
titleInfo['book-title'] = title;
|
||||
if (author)
|
||||
titleInfo.author = {'last-name': author};
|
||||
|
||||
body.section._a[0] = pars;
|
||||
|
||||
//console.log(JSON.stringify(fb2, null, 2));
|
||||
|
||||
//подозрение на чистый текст, надо разбить на параграфы
|
||||
if (isText || (buf.length > 30*1024 && pars.length < buf.length/2000)) {
|
||||
let total = 0;
|
||||
@@ -228,56 +279,49 @@ class ConvertHtml extends ConvertBase {
|
||||
if (parIndent > 2) parIndent--;
|
||||
|
||||
let newPars = [];
|
||||
let curPar = {};
|
||||
const newPar = () => {
|
||||
newPars.push({_n: 'p', _t: ''});
|
||||
curPar = {_n: 'p', _t: ''};
|
||||
newPars.push(curPar);
|
||||
};
|
||||
|
||||
const growPar = (text) => {
|
||||
if (!newPars.length)
|
||||
newPar();
|
||||
|
||||
const l = newPars.length;
|
||||
newPars[l - 1]._t += text;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (const par of pars) {
|
||||
if (par._n != 'p') {
|
||||
newPars.push(par);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i > 0)
|
||||
newPar();
|
||||
i++;
|
||||
|
||||
let j = 0;
|
||||
newPar();
|
||||
|
||||
const lines = par._t.split('\n');
|
||||
for (let line of lines) {
|
||||
line = repCrLfTab(line);
|
||||
for (let j = 0; j < lines.length; j++) {
|
||||
const line = repCrLfTab(lines[j]);
|
||||
|
||||
let l = 0;
|
||||
while (l < line.length && line[l] == ' ') {
|
||||
l++;
|
||||
}
|
||||
|
||||
if (l >= parIndent || line == '') {
|
||||
if (j > 0)
|
||||
newPar();
|
||||
j++;
|
||||
if (j > 0 &&
|
||||
(l >= parIndent ||
|
||||
(j < lines.length - 1 && line == '')
|
||||
)
|
||||
) {
|
||||
newPar();
|
||||
}
|
||||
growPar(line.trim() + ' ');
|
||||
|
||||
curPar._t += line.trim() + ' ';
|
||||
}
|
||||
}
|
||||
|
||||
body.section._a[0] = newPars;
|
||||
} else {
|
||||
body.section._a[0] = pars;
|
||||
}
|
||||
|
||||
//убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги
|
||||
bold = false;
|
||||
italic = false;
|
||||
superscript = false;
|
||||
subscript = false;
|
||||
inSubTitle = false;
|
||||
pars = body.section._a[0];
|
||||
for (let i = 0; i < pars.length; i++) {
|
||||
@@ -297,7 +341,11 @@ class ConvertHtml extends ConvertBase {
|
||||
tOpen += (inSubTitle ? '<subtitle>' : '');
|
||||
tOpen += (bold ? '<strong>' : '');
|
||||
tOpen += (italic ? '<emphasis>' : '');
|
||||
tOpen += (superscript ? '<sup>' : '');
|
||||
tOpen += (subscript ? '<sub>' : '');
|
||||
let tClose = ''
|
||||
tClose += (subscript ? '</sub>' : '');
|
||||
tClose += (superscript ? '</sup>' : '');
|
||||
tClose += (italic ? '</emphasis>' : '');
|
||||
tClose += (bold ? '</strong>' : '');
|
||||
tClose += (inSubTitle ? '</subtitle>' : '');
|
||||
@@ -313,6 +361,10 @@ class ConvertHtml extends ConvertBase {
|
||||
bold = true;
|
||||
if (tag == 'emphasis')
|
||||
italic = true;
|
||||
if (tag == 'sup')
|
||||
superscript = true;
|
||||
if (tag == 'sub')
|
||||
subscript = true;
|
||||
if (tag == 'subtitle')
|
||||
inSubTitle = true;
|
||||
}
|
||||
@@ -322,6 +374,10 @@ class ConvertHtml extends ConvertBase {
|
||||
bold = false;
|
||||
if (tag == 'emphasis')
|
||||
italic = false;
|
||||
if (tag == 'sup')
|
||||
superscript = false;
|
||||
if (tag == 'sub')
|
||||
subscript = false;
|
||||
if (tag == 'subtitle')
|
||||
inSubTitle = false;
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
//const _ = require('lodash');
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const sax = require('../../sax');
|
||||
const utils = require('../../utils');
|
||||
const ConvertHtml = require('./ConvertHtml');
|
||||
const xmlParser = require('../../xmlParser');
|
||||
|
||||
class ConvertPdf extends ConvertHtml {
|
||||
check(data, opts) {
|
||||
@@ -22,11 +24,18 @@ class ConvertPdf extends ConvertHtml {
|
||||
const {inputFiles, callback, abort, uploadFileName} = opts;
|
||||
|
||||
const inpFile = inputFiles.sourceFile;
|
||||
const outFile = `${inputFiles.filesDir}/${utils.randomHexString(10)}.xml`;
|
||||
const outBasename = `${inputFiles.filesDir}/${utils.randomHexString(10)}`;
|
||||
const outFile = `${outBasename}.xml`;
|
||||
const metaFile = `${outBasename}_metadata.xml`;
|
||||
|
||||
const pdfaltoPath = `${this.config.dataDir}/pdfalto/pdfalto`;
|
||||
|
||||
if (!await fs.pathExists(pdfaltoPath))
|
||||
throw new Error('Внешний конвертер pdfalto не найден');
|
||||
|
||||
//конвертируем в xml
|
||||
let perc = 0;
|
||||
await this.execConverter(this.pdfToHtmlPath, ['-nodrm', '-c', '-s', '-xml', inpFile, outFile], () => {
|
||||
await this.execConverter(pdfaltoPath, [inpFile, outFile], () => {
|
||||
perc = (perc < 80 ? perc + 10 : 40);
|
||||
callback(perc);
|
||||
}, abort);
|
||||
@@ -35,17 +44,22 @@ class ConvertPdf extends ConvertHtml {
|
||||
const data = await fs.readFile(outFile);
|
||||
callback(90);
|
||||
|
||||
await utils.sleep(100);
|
||||
|
||||
//парсим xml
|
||||
let lines = [];
|
||||
let pagelines = [];
|
||||
let line = {text: ''};
|
||||
let page = {};
|
||||
let fonts = {};
|
||||
let sectionTitleFound = false;
|
||||
|
||||
let images = [];
|
||||
let loading = [];
|
||||
let inText = false;
|
||||
let bold = false;
|
||||
let italic = false;
|
||||
|
||||
let title = '';
|
||||
let prevTop = 0;
|
||||
let author = '';
|
||||
let i = -1;
|
||||
let titleCount = 0;
|
||||
|
||||
const loadImage = async(image) => {
|
||||
const src = path.parse(image.src);
|
||||
@@ -59,7 +73,7 @@ class ConvertPdf extends ConvertHtml {
|
||||
image.type = type;
|
||||
image.name = src.base;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const putImage = (curTop) => {
|
||||
if (!isNaN(curTop) && images.length) {
|
||||
@@ -69,104 +83,180 @@ class ConvertPdf extends ConvertHtml {
|
||||
images.shift();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (!cutCounter && inText) {
|
||||
let tOpen = (bold ? '<b>' : '');
|
||||
tOpen += (italic ? '<i>' : '');
|
||||
let tClose = (italic ? '</i>' : '');
|
||||
tClose += (bold ? '</b>' : '');
|
||||
const putPageLines = () => {
|
||||
pagelines.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left))
|
||||
|
||||
//объединяем в одну строку равные по высоте
|
||||
const pl = [];
|
||||
let pt = 0;
|
||||
let j = -1;
|
||||
pagelines.forEach(line => {
|
||||
//добавим закрывающий тег стиля
|
||||
line.text += line.tClose;
|
||||
|
||||
lines[i].text += `${tOpen}${text}${tClose} `;
|
||||
if (titleCount < 2 && text.trim() != '') {
|
||||
title += text + (titleCount ? '' : ' - ');
|
||||
titleCount++;
|
||||
//проверим, возможно это заголовок
|
||||
if (line.fonts.length == 1 && line.pageWidth) {
|
||||
const f = (line.fonts.length ? fonts[line.fonts[0]] : null);
|
||||
const centerLeft = (line.pageWidth - line.width)/2;
|
||||
if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
|
||||
if (!sectionTitleFound) {
|
||||
line.isSectionTitle = true;
|
||||
sectionTitleFound = true;
|
||||
} else {
|
||||
line.isSubtitle = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//объединяем
|
||||
if (pt == 0 || Math.abs(pt - line.top) > 3) {
|
||||
j++;
|
||||
pl[j] = line;
|
||||
} else {
|
||||
pl[j].text += ` ${line.text}`;
|
||||
}
|
||||
pt = line.top;
|
||||
});
|
||||
|
||||
//заполняем lines
|
||||
const lastIndex = i;
|
||||
pl.forEach(line => {
|
||||
putImage(line.top);
|
||||
|
||||
//добавим пустую строку, если надо
|
||||
const prevLine = (i > lastIndex ? lines[i] : {fonts: [], top: 0});
|
||||
if (prevLine && !prevLine.isImage) {
|
||||
const f = (prevLine.fonts.length ? fonts[prevLine.fonts[0]] : (line.fonts.length ? fonts[line.fonts[0]] : null));
|
||||
if (f && f.fontSize && !line.isImage && line.top - prevLine.top > f.fontSize*1.8) {
|
||||
i++;
|
||||
lines[i] = {text: '<br>'};
|
||||
}
|
||||
}
|
||||
|
||||
i++;
|
||||
lines[i] = line;
|
||||
});
|
||||
pagelines = [];
|
||||
putImage(100000);
|
||||
};
|
||||
|
||||
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (!cutCounter) {
|
||||
if (inText) {
|
||||
switch (tag) {
|
||||
case 'i':
|
||||
italic = true;
|
||||
break;
|
||||
case 'b':
|
||||
bold = true;
|
||||
break;
|
||||
if (tag == 'textstyle') {
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
const fontId = (attrs.id && attrs.id.value ? attrs.id.value : '');
|
||||
const fontStyle = (attrs.fontstyle && attrs.fontstyle.value ? attrs.fontstyle.value : '');
|
||||
const fontSize = (attrs.fontsize && attrs.fontsize.value ? attrs.fontsize.value : '');
|
||||
|
||||
if (fontId) {
|
||||
const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
|
||||
const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false, fontSize};
|
||||
|
||||
if (fontStyle) {
|
||||
const styles = fontStyle.split(' ');
|
||||
styles.forEach(style => {
|
||||
const s = styleTags[style];
|
||||
if (s) {
|
||||
f.tOpen += `<${s}>`;
|
||||
f.tClose = `</${s}>${f.tClose}`;
|
||||
if (s == 'b')
|
||||
f.isBold = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (tag == 'text' && !inText) {
|
||||
let attrs = sax.getAttrsSync(tail);
|
||||
const line = {
|
||||
text: '',
|
||||
top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10),
|
||||
left: parseInt((attrs.left && attrs.left.value ? attrs.left.value : null), 10),
|
||||
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
|
||||
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
|
||||
};
|
||||
if (tag == 'page') {
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
page = {
|
||||
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
|
||||
};
|
||||
|
||||
if (line.width != 0 || line.height != 0) {
|
||||
inText = true;
|
||||
if (isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3)) {
|
||||
putImage(line.top);
|
||||
i++;
|
||||
lines[i] = line;
|
||||
}
|
||||
prevTop = line.top;
|
||||
}
|
||||
putPageLines();
|
||||
}
|
||||
|
||||
if (tag == 'textline') {
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
line = {
|
||||
text: '',
|
||||
top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10),
|
||||
left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10),
|
||||
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
|
||||
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
|
||||
tOpen: '',
|
||||
tClose: '',
|
||||
isSectionTitle: false,
|
||||
isSubtitle: false,
|
||||
pageWidth: page.width,
|
||||
fonts: [],
|
||||
};
|
||||
|
||||
if (line.width != 0 || line.height != 0) {
|
||||
pagelines.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
if (tag == 'image') {
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
const src = (attrs.src && attrs.src.value ? attrs.src.value : '');
|
||||
if (tag == 'string') {
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
if (attrs.content && attrs.content.value) {
|
||||
|
||||
let tOpen = '';
|
||||
let tClose = '';
|
||||
const fontId = (attrs.stylerefs && attrs.stylerefs.value ? attrs.stylerefs.value : '');
|
||||
if (fontId && fonts[fontId]) {
|
||||
tOpen = fonts[fontId].tOpen;
|
||||
tClose = fonts[fontId].tClose;
|
||||
if (!line.fonts.length || line.fonts[0] != fontId)
|
||||
line.fonts.push(fontId);
|
||||
}
|
||||
|
||||
if (line.tOpen != tOpen) {
|
||||
line.text += line.tClose + tOpen;
|
||||
line.tOpen = tOpen;
|
||||
line.tClose = tClose;
|
||||
}
|
||||
|
||||
line.text += `${line.text.length ? ' ' : ''}${attrs.content.value}`;
|
||||
}
|
||||
}
|
||||
|
||||
if (tag == 'illustration') {
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
if (attrs.type && attrs.type.value == 'image') {
|
||||
let src = (attrs.fileid && attrs.fileid.value ? attrs.fileid.value : '');
|
||||
if (src) {
|
||||
const image = {
|
||||
isImage: true,
|
||||
src,
|
||||
data: '',
|
||||
type: '',
|
||||
top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0,
|
||||
top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10) || 0,
|
||||
left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10) || 0,
|
||||
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10) || 0,
|
||||
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10) || 0,
|
||||
};
|
||||
loading.push(loadImage(image));
|
||||
images.push(image);
|
||||
images.sort((a, b) => a.top - b.top)
|
||||
const exists = images.filter(img => (img.top == image.top && img.left == image.left && img.width == image.width && img.height == image.height));
|
||||
if (!exists.length) {
|
||||
loading.push(loadImage(image));
|
||||
images.push(image);
|
||||
images.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (tag == 'page') {
|
||||
putImage(100000);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (inText) {
|
||||
switch (tag) {
|
||||
case 'i':
|
||||
italic = false;
|
||||
break;
|
||||
case 'b':
|
||||
bold = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (tag == 'text')
|
||||
inText = false;
|
||||
};
|
||||
|
||||
let buf = this.decode(data).toString();
|
||||
sax.parseSync(buf, {
|
||||
onStartNode, onEndNode, onTextNode
|
||||
onStartNode
|
||||
});
|
||||
|
||||
putImage(100000);
|
||||
putPageLines();
|
||||
|
||||
await Promise.all(loading);
|
||||
await utils.sleep(100);
|
||||
|
||||
//найдем параграфы и отступы
|
||||
const indents = [];
|
||||
@@ -187,11 +277,29 @@ class ConvertPdf extends ConvertHtml {
|
||||
}
|
||||
indents[0] = 0;
|
||||
|
||||
//формируем текст
|
||||
const limitSize = 2*this.config.maxUploadFileSize;
|
||||
//title
|
||||
if (fs.pathExists(metaFile)) {
|
||||
const metaXmlString = (await fs.readFile(metaFile)).toString();
|
||||
let metaXmlParsed = xmlParser.parseXml(metaXmlString);
|
||||
metaXmlParsed = xmlParser.simplifyXmlParsed(metaXmlParsed);
|
||||
if (metaXmlParsed.metadata) {
|
||||
title = (metaXmlParsed.metadata.title ? metaXmlParsed.metadata.title._t : '');
|
||||
author = (metaXmlParsed.metadata.author ? metaXmlParsed.metadata.author._t : '');
|
||||
}
|
||||
}
|
||||
|
||||
if (!title && uploadFileName)
|
||||
title = uploadFileName;
|
||||
let text = `<title>${title}</title>`;
|
||||
|
||||
//console.log(JSON.stringify(lines, null, 2));
|
||||
//формируем текст
|
||||
const limitSize = 2*this.config.maxUploadFileSize;
|
||||
let text = '';
|
||||
if (title)
|
||||
text += `<fb2-title>${title}</fb2-title>`;
|
||||
if (author)
|
||||
text += `<fb2-author>${author}</fb2-author>`;
|
||||
|
||||
let concat = '';
|
||||
let sp = '';
|
||||
for (const line of lines) {
|
||||
@@ -204,6 +312,16 @@ class ConvertPdf extends ConvertHtml {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line.isSectionTitle) {
|
||||
text += `<fb2-section-title>${line.text.trim()}</fb2-section-title>`;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line.isSubtitle) {
|
||||
text += `<br><fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (concat == '') {
|
||||
const left = line.left || 0;
|
||||
sp = ' '.repeat(indents[left]);
|
||||
@@ -221,7 +339,9 @@ class ConvertPdf extends ConvertHtml {
|
||||
if (concat)
|
||||
text += sp + concat + "\n";
|
||||
|
||||
return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true});
|
||||
//console.log(text);
|
||||
await utils.sleep(100);
|
||||
return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ class ConvertSites extends ConvertHtml {
|
||||
if (text === false)
|
||||
return false;
|
||||
|
||||
return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true});
|
||||
return await super.run(Buffer.from(text), {skipCheck: true});
|
||||
}
|
||||
|
||||
getTitle(text) {
|
||||
@@ -79,7 +79,7 @@ class ConvertSites extends ConvertHtml {
|
||||
let book = this.getTitle(text);
|
||||
book = book.replace(' (fb2) | Флибуста', '');
|
||||
|
||||
const title = `<title>${author}${(author ? ' - ' : '')}${book}</title>`;
|
||||
const title = `<fb2-title>${author}${(author ? ' - ' : '')}${book}</fb2-title>`;
|
||||
|
||||
let begin = '<h3 class="book">';
|
||||
if (text.indexOf(begin) <= 0)
|
||||
@@ -95,12 +95,12 @@ class ConvertSites extends ConvertHtml {
|
||||
return text.substring(l, r)
|
||||
.replace(/blockquote class="?book"?/g, 'p')
|
||||
.replace(/<br\/?>\s*<\/h3>/g, '</h3>')
|
||||
.replace(/<h3 class="?book"?>/g, '<br><br><subtitle>')
|
||||
.replace(/<h5 class="?book"?>/g, '<br><br><subtitle>')
|
||||
.replace(/<h3>/g, '<br><br><subtitle>')
|
||||
.replace(/<h5>/g, '<br><br><subtitle>')
|
||||
.replace(/<\/h3>/g, '</subtitle><br>')
|
||||
.replace(/<\/h5>/g, '</subtitle><br>')
|
||||
.replace(/<h3 class="?book"?>/g, '<br><br><fb2-subtitle>')
|
||||
.replace(/<h5 class="?book"?>/g, '<br><br><fb2-subtitle>')
|
||||
.replace(/<h3>/g, '<br><br><fb2-subtitle>')
|
||||
.replace(/<h5>/g, '<br><br><fb2-subtitle>')
|
||||
.replace(/<\/h3>/g, '</fb2-subtitle><br>')
|
||||
.replace(/<\/h5>/g, '</fb2-subtitle><br>')
|
||||
.replace(/<div class="?stanza"?>/g, '<br>')
|
||||
.replace(/<div>/g, '<br>')
|
||||
+ title;
|
||||
|
||||
@@ -6,7 +6,8 @@ function parseSync(xstr, options) {
|
||||
onCdata: _onCdata = dummy,
|
||||
onComment: _onComment = dummy,
|
||||
onProgress: _onProgress = dummy,
|
||||
innerCut = new Set()
|
||||
innerCut = new Set(),
|
||||
lowerCase = true,
|
||||
} = options;
|
||||
|
||||
let i = 0;
|
||||
@@ -91,7 +92,8 @@ function parseSync(xstr, options) {
|
||||
} else {
|
||||
tag = tagData;
|
||||
}
|
||||
tag = tag.toLowerCase();
|
||||
if (lowerCase)
|
||||
tag = tag.toLowerCase();
|
||||
|
||||
if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
|
||||
if (!cutCounter)
|
||||
@@ -146,7 +148,8 @@ async function parse(xstr, options) {
|
||||
onCdata: _onCdata = dummy,
|
||||
onComment: _onComment = dummy,
|
||||
onProgress: _onProgress = dummy,
|
||||
innerCut = new Set()
|
||||
innerCut = new Set(),
|
||||
lowerCase = true,
|
||||
} = options;
|
||||
|
||||
let i = 0;
|
||||
@@ -231,7 +234,8 @@ async function parse(xstr, options) {
|
||||
} else {
|
||||
tag = tagData;
|
||||
}
|
||||
tag = tag.toLowerCase();
|
||||
if (lowerCase)
|
||||
tag = tag.toLowerCase();
|
||||
|
||||
if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
|
||||
if (!cutCounter)
|
||||
@@ -276,7 +280,7 @@ async function parse(xstr, options) {
|
||||
await _onProgress(100);
|
||||
}
|
||||
|
||||
function getAttrsSync(tail) {
|
||||
function getAttrsSync(tail, lowerCase = true) {
|
||||
let result = {};
|
||||
let name = '';
|
||||
let value = '';
|
||||
@@ -287,13 +291,16 @@ function getAttrsSync(tail) {
|
||||
let waitEq = false;
|
||||
|
||||
const pushResult = () => {
|
||||
if (lowerCase)
|
||||
name = name.toLowerCase();
|
||||
if (name != '') {
|
||||
const fn = name;
|
||||
let ns = '';
|
||||
if (name.indexOf(':') >= 0) {
|
||||
[ns, name] = name.split(':');
|
||||
if (fn.indexOf(':') >= 0) {
|
||||
[ns, name] = fn.split(':');
|
||||
}
|
||||
|
||||
result[name] = {value, ns};
|
||||
result[name] = {value, ns, fn};
|
||||
}
|
||||
name = '';
|
||||
value = '';
|
||||
|
||||
143
server/core/xmlParser.js
Normal file
143
server/core/xmlParser.js
Normal file
@@ -0,0 +1,143 @@
|
||||
const sax = require('./sax');
|
||||
|
||||
/**
 * Serializes a parsed XML tree into a complete document string.
 *
 * @param {object} xmlParsed - node tree accepted by formatXmlNode
 * @param {string} [encoding='utf-8'] - value written into the XML declaration
 * @param {function(string): string} [textFilterFunc] - optional text filter,
 *        forwarded unchanged to formatXmlNode
 * @returns {string} the XML declaration followed by the serialized tree
 */
function formatXml(xmlParsed, encoding = 'utf-8', textFilterFunc) {
    const declaration = `<?xml version="1.0" encoding="${encoding}"?>`;

    return declaration + formatXmlNode(xmlParsed, textFilterFunc);
}
|
||||
|
||||
/**
 * Serializes a node tree into an XML fragment (no XML declaration).
 *
 * Accepted node shapes:
 *  - array: every element is serialized in order;
 *  - string / number / boolean / bigint: emitted as text, wrapped in
 *    <name>...</name> when it is the value of a named key;
 *  - object: `_n` is the tag name (falls back to the key it is stored under),
 *    `_attrs` is a map of attribute name -> value, `_t` is the text content.
 *    Keys starting with '_' are skipped, except `_a` (list of child nodes);
 *    every other key is serialized as a child element named after the key.
 *  - null / undefined: produce no output.
 *
 * @param {*} node - tree to serialize
 * @param {function(string): string} [textFilterFunc] - applied to every text
 *        value before it is written (identity when omitted)
 * @returns {string} serialized XML
 */
function formatXmlNode(node, textFilterFunc) {
    textFilterFunc = (textFilterFunc ? textFilterFunc : text => text);

    // Safe own-property check: a node may itself carry a key called
    // "hasOwnProperty" or be created without a prototype.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    const isScalar = (value) => {
        const type = typeof value;
        return type === 'number' || type === 'boolean' || type === 'bigint';
    };

    const formatAttrs = (attrsMap) => {
        let attrs = '';
        for (const attrName in attrsMap) {
            // Values are written as-is (they are not entity-decoded on parse);
            // only a literal double quote is escaped, because it would
            // otherwise terminate the attribute and break the markup.
            const value = String(attrsMap[attrName]).replace(/"/g, '&quot;');
            attrs += ` ${attrName}="${value}"`;
        }
        return attrs;
    };

    const formatNode = (node, name) => {
        // nothing to serialize
        if (node === null || node === undefined)
            return '';

        if (Array.isArray(node)) {
            let out = '';
            for (const n of node) {
                out += formatNode(n);
            }
            return out;
        }

        if (typeof node === 'string' || isScalar(node)) {
            const text = textFilterFunc(typeof node === 'string' ? node : String(node));
            return (name ? `<${name}>${text}</${name}>` : `${text}`);
        }

        // an explicit tag name stored in the node wins over the key name
        if (node._n)
            name = node._n;

        const attrs = (node._attrs ? formatAttrs(node._attrs) : '');

        let tBody = '';
        if (hasOwn(node, '_t'))
            tBody += textFilterFunc(node._t);

        for (const nodeName in node) {
            // service fields (_n, _p, _t, _attrs, ...) are not children; _a is
            if (nodeName && nodeName[0] === '_' && nodeName !== '_a')
                continue;

            tBody += formatNode(node[nodeName], nodeName);
        }

        const tOpen = (name ? `<${name}${attrs}>` : '');
        const tClose = (name ? `</${name}>` : '');

        return `${tOpen}${tBody}${tClose}`;
    };

    return formatNode(node);
}
|
||||
|
||||
/**
 * Parses an XML string into a tree of plain objects using the sax parser.
 *
 * Every element becomes a node of the form
 *   {_n: tagName, _p: parentNode, _attrs?: {fullAttrName: value}, _a?: [children], _t?: text}
 * The `<?xml ...?>` declaration is ignored. Each text chunk reported by the
 * parser is stored in `_t` of the element that is open at that moment,
 * replacing any previously stored chunk.
 *
 * @param {string} xmlString - XML source
 * @param {boolean} [lowerCase=true] - passed through to sax.parseSync and
 *        sax.getAttrsSync
 * @returns {object} the first top-level element node, or an empty object
 *          when no element was found
 */
function parseXml(xmlString, lowerCase = true) {
    const root = {};
    let current = root;

    const onTextNode = (text) => {
        current._t = text;
    };

    const onStartNode = (tag, tail) => {
        if (tag === '?xml')
            return;

        const created = {_n: tag, _p: current};

        if (tail) {
            const parsed = Object.values(sax.getAttrsSync(tail, lowerCase));
            if (parsed.length) {
                const attrs = {};
                // keyed by the full attribute name (fn) as reported by sax
                for (const {fn, value} of parsed) {
                    attrs[fn] = value;
                }

                created._attrs = attrs;
            }
        }

        current._a = current._a || [];
        current._a.push(created);
        current = created;
    };

    const onEndNode = (tag) => {
        // step back to the parent only when the closing tag matches the open element
        if (current._p && current._n === tag)
            current = current._p;
    };

    sax.parseSync(xmlString, {onStartNode, onEndNode, onTextNode, lowerCase});

    return (root._a ? root._a[0] : root);
}
|
||||
|
||||
/**
 * Converts a tree produced by parseXml into a compact object keyed by tag name.
 *
 * For every node its children are grouped under their tag names:
 *   {_n: 'a', _a: [{_n: 'b', _t: 'x'}]}  ->  {a: {b: {_t: 'x'}}}
 * Non-empty `_t` and `_attrs` of a node are copied onto its simplified object.
 * When several siblings share a tag name only the first one is kept.
 * Nodes without `_n` (and null/undefined entries) are ignored.
 *
 * @param {object} node - root node as returned by parseXml
 * @returns {object} simplified tree
 */
function simplifyXmlParsed(node) {
    // Own-property check: a plain `result[name]` lookup would also see inherited
    // members, so elements named e.g. "constructor" or "toString" were dropped.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    const simplifyNodeArray = (a) => {
        const result = {};

        for (const child of a) {
            if (!child || !child._n)
                continue;
            // first sibling with a given tag name wins
            if (hasOwn(result, child._n))
                continue;

            const simplified = (child._a ? simplifyNodeArray(child._a) : {});
            if (child._t) {
                simplified._t = child._t;
            }
            if (child._attrs) {
                simplified._attrs = child._attrs;
            }

            result[child._n] = simplified;
        }

        return result;
    };

    return simplifyNodeArray([node]);
}
|
||||
|
||||
// Public API of the XML helper module: serialization (formatXml, formatXmlNode)
// and parsing (parseXml, simplifyXmlParsed).
module.exports = {formatXml, formatXmlNode, parseXml, simplifyXmlParsed};
|
||||
Reference in New Issue
Block a user