Изменение путей к модулям
This commit is contained in:
147
server/core/Reader/BookConverter/ConvertBase.js
Normal file
147
server/core/Reader/BookConverter/ConvertBase.js
Normal file
@@ -0,0 +1,147 @@
|
||||
const fs = require('fs-extra');
|
||||
const iconv = require('iconv-lite');
|
||||
const chardet = require('chardet');
|
||||
const he = require('he');
|
||||
|
||||
const textUtils = require('./textUtils');
|
||||
const utils = require('../../utils');
|
||||
|
||||
// Module-level count of execConverter() calls currently in flight; incremented on entry and
// decremented in a finally block, and used to reject new work when more than 10 are pending.
let execConverterCounter = 0;
|
||||
|
||||
/**
 * Base class for the book converters.
 *
 * Holds the paths of the external tools, runs them with a simple concurrency limit,
 * and provides helpers for text decoding and for serialising an FB2 node tree to XML.
 */
class ConvertBase {
    /**
     * @param {object} config - application config; `dataDir` is read here, `branch` in execConverter().
     */
    constructor(config) {
        this.config = config;

        this.calibrePath = `${config.dataDir}/calibre/ebook-convert`;
        this.sofficePath = '/usr/bin/soffice';
        this.pdfToHtmlPath = '/usr/bin/pdftohtml';
    }

    /**
     * Conversion entry point; a no-op here, meant to be overridden by subclasses.
     */
    async run(data, opts) {// eslint-disable-line no-unused-vars
        //override
    }

    /**
     * Verifies that all three external tools exist on disk.
     * @throws {Error} naming the first tool whose path does not exist.
     */
    async checkExternalConverterPresent() {
        if (!await fs.pathExists(this.calibrePath))
            throw new Error('Внешний конвертер calibre не найден');

        if (!await fs.pathExists(this.sofficePath))
            throw new Error('Внешний конвертер LibreOffice не найден');

        if (!await fs.pathExists(this.pdfToHtmlPath))
            throw new Error('Внешний конвертер pdftohtml не найден');
    }

    /**
     * Runs an external converter through utils.spawnProcess().
     *
     * @param {string} path - executable to run.
     * @param {string[]} args - command-line arguments.
     * @param {Function} [onData] - passed through to spawnProcess.
     * @throws {Error} when more than 10 calls are in flight, when the process exits with a
     *   non-zero code, or when spawnProcess rejects (status 'killed' / 'error' are mapped
     *   to dedicated messages).
     */
    async execConverter(path, args, onData) {
        execConverterCounter++;
        try {
            if (execConverterCounter > 10)
                throw new Error('Слишком большая очередь конвертирования. Пожалуйста, попробуйте позже.');

            const result = await utils.spawnProcess(path, {args, onData});
            if (result.code != 0) {
                let error = result.code;
                // full process output is exposed only in development builds
                if (this.config.branch == 'development')
                    error = `exec: ${path}, stdout: ${result.stdout}, stderr: ${result.stderr}`;
                throw new Error(`Внешний конвертер завершился с ошибкой: ${error}`);
            }
        } catch(e) {
            const status = (e ? e.status : undefined);
            if (status == 'killed') {
                throw new Error('Слишком долгое ожидание конвертера');
            } else if (status == 'error') {
                throw (e.error instanceof Error ? e.error : new Error(e.error));
            } else if (e instanceof Error) {
                // rethrow as is: wrapping in new Error(e) would prefix the message
                // with "Error: " and discard the original stack
                throw e;
            } else {
                throw new Error(e);
            }
        } finally {
            execConverterCounter--;
        }
    }

    /**
     * Decodes raw data using the encoding reported by textUtils.getEncoding().
     * If that is 'ISO-8859-5', the first non-ISO-8859 candidate from chardet (computed on
     * the first 20000 bytes) is used instead, when there is one.
     *
     * @returns {string|Buffer} decoded string, or `data` itself when the encoding is utf-8.
     */
    decode(data) {
        let selected = textUtils.getEncoding(data);

        if (selected == 'ISO-8859-5') {
            const charsetAll = chardet.detectAll(data.slice(0, 20000));
            for (const charset of charsetAll) {
                if (charset.name.indexOf('ISO-8859') < 0) {
                    selected = charset.name;
                    break;
                }
            }
        }

        if (selected.toLowerCase() != 'utf-8')
            return iconv.decode(data, selected);
        else
            return data;
    }

    /**
     * Replaces tabs, line feeds and carriage returns with a single space each.
     * NOTE(review): the first alternative of the pattern is a single blank character in this
     * copy of the file; confirm whether it was meant to be a non-breaking space / `&nbsp;`.
     */
    repSpaces(text) {
        return text.replace(/ |[\t\n\r]/g, ' ');
    }

    /**
     * Normalises entities: decodes any existing ones, then escapes the result.
     */
    escapeEntities(text) {
        return he.escape(he.decode(text));
    }

    /**
     * Serialises a node tree into a complete FB2 document string.
     */
    formatFb2(fb2) {
        let out = '<?xml version="1.0" encoding="utf-8"?>';
        out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">';
        out += this.formatFb2Node(fb2);
        out += '</FictionBook>';
        return out;
    }

    /**
     * Recursively serialises one node.
     *
     * A node is an array of nodes, a string, or an object whose keys are child tag names.
     * Reserved keys: `_n` (tag name override), `_attrs` (attributes), `_t` (text body),
     * `_a` (array of children); any other key starting with `_` is ignored.
     *
     * @param {*} node
     * @param {string} [name] - tag name taken from the parent's key.
     * @returns {string} XML fragment; '' for null/undefined nodes.
     */
    formatFb2Node(node, name) {
        // a missing child must not abort serialisation of the whole book
        if (node === null || node === undefined)
            return '';

        let out = '';

        if (Array.isArray(node)) {
            for (const n of node) {
                out += this.formatFb2Node(n);
            }
        } else if (typeof node == 'string') {
            if (name)
                out += `<${name}>${this.repSpaces(node)}</${name}>`;
            else
                out += this.repSpaces(node);
        } else {
            if (node._n)
                name = node._n;

            let attrs = '';
            if (node._attrs) {
                for (const attrName of Object.keys(node._attrs)) {
                    attrs += ` ${attrName}="${node._attrs[attrName]}"`;
                }
            }

            let tOpen = '';
            let tBody = '';
            let tClose = '';
            if (name)
                tOpen += `<${name}${attrs}>`;
            if (Object.prototype.hasOwnProperty.call(node, '_t'))
                tBody += this.repSpaces(node._t);

            for (const nodeName of Object.keys(node)) {
                // service keys are skipped, except the children array `_a`
                if (nodeName && nodeName[0] == '_' && nodeName != '_a')
                    continue;

                const n = node[nodeName];
                tBody += this.formatFb2Node(n, nodeName);
            }

            if (name)
                tClose += `</${name}>`;

            // an attribute-less, blank paragraph becomes an FB2 empty line
            if (attrs == '' && name == 'p' && tBody.trim() == '')
                out += '<empty-line/>'
            else
                out += `${tOpen}${tBody}${tClose}`;
        }
        return out;
    }
}
|
||||
|
||||
module.exports = ConvertBase;
|
||||
33
server/core/Reader/BookConverter/ConvertDoc.js
Normal file
33
server/core/Reader/BookConverter/ConvertDoc.js
Normal file
@@ -0,0 +1,33 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const ConvertDocX = require('./ConvertDocX');
|
||||
|
||||
/**
 * Converter for files whose detected source type is 'msi': the file is turned into
 * docx with LibreOffice and then handed to the ConvertDocX pipeline.
 * NOTE(review): presumably the type detector reports legacy .doc containers as 'msi' - confirm.
 */
class ConvertDoc extends ConvertDocX {
    /**
     * @returns truthy only when external converters are enabled and the source type is 'msi'.
     */
    check(data, opts) {
        const files = opts.inputFiles;

        return this.config.useExternalBookConverter &&
            files.sourceFileType && files.sourceFileType.ext == 'msi';
    }

    /**
     * @returns {Promise<false|Buffer>} false when check() fails, otherwise the fb2 file contents.
     */
    async run(data, opts) {
        const accepted = this.check(data, opts);
        if (!accepted)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;

        // all intermediate files share the source file's base name inside filesDir
        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const docFile = `${base}.doc`;

        await fs.copy(inputFiles.sourceFile, docFile);

        const sofficeArgs = ['--headless', '--convert-to', 'docx', '--outdir', inputFiles.filesDir, docFile];
        await this.execConverter(this.sofficePath, sofficeArgs);

        return await super.convert(`${base}.docx`, `${base}.fb2`, callback);
    }
}
|
||||
|
||||
module.exports = ConvertDoc;
|
||||
49
server/core/Reader/BookConverter/ConvertDocX.js
Normal file
49
server/core/Reader/BookConverter/ConvertDocX.js
Normal file
@@ -0,0 +1,49 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const ConvertBase = require('./ConvertBase');
|
||||
|
||||
/**
 * Converter for docx files: a zip archive containing '[Content_Types].xml' is
 * passed to calibre to produce fb2.
 */
class ConvertDocX extends ConvertBase {
    /**
     * @returns {boolean} true when external converters are enabled, the source is a zip
     *   and the archive listing contains '[Content_Types].xml'.
     */
    check(data, opts) {
        const {inputFiles} = opts;
        const isZip = this.config.useExternalBookConverter &&
            inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'zip';

        if (!isZip)
            return false;

        // look for the '[Content_Types].xml' entry
        for (const entry of inputFiles.files) {
            if (entry.path == '[Content_Types].xml')
                return true;
        }

        return false;
    }

    /**
     * Runs calibre on docxFile and returns the contents of the produced fb2File.
     * On every output chunk the progress callback receives a value stepped by 5,
     * falling back to 50 once 100 has been reached.
     */
    async convert(docxFile, fb2File, callback) {
        let progress = 0;
        const onData = () => {
            progress = (progress < 100 ? progress + 5 : 50);
            callback(progress);
        };

        await this.execConverter(this.calibrePath, [docxFile, fb2File], onData);

        return await fs.readFile(fb2File);
    }

    /**
     * @returns {Promise<false|Buffer>} false when check() fails, otherwise the fb2 file contents.
     */
    async run(data, opts) {
        const accepted = this.check(data, opts);
        if (!accepted)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;

        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const docxFile = `${base}.docx`;

        await fs.copy(inputFiles.sourceFile, docxFile);

        return await this.convert(docxFile, `${base}.fb2`, callback);
    }
}
|
||||
|
||||
module.exports = ConvertDocX;
|
||||
49
server/core/Reader/BookConverter/ConvertEpub.js
Normal file
49
server/core/Reader/BookConverter/ConvertEpub.js
Normal file
@@ -0,0 +1,49 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const ConvertBase = require('./ConvertBase');
|
||||
|
||||
/**
 * Converter for epub files: a zip archive whose 'mimetype' entry equals
 * 'application/epub+zip' is passed to calibre to produce fb2.
 */
class ConvertEpub extends ConvertBase {
    /**
     * @returns {Promise<boolean>} true when external converters are enabled, the source is a
     *   zip, and the unpacked 'mimetype' file contains 'application/epub+zip'.
     */
    async check(data, opts) {
        const {inputFiles} = opts;
        const isZip = this.config.useExternalBookConverter &&
            inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'zip';

        if (!isZip)
            return false;

        // look for the 'mimetype' entry; only the first one found is examined
        for (const entry of inputFiles.files) {
            if (entry.path != 'mimetype')
                continue;

            const mt = await fs.readFile(`${inputFiles.filesDir}/${entry.path}`);
            return mt.toString().trim() == 'application/epub+zip';
        }

        return false;
    }

    /**
     * @returns {Promise<false|Buffer>} false when check() fails, otherwise the fb2 file contents.
     */
    async run(data, opts) {
        const accepted = await this.check(data, opts);
        if (!accepted)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;

        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const epubFile = `${base}.epub`;
        const fb2File = `${base}.fb2`;

        await fs.copy(inputFiles.sourceFile, epubFile);

        // progress is stepped by 5 per output chunk, falling back to 50 once 100 is reached
        let progress = 0;
        const onData = () => {
            progress = (progress < 100 ? progress + 5 : 50);
            callback(progress);
        };
        await this.execConverter(this.calibrePath, [epubFile, fb2File], onData);

        return await fs.readFile(fb2File);
    }
}
|
||||
|
||||
module.exports = ConvertEpub;
|
||||
41
server/core/Reader/BookConverter/ConvertFb2.js
Normal file
41
server/core/Reader/BookConverter/ConvertFb2.js
Normal file
@@ -0,0 +1,41 @@
|
||||
const ConvertBase = require('./ConvertBase');
|
||||
const iconv = require('iconv-lite');
|
||||
|
||||
/**
 * Converter for data that already is FB2: the document is only re-encoded to utf-8 if needed.
 */
class ConvertFb2 extends ConvertBase {
    /**
     * @returns truthy when the data type is 'xml' and the data contains '<FictionBook'.
     */
    check(data, opts) {
        const {dataType} = opts;

        return (dataType && dataType.ext == 'xml' && data.toString().indexOf('<FictionBook') >= 0);
    }

    /**
     * @returns {Promise<false|Buffer>} false when check() fails, otherwise the utf-8 document.
     */
    async run(data, opts) {
        if (!this.check(data, opts))
            return false;

        return this.checkEncoding(data);
    }

    /**
     * Reads the encoding from the XML declaration and, when it is not utf-8, decodes the
     * data with iconv and rewrites the declaration to say utf-8.
     *
     * @param {Buffer} data
     * @returns {Buffer} `data` itself when no conversion was necessary.
     */
    checkEncoding(data) {
        let result = data;

        const left = data.indexOf('<?xml version="1.0"');
        if (left >= 0) {
            const right = data.indexOf('?>', left);
            if (right >= 0) {
                const head = data.slice(left, right + 2).toString();
                // Lazy match up to the matching closing quote: a greedy `.*` would swallow
                // following pseudo-attributes such as standalone="yes". Both quote styles
                // are allowed in an XML declaration.
                const m = head.match(/encoding=(["'])(.*?)\1/);
                if (m) {
                    const encoding = m[2].toLowerCase();
                    if (encoding != 'utf-8') {
                        result = iconv.decode(data, encoding);
                        result = Buffer.from(result.toString().replace(m[0], 'encoding="utf-8"'));
                    }
                }
            }
        }

        return result;
    }
}
|
||||
|
||||
module.exports = ConvertFb2;
|
||||
299
server/core/Reader/BookConverter/ConvertHtml.js
Normal file
299
server/core/Reader/BookConverter/ConvertHtml.js
Normal file
@@ -0,0 +1,299 @@
|
||||
const ConvertBase = require('./ConvertBase');
|
||||
const sax = require('./sax');
|
||||
const textUtils = require('./textUtils');
|
||||
|
||||
/**
 * Converter for html/xml markup and plain text: the input is parsed with the local sax
 * parser into a flat list of paragraphs which is then serialised as FB2.
 */
class ConvertHtml extends ConvertBase {
    /**
     * @returns {{isText: boolean}|false} `{isText: false}` for html/xml data types,
     *   `{isText: true}` when textUtils.checkIfText() accepts the data, otherwise false.
     */
    check(data, opts) {
        const {dataType} = opts;

        if (dataType && (dataType.ext == 'html' || dataType.ext == 'xml'))
            return {isText: false};

        // maybe it is plain text?
        if (textUtils.checkIfText(data)) {
            return {isText: true};
        }

        return false;
    }

    /**
     * Converts the data to an FB2 document string.
     *
     * Options read: `skipCheck` (skip check() and take `isText` from opts), `isText`,
     * `cutTitle` (do not put the title text into the body).
     *
     * @returns {Promise<false|string>} false when check() rejects the data.
     */
    async run(data, opts) {
        let isText = false;
        if (!opts.skipCheck) {
            const checkResult = this.check(data, opts);
            if (!checkResult)
                return false;

            isText = checkResult.isText;
        } else {
            isText = opts.isText;
        }
        let {cutTitle} = opts;

        let titleInfo = {};
        let desc = {_n: 'description', 'title-info': titleInfo};
        let pars = [];
        let body = {_n: 'body', section: {_a: []}};
        let binary = [];
        let fb2 = [desc, body, binary];

        let title = '';
        let inTitle = false;
        let inImage = false;
        let image = {};
        let bold = false;
        let italic = false;

        // spaceCounter[n] = number of non-blank text lines that start with n spaces
        let spaceCounter = [];

        const repCrLfTab = (text) => text.replace(/[\n\r]/g, '').replace(/\t/g, ' ');

        const newParagraph = () => {
            pars.push({_n: 'p', _t: ''});
        };

        const growParagraph = (text) => {
            if (!pars.length)
                newParagraph();

            const l = pars.length;
            pars[l - 1]._t += text;

            // count the indentation of the text lines, used later to detect paragraphs
            const lines = text.split('\n');
            for (let line of lines) {
                if (line.trim() == '')
                    continue;
                line = repCrLfTab(line);

                let l = 0;
                while (l < line.length && line[l] == ' ') {
                    l++;
                }
                if (!spaceCounter[l])
                    spaceCounter[l] = 0;
                spaceCounter[l]++;
            }
        };

        // tags (opening, or closing when prefixed with '/') that start a new paragraph
        const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);

        // NOTE(review): cutCounter is presumably non-zero while inside an `innerCut` tag - confirm against sax
        const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            text = this.escapeEntities(text);

            if (!cutCounter && !(cutTitle && inTitle)) {
                let tOpen = (bold ? '<strong>' : '');
                tOpen += (italic ? '<emphasis>' : '');
                let tClose = (italic ? '</emphasis>' : '');
                tClose += (bold ? '</strong>' : '');

                growParagraph(`${tOpen}${text}${tClose}`);
            }

            if (inTitle && !title)
                title = text;

            if (inImage) {
                // the text inside <fb2-image> is the image payload
                image._t = text;
                binary.push(image);

                pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
                newParagraph();
            }
        };

        const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (!cutCounter) {
                if (newPara.has(tag))
                    newParagraph();

                switch (tag) {
                    case 'i':
                    case 'em':
                        italic = true;
                        break;
                    case 'b':
                    case 'strong':
                    case 'h1':
                    case 'h2':
                    case 'h3':
                        bold = true;
                        break;
                }
            }

            if (tag == 'title' || tag == 'cut-title') {
                inTitle = true;
                if (tag == 'cut-title')
                    cutTitle = true;
            }

            if (tag == 'fb2-image') {
                const attrs = sax.getAttrsSync(tail);
                const id = (attrs && attrs.name && attrs.name.value ? attrs.name.value : '');
                const type = (attrs && attrs.type && attrs.type.value ? attrs.type.value : '');
                // an <fb2-image> without a name cannot be referenced; skip it instead of
                // throwing on the missing attribute and aborting the whole conversion
                if (id) {
                    inImage = true;
                    image = {_n: 'binary', _attrs: {id, 'content-type': type}, _t: ''};
                }
            }
        };

        const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (!cutCounter) {
                if (newPara.has('/' + tag))
                    newParagraph();

                switch (tag) {
                    case 'i':
                    case 'em':
                        italic = false;
                        break;
                    case 'b':
                    case 'strong':
                    case 'h1':
                    case 'h2':
                    case 'h3':
                        bold = false;
                        break;
                }
            }

            if (tag == 'title' || tag == 'cut-title')
                inTitle = false;

            if (tag == 'fb2-image')
                inImage = false;
        };

        let buf = this.decode(data).toString();

        sax.parseSync(buf, {
            onStartNode, onEndNode, onTextNode,
            innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image'])
        });

        titleInfo['book-title'] = title;

        // looks like plain text: split it into paragraphs by indentation
        if (isText || pars.length < buf.length/2000) {
            let total = 0;
            let count = 1;
            for (let i = 0; i < spaceCounter.length; i++) {
                const sc = (spaceCounter[i] ? spaceCounter[i] : 0);
                if (sc) count++;
                total += sc;
            }

            let d = 0;
            const mid = total/count;
            for (let i = 0; i < spaceCounter.length; i++) {
                const sc = (spaceCounter[i] ? spaceCounter[i] : 0);
                if (sc > mid) d++;
            }

            let i = 0;
            // if the spread of indents is not too large, pick the paragraph indent
            if (d < 10 && spaceCounter.length) {
                total /= 20;
                i = spaceCounter.length - 1;
                while (i > 0 && (!spaceCounter[i] || spaceCounter[i] < total)) i--;
            }

            const parIndent = (i > 0 ? i : 0);

            let newPars = [];
            const newPar = () => {
                newPars.push({_n: 'p', _t: ''});
            };

            const growPar = (text) => {
                if (!newPars.length)
                    newPar();

                const l = newPars.length;
                newPars[l - 1]._t += text;
            }

            i = 0;
            for (const par of pars) {
                if (par._n != 'p') {
                    newPars.push(par);
                    continue;
                }

                if (i > 0)
                    newPar();
                i++;

                let j = 0;
                const lines = par._t.split('\n');
                for (let line of lines) {
                    line = repCrLfTab(line);

                    let l = 0;
                    while (l < line.length && line[l] == ' ') {
                        l++;
                    }

                    // a line indented at least as much as parIndent starts a new paragraph
                    if (l >= parIndent) {
                        if (j > 0)
                            newPar();
                        j++;
                    }
                    growPar(line.trim() + ' ');
                }
            }

            body.section._a[0] = newPars;
        } else {
            body.section._a[0] = pars;
        }

        // clean up and produce valid fb2: splitting into paragraphs may have broken inline tags,
        // so each paragraph is re-parsed and every text run is re-wrapped; the bold/italic
        // state deliberately carries over from one paragraph to the next
        bold = false;
        italic = false;
        pars = body.section._a[0];
        for (let i = 0; i < pars.length; i++) {
            if (pars[i]._n != 'p')
                continue;

            pars[i]._t = this.repSpaces(pars[i]._t).trim();

            if (pars[i]._t.indexOf('<') >= 0) {
                const t = pars[i]._t;
                let a = [];

                const onTextNode = (text) => {
                    let tOpen = (bold ? '<strong>' : '');
                    tOpen += (italic ? '<emphasis>' : '');
                    let tClose = (italic ? '</emphasis>' : '');
                    tClose += (bold ? '</strong>' : '');

                    a.push(`${tOpen}${text}${tClose}`);
                }

                const onStartNode = (tag) => {
                    if (tag == 'strong')
                        bold = true;
                    if (tag == 'emphasis')
                        italic = true;
                }

                const onEndNode = (tag) => {
                    if (tag == 'strong')
                        bold = false;
                    if (tag == 'emphasis')
                        italic = false;
                }

                sax.parseSync(t, { onStartNode, onEndNode, onTextNode });

                pars[i]._t = '';
                pars[i]._a = a;
            }
        }

        return this.formatFb2(fb2);
    }

}
|
||||
|
||||
module.exports = ConvertHtml;
|
||||
37
server/core/Reader/BookConverter/ConvertMobi.js
Normal file
37
server/core/Reader/BookConverter/ConvertMobi.js
Normal file
@@ -0,0 +1,37 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const ConvertBase = require('./ConvertBase');
|
||||
|
||||
/**
 * Converter for mobi files: the source is copied with a .mobi extension and passed
 * to calibre to produce fb2.
 */
class ConvertMobi extends ConvertBase {
    /**
     * @returns truthy only when external converters are enabled and the source type is 'mobi'.
     */
    async check(data, opts) {
        const files = opts.inputFiles;

        return (this.config.useExternalBookConverter &&
            files.sourceFileType && files.sourceFileType.ext == 'mobi');
    }

    /**
     * @returns {Promise<false|Buffer>} false when check() fails, otherwise the fb2 file contents.
     */
    async run(data, opts) {
        const accepted = await this.check(data, opts);
        if (!accepted)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;

        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const mobiFile = `${base}.mobi`;
        const fb2File = `${base}.fb2`;

        await fs.copy(inputFiles.sourceFile, mobiFile);

        // progress is stepped by 5 per output chunk, falling back to 50 once 100 is reached
        let progress = 0;
        const onData = () => {
            progress = (progress < 100 ? progress + 5 : 50);
            callback(progress);
        };
        await this.execConverter(this.calibrePath, [mobiFile, fb2File], onData);

        return await fs.readFile(fb2File);
    }
}
|
||||
|
||||
module.exports = ConvertMobi;
|
||||
219
server/core/Reader/BookConverter/ConvertPdf.js
Normal file
219
server/core/Reader/BookConverter/ConvertPdf.js
Normal file
@@ -0,0 +1,219 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const sax = require('./sax');
|
||||
const utils = require('../../utils');
|
||||
const ConvertHtml = require('./ConvertHtml');
|
||||
|
||||
/**
 * Converter for pdf files: pdftohtml produces an xml description of the pages, which is
 * turned into indented pseudo-text (with inline images) and passed to ConvertHtml.
 */
class ConvertPdf extends ConvertHtml {
    /**
     * @returns truthy only when external converters are enabled and the source type is 'pdf'.
     */
    check(data, opts) {
        const {inputFiles} = opts;

        return this.config.useExternalBookConverter &&
            inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'pdf';
    }

    /**
     * @param {*} notUsed - ignored; the source is read from `opts.inputFiles.sourceFile`.
     * @param {object} opts - `inputFiles` and the progress `callback` are read.
     * @returns {Promise<false|string>} false when check() fails, otherwise the FB2 document.
     */
    async run(notUsed, opts) {
        if (!this.check(notUsed, opts))
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;

        const outFile = `${inputFiles.filesDir}/${utils.randomHexString(10)}.xml`;

        // convert to xml
        let perc = 0;
        await this.execConverter(this.pdfToHtmlPath, ['-c', '-s', '-xml', inputFiles.sourceFile, outFile], () => {
            perc = (perc < 80 ? perc + 10 : 40);
            callback(perc);
        });
        callback(80);

        const data = await fs.readFile(outFile);
        callback(90);

        // parse the xml
        let lines = [];   // text lines and images in output order
        let images = [];  // images not yet placed, kept sorted by `top`
        let loading = []; // pending loadImage() promises
        let inText = false;
        let bold = false;
        let italic = false;
        let title = '';
        let prevTop = 0;
        let i = -1;       // index of the current entry in `lines`
        let titleCount = 0;

        // reads a jpg/png image from disk as base64; other extensions are left without data
        const loadImage = async(image) => {
            const src = path.parse(image.src);
            let type = 'unknown';
            switch (src.ext) {
                case '.jpg': type = 'image/jpeg'; break;
                case '.png': type = 'image/png'; break;
            }
            if (type != 'unknown') {
                image.data = (await fs.readFile(image.src)).toString('base64');
                image.type = type;
                image.name = src.base;
            }
        }

        // moves every pending image located above curTop into `lines`
        const putImage = (curTop) => {
            if (!isNaN(curTop) && images.length) {
                while (images.length && images[0].top < curTop) {
                    i++;
                    lines[i] = images[0];
                    images.shift();
                }
            }
        }

        const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (!cutCounter && inText) {
                let tOpen = (bold ? '<b>' : '');
                tOpen += (italic ? '<i>' : '');
                let tClose = (italic ? '</i>' : '');
                tClose += (bold ? '</b>' : '');

                lines[i].text += `${tOpen}${text}${tClose} `;
                // the first two non-blank text runs form the title: "first - second"
                if (titleCount < 2 && text.trim() != '') {
                    title += text + (titleCount ? '' : ' - ');
                    titleCount++;
                }
            }
        };

        const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (!cutCounter) {
                if (inText) {
                    switch (tag) {
                        case 'i':
                            italic = true;
                            break;
                        case 'b':
                            bold = true;
                            break;
                    }
                }

                if (tag == 'text' && !inText) {
                    let attrs = sax.getAttrsSync(tail);
                    const line = {
                        text: '',
                        top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10),
                        left: parseInt((attrs.left && attrs.left.value ? attrs.left.value : null), 10),
                        width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
                        height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
                    };

                    if (line.width != 0 || line.height != 0) {
                        inText = true;
                        // Text within 3 units of the previous `top` continues the current line,
                        // but only if there is a current text line: without this check the very
                        // first text (top <= 3) had no line to append to, and text following an
                        // image flushed at a page boundary was appended to the image entry.
                        const current = lines[i];
                        const needNewLine = !current || current.isImage ||
                            isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3);
                        if (needNewLine) {
                            putImage(line.top);
                            i++;
                            lines[i] = line;
                        }
                        prevTop = line.top;
                    }
                }

                if (tag == 'image') {
                    const attrs = sax.getAttrsSync(tail);
                    const src = (attrs.src && attrs.src.value ? attrs.src.value : '');
                    if (src) {
                        const image = {
                            isImage: true,
                            src,
                            data: '',
                            type: '',
                            top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0,
                        };
                        loading.push(loadImage(image));
                        images.push(image);
                        images.sort((a, b) => a.top - b.top)
                    }
                }

                if (tag == 'page') {
                    // flush whatever images are still pending from the previous page
                    putImage(100000);
                }
            }
        };

        const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (inText) {
                switch (tag) {
                    case 'i':
                        italic = false;
                        break;
                    case 'b':
                        bold = false;
                        break;
                }
            }

            if (tag == 'text')
                inText = false;
        };

        let buf = this.decode(data).toString();
        sax.parseSync(buf, {
            onStartNode, onEndNode, onTextNode
        });

        putImage(100000);

        await Promise.all(loading);

        // find paragraphs and indents: every distinct `left` value gets a rank 1, 2, 3...
        const indents = [];
        for (const line of lines) {
            if (line.isImage)
                continue;
            if (!isNaN(line.left)) {
                indents[line.left] = 1;
            }
        }

        let j = 0;
        for (let i = 0; i < indents.length; i++) {
            if (indents[i]) {
                j++;
                indents[i] = j;
            }
        }
        indents[0] = 0;

        // build the text
        let text = `<title>${title}</title>`;
        let concat = '';
        let sp = '';
        for (const line of lines) {
            if (line.isImage) {
                // images that were not loaded (unsupported extension) have neither data nor
                // a name; emitting them would create a bogus binary named "undefined"
                if (line.data)
                    text += `<fb2-image type="${line.type}" name="${line.name}">${line.data}</fb2-image>`;
                continue;
            }

            if (concat == '') {
                const left = line.left || 0;
                sp = ' '.repeat(indents[left]);
            }

            let t = line.text.trim();
            if (t.endsWith('-')) {
                // a trailing hyphen means the word continues on the next line
                t = t.slice(0, -1);
                concat += t;
            } else {
                text += sp + concat + t + "\n";
                concat = '';
            }
        }
        if (concat)
            text += sp + concat + "\n";

        return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true});
    }
}
|
||||
|
||||
module.exports = ConvertPdf;
|
||||
33
server/core/Reader/BookConverter/ConvertRtf.js
Normal file
33
server/core/Reader/BookConverter/ConvertRtf.js
Normal file
@@ -0,0 +1,33 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const ConvertDocX = require('./ConvertDocX');
|
||||
|
||||
/**
 * Converter for rtf files: the file is turned into docx with LibreOffice and then
 * handed to the ConvertDocX pipeline.
 */
class ConvertRtf extends ConvertDocX {
    /**
     * @returns truthy only when external converters are enabled and the source type is 'rtf'.
     */
    check(data, opts) {
        const files = opts.inputFiles;

        return this.config.useExternalBookConverter &&
            files.sourceFileType && files.sourceFileType.ext == 'rtf';
    }

    /**
     * @returns {Promise<false|Buffer>} false when check() fails, otherwise the fb2 file contents.
     */
    async run(data, opts) {
        const accepted = this.check(data, opts);
        if (!accepted)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;

        // all intermediate files share the source file's base name inside filesDir
        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const rtfFile = `${base}.rtf`;

        await fs.copy(inputFiles.sourceFile, rtfFile);

        const sofficeArgs = ['--headless', '--convert-to', 'docx', '--outdir', inputFiles.filesDir, rtfFile];
        await this.execConverter(this.sofficePath, sofficeArgs);

        return await super.convert(`${base}.docx`, `${base}.fb2`, callback);
    }
}
|
||||
|
||||
module.exports = ConvertRtf;
|
||||
281
server/core/Reader/BookConverter/ConvertSamlib.js
Normal file
281
server/core/Reader/BookConverter/ConvertSamlib.js
Normal file
@@ -0,0 +1,281 @@
|
||||
const _ = require('lodash');
|
||||
const URL = require('url').URL;
|
||||
|
||||
const sax = require('./sax');
|
||||
const ConvertBase = require('./ConvertBase');
|
||||
|
||||
class ConvertSamlib extends ConvertBase {
|
||||
check(data, opts) {
|
||||
const {url, dataType} = opts;
|
||||
|
||||
const parsedUrl = new URL(url);
|
||||
if (dataType && dataType.ext == 'html' &&
|
||||
(parsedUrl.hostname == 'samlib.ru' ||
|
||||
parsedUrl.hostname == 'budclub.ru' ||
|
||||
parsedUrl.hostname == 'zhurnal.lib.ru')) {
|
||||
return {hostname: parsedUrl.hostname};
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
async run(data, opts) {
|
||||
if (!opts.enableSitesFilter)
|
||||
return false;
|
||||
|
||||
const checkResult = this.check(data, opts);
|
||||
if (!checkResult)
|
||||
return false;
|
||||
|
||||
const {hostname} = checkResult;
|
||||
let titleInfo = {};
|
||||
let desc = {_n: 'description', 'title-info': titleInfo};
|
||||
let pars = [];
|
||||
let body = {_n: 'body', section: {_a: pars}};
|
||||
let fb2 = [desc, body];
|
||||
|
||||
let inSubtitle = false;
|
||||
let inJustify = true;
|
||||
let inImage = false;
|
||||
let isFirstPara = false;
|
||||
let path = '';
|
||||
let tag = '';// eslint-disable-line no-unused-vars
|
||||
|
||||
let inText = false;
|
||||
let textFound = false;
|
||||
let node = {_a: pars};
|
||||
|
||||
let inPara = false;
|
||||
let italic = false;
|
||||
let bold = false;
|
||||
|
||||
const openTag = (name, attrs) => {
|
||||
if (name == 'p')
|
||||
inPara = true;
|
||||
let n = {_n: name, _attrs: attrs, _a: [], _p: node};
|
||||
node._a.push(n);
|
||||
node = n;
|
||||
};
|
||||
|
||||
const closeTag = (name) => {
|
||||
if (name == 'p')
|
||||
inPara = false;
|
||||
if (node._p) {
|
||||
const exact = (node._n == name);
|
||||
node = node._p;
|
||||
if (!exact)
|
||||
closeTag(name);
|
||||
}
|
||||
};
|
||||
|
||||
const growParagraph = (text) => {
|
||||
if (!node._p) {
|
||||
if (text.trim() != '')
|
||||
openTag('p');
|
||||
else
|
||||
return;
|
||||
}
|
||||
if (node._n == 'p' && node._a.length == 0)
|
||||
text = text.trimLeft();
|
||||
node._a.push({_t: text});
|
||||
};
|
||||
|
||||
const onStartNode = (elemName, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (elemName == '')
|
||||
return;
|
||||
if (!inText) {
|
||||
path += '/' + elemName;
|
||||
tag = elemName;
|
||||
} else {
|
||||
switch (elemName) {
|
||||
case 'li':
|
||||
case 'p':
|
||||
case 'dd':
|
||||
case 'br':
|
||||
if (!(inSubtitle && isFirstPara)) {
|
||||
if (inPara)
|
||||
closeTag('p');
|
||||
openTag('p');
|
||||
}
|
||||
isFirstPara = false;
|
||||
break;
|
||||
case 'h1':
|
||||
case 'h2':
|
||||
case 'h3':
|
||||
if (inPara)
|
||||
closeTag('p');
|
||||
openTag('p');
|
||||
bold = true;
|
||||
break;
|
||||
case 'i':
|
||||
case 'em':
|
||||
italic = true;
|
||||
break;
|
||||
case 'b':
|
||||
case 'strong':
|
||||
bold = true;
|
||||
break;
|
||||
case 'div':
|
||||
if (inPara)
|
||||
closeTag('p');
|
||||
if (tail.indexOf('align="center"') >= 0) {
|
||||
openTag('subtitle');
|
||||
inSubtitle = true;
|
||||
isFirstPara = true;
|
||||
}
|
||||
|
||||
if (tail.indexOf('align="justify"') >= 0) {
|
||||
openTag('p');
|
||||
inJustify = true;
|
||||
}
|
||||
|
||||
break;
|
||||
case 'img': {
|
||||
if (inPara)
|
||||
closeTag('p');
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
if (attrs.src && attrs.src.value) {
|
||||
let href = attrs.src.value;
|
||||
if (href[0] == '/')
|
||||
href = `http://${hostname}${href}`;
|
||||
openTag('image', {'l:href': href});
|
||||
inImage = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const onEndNode = (elemName, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (!inText) {
|
||||
const oldPath = path;
|
||||
let t = '';
|
||||
do {
|
||||
let i = path.lastIndexOf('/');
|
||||
t = path.substr(i + 1);
|
||||
path = path.substr(0, i);
|
||||
} while (t != elemName && path);
|
||||
|
||||
if (t != elemName) {
|
||||
path = oldPath;
|
||||
}
|
||||
|
||||
let i = path.lastIndexOf('/');
|
||||
tag = path.substr(i + 1);
|
||||
} else {
|
||||
switch (elemName) {
|
||||
case 'li':
|
||||
case 'p':
|
||||
case 'dd':
|
||||
closeTag('p');
|
||||
break;
|
||||
case 'h1':
|
||||
case 'h2':
|
||||
case 'h3':
|
||||
closeTag('p');
|
||||
bold = false;
|
||||
break;
|
||||
case 'i':
|
||||
case 'em':
|
||||
italic = false;
|
||||
break;
|
||||
case 'b':
|
||||
case 'strong':
|
||||
bold = false;
|
||||
break;
|
||||
case 'div':
|
||||
if (inSubtitle) {
|
||||
closeTag('subtitle');
|
||||
inSubtitle = false;
|
||||
isFirstPara = false;
|
||||
}
|
||||
|
||||
if (inJustify) {
|
||||
closeTag('p');
|
||||
inJustify = false;
|
||||
}
|
||||
break;
|
||||
case 'img':
|
||||
if (inImage)
|
||||
closeTag('image');
|
||||
inImage = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const onComment = (text) => {// eslint-disable-line no-unused-vars
    // SAX comment handler: two marker comments switch the inText flag on and off.
    const textBeginMarker = '--------- Собственно произведение -------------';
    const textEndMarker = '-----------------------------------------------';

    if (text == textBeginMarker) {
        inText = true;
        // remembered so the caller can tell whether the text section was ever seen
        textFound = true;
    }

    if (text == textEndMarker) {
        inText = false;
    }
};
|
||||
|
||||
const onTextNode = (text) => {// eslint-disable-line no-unused-vars
    // SAX text handler: fills titleInfo from two known paths, otherwise appends
    // the (optionally bold/italic wrapped) text to the current paragraph.

    // Whitespace-only text collapses to one space if it holds a space, else to nothing.
    if (text && text.trim() == '') {
        text = (text.indexOf(' ') >= 0 ? ' ' : '');
    }

    if (!text) {
        return;
    }

    text = this.escapeEntities(text);

    if (path === '/html/body/center/h2') {
        titleInfo['book-title'] = text;
        return;
    }

    if (path === '/html/body/div/h3') {
        if (!titleInfo.author) {
            titleInfo.author = {};
        }

        // The first ':' is dropped, then the words are assigned in order to
        // last-name, first-name and middle-name.
        const words = text.replace(':', '').trim().split(' ');
        ['last-name', 'first-name', 'middle-name'].forEach((key, idx) => {
            if (words[idx])
                titleInfo.author[key] = words[idx];
        });
        return;
    }

    if (!inText) {
        return;
    }

    const openMarkup = `${bold ? '<strong>' : ''}${italic ? '<emphasis>' : ''}`;
    const closeMarkup = `${italic ? '</emphasis>' : ''}${bold ? '</strong>' : ''}`;
    growParagraph(`${openMarkup}${text}${closeMarkup}`);
};
|
||||
|
||||
sax.parseSync(this.decode(data).toString().replace(/ /g, ' '), {
|
||||
onStartNode, onEndNode, onTextNode, onComment,
|
||||
innerCut: new Set(['head', 'script', 'style'])
|
||||
});
|
||||
|
||||
//текст не найден на странице, обработать корректно не получилось
|
||||
if (!textFound)
|
||||
return false;
|
||||
|
||||
const title = (titleInfo['book-title'] ? titleInfo['book-title'] : '');
|
||||
let author = '';
|
||||
if (titleInfo.author) {
|
||||
author = _.compact([
|
||||
(titleInfo.author['last-name'] ? titleInfo.author['last-name'] : ''),
|
||||
(titleInfo.author['first-name'] ? titleInfo.author['first-name'] : ''),
|
||||
(titleInfo.author['middle-name'] ? titleInfo.author['middle-name'] : ''),
|
||||
]).join(' ');
|
||||
}
|
||||
|
||||
pars.unshift({_n: 'title', _a: [
|
||||
{_n: 'p', _t: author}, {_n: 'p', _t: ''},
|
||||
{_n: 'p', _t: title}, {_n: 'p', _t: ''},
|
||||
]})
|
||||
|
||||
return this.formatFb2(fb2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
module.exports = ConvertSamlib;
|
||||
71
server/core/Reader/BookConverter/ConvertSites.js
Normal file
71
server/core/Reader/BookConverter/ConvertSites.js
Normal file
@@ -0,0 +1,71 @@
|
||||
const URL = require('url').URL;
|
||||
|
||||
const ConvertHtml = require('./ConvertHtml');
|
||||
|
||||
// Builds a filter rule handled by the 'cutter' converter: the page content
// between the `begin` and `end` markers is kept.
const cutterRule = (begin, end) => ({converter: 'cutter', begin, end});

// Per-hostname extraction rules, keyed by the URL hostname.
const sitesFilter = {
    'www.fanfiction.net': cutterRule(
        "<div class='storytext xcontrast_txt nocopy' id='storytext'>",
        "<div style='height:5px'></div><div style='clear:both;text-align:right;'>"
    ),
    'archiveofourown.org': cutterRule(
        '<!-- BEGIN section where work skin applies -->',
        '<!-- END work skin -->'
    ),
};
|
||||
|
||||
/**
 * Converter for HTML pages of sites listed in sitesFilter: cuts the story
 * text out of the page and hands it to the ConvertHtml implementation.
 */
class ConvertSites extends ConvertHtml {
    /**
     * Decides whether this converter applies.
     * @param {*} data - unused here
     * @param {Object} opts - reads opts.url and opts.dataType
     * @returns {{hostname: string}|false} the matched hostname, or false
     * @throws whatever `new URL(opts.url)` throws for an unparsable url
     */
    check(data, opts) {
        const {url, dataType} = opts;

        const parsedUrl = new URL(url);
        if (dataType && dataType.ext == 'html') {
            // own-property test: a hostname such as "constructor" must not
            // match something inherited from Object.prototype
            if (Object.prototype.hasOwnProperty.call(sitesFilter, parsedUrl.hostname))
                return {hostname: parsedUrl.hostname};
        }

        return false;
    }

    /**
     * Runs the site-specific converter named in sitesFilter, then the parent
     * HTML conversion on the extracted fragment.
     * @param {*} data - raw page data, passed to this.decode()
     * @param {Object} opts - nothing happens unless opts.enableSitesFilter is truthy
     * @returns {Promise<*>} false when not applicable or when the converter
     *     returned false, otherwise the result of super.run()
     */
    async run(data, opts) {
        if (!opts.enableSitesFilter)
            return false;

        const checkResult = this.check(data, opts);
        if (!checkResult)
            return false;

        const {hostname} = checkResult;

        let text = this.decode(data).toString();

        text = this[sitesFilter[hostname].converter](text, sitesFilter[hostname]);

        if (text === false)
            return false;

        return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true});
    }

    /**
     * Extracts the first <title>...</title> of the page.
     * @param {string} text - page HTML
     * @returns {string} a `<title>` element holding the trimmed title
     *     (empty when the page has none)
     */
    getTitle(text) {
        let title = '';
        const m = text.match(/<title>([\s\S]*?)<\/title>/);
        if (m)
            title = m[1];

        return `<title>${title.trim()}</title>`;
    }

    /**
     * Returns the text between opts.begin and the first opts.end that follows
     * it, with the page title appended.
     * @param {string} text - page HTML
     * @param {{begin: string, end: string}} opts - marker strings
     * @returns {string|false} false when either marker is missing or the
     *     fragment between them is empty
     */
    cutter(text, opts) {
        const title = this.getTitle(text);

        // test for -1 BEFORE adding the marker length, otherwise a missing
        // marker turns into a valid-looking non-negative offset
        const beginPos = text.indexOf(opts.begin);
        if (beginPos < 0)
            return false;

        const l = beginPos + opts.begin.length;
        // look for the end marker only after the begin marker
        const r = text.indexOf(opts.end, l);
        if (r < 0 || r <= l)
            return false;

        return text.substring(l, r) + title;
    }
}
|
||||
|
||||
module.exports = ConvertSites;
|
||||
59
server/core/Reader/BookConverter/index.js
Normal file
59
server/core/Reader/BookConverter/index.js
Normal file
@@ -0,0 +1,59 @@
|
||||
const fs = require('fs-extra');
|
||||
const FileDetector = require('../../FileDetector');
|
||||
|
||||
// Converter classes in the order they are tried — the order matters.
const convertClassFactory = [
    require('./ConvertEpub'),
    require('./ConvertPdf'),
    require('./ConvertRtf'),
    require('./ConvertDocX'),
    require('./ConvertDoc'),
    require('./ConvertMobi'),
    require('./ConvertFb2'),
    require('./ConvertSamlib'),
    require('./ConvertSites'),
    require('./ConvertHtml'),
];
|
||||
|
||||
/**
 * Tries every converter from convertClassFactory, in order, until one of
 * them produces a result.
 */
class BookConverter {
    /**
     * @param {Object} config - passed unchanged to every converter constructor
     */
    constructor(config) {
        this.detector = new FileDetector();
        this.convertFactory = convertClassFactory.map((ConvertClass) => new ConvertClass(config));
    }

    /**
     * Converts inputFiles.selectedFile and writes the result to outputFile.
     * If no converter accepts the file and inputFiles.nesting is set, the
     * nested entry is tried recursively.
     * @param {Object} inputFiles - reads .selectedFile and .nesting
     * @param {string} outputFile - path the result is written to
     * @param {Object} opts - merged into the options given to each converter
     * @param {Function} callback - handed to the converters and called with 100 at the end
     * @returns {Promise<*>} the truthy value returned by the successful converter
     * @throws {Error} when nothing could convert the file
     */
    async convertToFb2(inputFiles, outputFile, opts, callback) {
        const sourceFile = inputFiles.selectedFile;
        const selectedFileType = await this.detector.detectFile(sourceFile);
        const data = await fs.readFile(sourceFile);

        const convertOpts = Object.assign({}, opts, {inputFiles, callback, dataType: selectedFileType});

        let result = false;
        for (const converter of this.convertFactory) {
            result = await converter.run(data, convertOpts);
            if (!result)
                continue;

            await fs.writeFile(outputFile, result);
            break;
        }

        if (!result && inputFiles.nesting)
            result = await this.convertToFb2(inputFiles.nesting, outputFile, opts, callback);

        if (!result) {
            const message = (selectedFileType
                ? `Этот формат файла не поддерживается: ${selectedFileType.mime}`
                : `Не удалось определить формат файла: ${opts.url}`
            );
            throw new Error(message);
        }

        callback(100);
        return result;
    }
}
|
||||
|
||||
module.exports = BookConverter;
|
||||
359
server/core/Reader/BookConverter/sax.js
Normal file
359
server/core/Reader/BookConverter/sax.js
Normal file
@@ -0,0 +1,359 @@
|
||||
/**
 * Minimal synchronous SAX-style scanner for XML/HTML text.
 *
 * Callbacks (all optional, given in `options`):
 *   onStartNode(tag, tail, singleTag, cutCounter, cutTag)
 *   onEndNode(tag, tail, singleTag, cutCounter, cutTag)
 *   onTextNode(text, cutCounter, cutTag)
 *   onCdata(text, cutCounter, cutTag)
 *   onComment(text, cutCounter, cutTag)
 *   onProgress(percent)
 * `tag` is lower-cased; `tail` is the rest of the tag after its name
 * (starting with the whitespace character), `singleTag` is true for `<x/>`.
 * `options.innerCut` is a Set of tag names: while the scanner is inside such
 * a tag, cutCounter is above zero and cutTag holds that name.
 *
 * Unterminated markup at the end of input is still reported: an open comment
 * or CDATA section through its callback, anything else as text.
 *
 * @param {string} xstr - text to scan
 * @param {Object} options - callbacks and innerCut
 */
function parseSync(xstr, options) {
    const dummy = () => {};
    const {onStartNode: _onStartNode = dummy,
        onEndNode: _onEndNode = dummy,
        onTextNode: _onTextNode = dummy,
        onCdata: _onCdata = dummy,
        onComment: _onComment = dummy,
        onProgress: _onProgress = dummy,
        innerCut = new Set()
    } = options;

    //a tag name ends at any HTML whitespace character, not only at a space
    const tagNameEnd = /[ \t\n\f\r]/;

    let i = 0;
    const len = xstr.length;
    const progStep = len/20;
    let nextProg = 0;

    let cutCounter = 0;
    let cutTag = '';
    let inCdata;
    let inComment;
    let leftData = 0;
    while (i < len) {
        inCdata = false;
        inComment = false;
        let singleTag = false;

        const left = xstr.indexOf('<', i);
        if (left < 0)
            break;
        leftData = left;

        //leftData is moved to the last character of the opening marker;
        //startsWith inspects only the characters at `left`
        if (xstr.startsWith('<!--', left)) {
            inComment = true;
            leftData = left + 3;
        } else if (xstr.startsWith('<![CDATA[', left)) {
            inCdata = true;
            leftData = left + 8;
        }

        if (left != i) {
            _onTextNode(xstr.substring(i, left), cutCounter, cutTag);
            //this text is consumed: if the markup that follows is never
            //closed, the tail handling below must not report it again
            i = left;
        }

        let right = null;
        let rightData = null;
        if (inCdata) {
            rightData = xstr.indexOf(']]>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else if (inComment) {
            rightData = xstr.indexOf('-->', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else {
            rightData = xstr.indexOf('>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData;
            if (xstr[right - 1] === '/') {
                singleTag = true;
                rightData--;
            }
        }

        const tagData = xstr.substring(leftData + 1, rightData);

        if (inCdata) {
            _onCdata(tagData, cutCounter, cutTag);
        } else if (inComment) {
            _onComment(tagData, cutCounter, cutTag);
        } else {
            let tag = '';
            let tail = '';
            const nameEnd = tagData.search(tagNameEnd);
            if (nameEnd >= 0) {
                tail = tagData.substring(nameEnd);
                tag = tagData.substring(0, nameEnd);
            } else {
                tag = tagData;
            }
            tag = tag.toLowerCase();

            //a self-closing tag produces a start and an end event
            let endTag = (singleTag ? tag : '');
            if (tag === '' || tag[0] !== '/') {
                _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
            } else {
                endTag = tag.substring(1);
            }

            if (endTag)
                _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);

            //nesting depth of the innerCut tag we are currently inside
            if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                if (!cutCounter)
                    cutTag = tag;
                cutCounter++;
            }

            if (cutTag === endTag) {
                cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
                if (!cutCounter)
                    cutTag = '';
            }
        }

        if (right >= nextProg) {
            _onProgress(Math.round(right/(len + 1)*100));
            nextProg += progStep;
        }
        i = right + 1;
    }

    //whatever is left: trailing text or unterminated markup
    if (i < len) {
        if (inCdata) {
            //leftData points at the last char of '<![CDATA[', content starts after it
            _onCdata(xstr.substring(leftData + 1), cutCounter, cutTag);
        } else if (inComment) {
            //leftData points at the last char of '<!--', content starts after it
            _onComment(xstr.substring(leftData + 1), cutCounter, cutTag);
        } else {
            _onTextNode(xstr.substring(i), cutCounter, cutTag);
        }
    }

    _onProgress(100);
}
|
||||
|
||||
/**
 * Asynchronous copy of parseSync: the same scanner, but every callback is
 * awaited, so callbacks may be async functions. It is derived from parseSync
 * by replacing "_on" with "await _on" from the while loop onwards — keep the
 * two functions in step.
 *
 * Callbacks (all optional, given in `options`):
 *   onStartNode(tag, tail, singleTag, cutCounter, cutTag)
 *   onEndNode(tag, tail, singleTag, cutCounter, cutTag)
 *   onTextNode(text, cutCounter, cutTag)
 *   onCdata(text, cutCounter, cutTag)
 *   onComment(text, cutCounter, cutTag)
 *   onProgress(percent)
 * `options.innerCut` is a Set of tag names: while the scanner is inside such
 * a tag, cutCounter is above zero and cutTag holds that name.
 *
 * @param {string} xstr - text to scan
 * @param {Object} options - callbacks and innerCut
 * @returns {Promise<void>}
 */
async function parse(xstr, options) {
    const dummy = () => {};
    const {onStartNode: _onStartNode = dummy,
        onEndNode: _onEndNode = dummy,
        onTextNode: _onTextNode = dummy,
        onCdata: _onCdata = dummy,
        onComment: _onComment = dummy,
        onProgress: _onProgress = dummy,
        innerCut = new Set()
    } = options;

    //a tag name ends at any HTML whitespace character, not only at a space
    const tagNameEnd = /[ \t\n\f\r]/;

    let i = 0;
    const len = xstr.length;
    const progStep = len/20;
    let nextProg = 0;

    let cutCounter = 0;
    let cutTag = '';
    let inCdata;
    let inComment;
    let leftData = 0;
    while (i < len) {
        inCdata = false;
        inComment = false;
        let singleTag = false;

        const left = xstr.indexOf('<', i);
        if (left < 0)
            break;
        leftData = left;

        //leftData is moved to the last character of the opening marker;
        //startsWith inspects only the characters at `left`
        if (xstr.startsWith('<!--', left)) {
            inComment = true;
            leftData = left + 3;
        } else if (xstr.startsWith('<![CDATA[', left)) {
            inCdata = true;
            leftData = left + 8;
        }

        if (left != i) {
            await _onTextNode(xstr.substring(i, left), cutCounter, cutTag);
            //this text is consumed: if the markup that follows is never
            //closed, the tail handling below must not report it again
            i = left;
        }

        let right = null;
        let rightData = null;
        if (inCdata) {
            rightData = xstr.indexOf(']]>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else if (inComment) {
            rightData = xstr.indexOf('-->', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else {
            rightData = xstr.indexOf('>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData;
            if (xstr[right - 1] === '/') {
                singleTag = true;
                rightData--;
            }
        }

        const tagData = xstr.substring(leftData + 1, rightData);

        if (inCdata) {
            await _onCdata(tagData, cutCounter, cutTag);
        } else if (inComment) {
            await _onComment(tagData, cutCounter, cutTag);
        } else {
            let tag = '';
            let tail = '';
            const nameEnd = tagData.search(tagNameEnd);
            if (nameEnd >= 0) {
                tail = tagData.substring(nameEnd);
                tag = tagData.substring(0, nameEnd);
            } else {
                tag = tagData;
            }
            tag = tag.toLowerCase();

            //a self-closing tag produces a start and an end event
            let endTag = (singleTag ? tag : '');
            if (tag === '' || tag[0] !== '/') {
                await _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
            } else {
                endTag = tag.substring(1);
            }

            if (endTag)
                await _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);

            //nesting depth of the innerCut tag we are currently inside
            if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                if (!cutCounter)
                    cutTag = tag;
                cutCounter++;
            }

            if (cutTag === endTag) {
                cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
                if (!cutCounter)
                    cutTag = '';
            }
        }

        if (right >= nextProg) {
            await _onProgress(Math.round(right/(len + 1)*100));
            nextProg += progStep;
        }
        i = right + 1;
    }

    //whatever is left: trailing text or unterminated markup
    if (i < len) {
        if (inCdata) {
            //leftData points at the last char of '<![CDATA[', content starts after it
            await _onCdata(xstr.substring(leftData + 1), cutCounter, cutTag);
        } else if (inComment) {
            //leftData points at the last char of '<!--', content starts after it
            await _onComment(xstr.substring(leftData + 1), cutCounter, cutTag);
        } else {
            await _onTextNode(xstr.substring(i), cutCounter, cutTag);
        }
    }

    await _onProgress(100);
}
|
||||
|
||||
/**
 * Parses the attribute part of a tag (the `tail` reported by parseSync/parse).
 *
 * Values may be double-quoted, single-quoted or unquoted; an attribute
 * without a value gets ''. A name of the form `ns:name` is split at the
 * colon and stored under `name`.
 *
 * @param {string} tail - e.g. ` src="a.png" l:href='#x' hidden`
 * @returns {Object} map: attribute name -> {value, ns}
 */
function getAttrsSync(tail) {
    const result = {};
    let name = '';
    let value = '';
    let vOpen = '';//what opened the current value: '"', "'" or ' ' (unquoted)
    let inName = false;
    let inValue = false;
    let waitValue = false;
    let waitEq = false;

    //stores the attribute collected so far (if any) and resets the state
    const pushResult = () => {
        if (name != '') {
            let ns = '';
            if (name.indexOf(':') >= 0) {
                [ns, name] = name.split(':');
            }

            result[name] = {value, ns};
        }
        name = '';
        value = '';
        vOpen = '';
        inName = false;
        inValue = false;
        waitValue = false;
        waitEq = false;
    };

    //all whitespace is treated as a plain space
    const text = tail.replace(/[\t\n\r]/g, ' ');
    for (let i = 0; i < text.length; i++) {
        const c = text.charAt(i);
        const isQuote = (c == '"' || c == "'");

        if (c == ' ') {
            if (inValue) {
                //a space belongs to a quoted value and terminates an unquoted one
                if (vOpen == '"' || vOpen == "'")
                    value += c;
                else
                    pushResult();
            } else if (inName) {
                waitEq = true;
                inName = false;
            }
        } else if (!inValue && c == '=') {
            waitEq = false;
            waitValue = true;
            inName = false;
        } else if (isQuote && inValue && vOpen == c) {
            //closing quote, same kind as the opening one
            pushResult();
        } else if (isQuote && !inValue && waitValue) {
            //opening quote
            inValue = true;
            vOpen = c;
        } else if (c == '"' && (!inValue || vOpen == ' ')) {
            //a double quote ends an unquoted value; outside a value it is ignored
            if (inValue)
                pushResult();
        } else if (inValue) {
            //includes the "other" quote character inside a quoted value
            value += c;
        } else if (inName) {
            name += c;
        } else if (waitEq) {
            //a new name started, so the previous attribute had no value
            pushResult();
            inName = true;
            name = c;
        } else if (waitValue) {
            //unquoted value
            waitValue = false;
            inValue = true;
            vOpen = ' ';
            value = c;
        } else {
            inName = true;
            name = c;
        }
    }
    if (name != '')
        pushResult();

    return result;
}
|
||||
|
||||
module.exports = {
|
||||
parseSync,
|
||||
getAttrsSync,
|
||||
parse
|
||||
}
|
||||
110
server/core/Reader/BookConverter/textUtils.js
Normal file
110
server/core/Reader/BookConverter/textUtils.js
Normal file
@@ -0,0 +1,110 @@
|
||||
/**
 * Scores a byte buffer against several Cyrillic code pages and UTF-8.
 *
 * Bytes of 128 and above add a weight of 3 or 1 to every code page whose
 * scored ranges contain them; the pair 208/209 followed by a byte in
 * 128..190 scores for UTF-8 instead.
 *
 * @param {Buffer|Array<number>} buf - bytes to inspect
 * @param {boolean} [returnAll] - return the whole sorted score list
 * @returns {string|Array<{codePage: string, c: number, totalChecked: number}>}
 *     with returnAll: all candidates, best score first; otherwise the best
 *     code page name if its score is positive and above half of the number
 *     of inspected bytes, else 'ISO-8859-5'
 */
function getEncoding(buf, returnAll) {
    const lowerCase = 3;
    const upperCase = 1;

    const codePage = {
        'k': 'koi8-r',
        'w': 'Windows-1251',
        'd': 'cp866',
        'i': 'ISO-8859-5',
        'm': 'maccyrillic',
        'u': 'utf-8',
    };

    //scores, in the same key order as codePage (the order decides ties)
    const charsets = {};
    for (const key of Object.keys(codePage))
        charsets[key] = 0;

    //[charset key, exclusive lower bound, exclusive upper bound, weight]
    const ranges = [
        //CP866
        ['d', 159, 176, lowerCase],
        ['d', 223, 242, lowerCase],
        ['d', 127, 160, upperCase],
        //KOI8-R
        ['k', 191, 223, lowerCase],
        ['k', 222, 256, upperCase],
        //WIN-1251
        ['w', 223, 256, lowerCase],
        ['w', 191, 224, upperCase],
        //MAC
        ['m', 221, 255, lowerCase],
        ['m', 127, 160, upperCase],
        //ISO-8859-5
        ['i', 207, 240, lowerCase],
        ['i', 175, 208, upperCase],
    ];

    const len = buf.length;
    const blockSize = (len > 5*3000 ? 3000 : len);
    let scoredInBlock = 0;
    let totalChecked = 0;
    let pos = 0;
    while (pos < len) {
        const char = buf[pos];
        const nextChar = (pos < len - 1 ? buf[pos + 1] : 0);
        totalChecked++;
        pos++;

        //non-russian characters
        if (char < 128 || char > 256)
            continue;

        const isUtf8Pair = (char == 208 || char == 209) && nextChar >= 128 && nextChar <= 190;
        if (isUtf8Pair) {
            charsets['u'] += lowerCase;
        } else {
            for (const [key, above, below, weight] of ranges) {
                if (char > above && char < below)
                    charsets[key] += weight;
            }
        }

        scoredInBlock++;

        //after a full block of scored bytes, jump ahead in the buffer
        if (scoredInBlock > blockSize) {
            scoredInBlock = 0;
            pos += Math.round(len/2 - 2*blockSize);
        }
    }

    const sorted = Object.keys(charsets).map((key) => (
        {codePage: codePage[key], c: charsets[key], totalChecked}
    ));
    sorted.sort((a, b) => b.c - a.c);

    if (returnAll)
        return sorted;

    const best = sorted[0];
    if (best.c > 0 && best.c > best.totalChecked/2)
        return best.codePage;

    return 'ISO-8859-5';
}
|
||||
|
||||
/**
 * Heuristic test for "this buffer is text".
 *
 * True when the best getEncoding() score exceeds 90% of the inspected bytes,
 * when the buffer is shorter than 1000 bytes, or when spaces (above 10%),
 * CR (above 3%) or LF (above 3%) are frequent enough.
 *
 * @param {Buffer|Array<number>} buf - bytes to inspect
 * @returns {boolean}
 */
function checkIfText(buf) {
    const [best] = getEncoding(buf, true);
    if (best.c > best.totalChecked*0.9)
        return true;

    //occurrences of space (32), CR (13) and LF (10)
    const counts = {32: 0, 13: 0, 10: 0};
    for (let pos = 0; pos < buf.length; pos++) {
        const byte = buf[pos];
        if (byte == 32 || byte == 13 || byte == 10)
            counts[byte]++;
    }

    const total = buf.length + 1;
    const spaceFreq = counts[32]/total;
    const crFreq = counts[13]/total;
    const lfFreq = counts[10]/total;

    return (buf.length < 1000 || spaceFreq > 0.1 || crFreq > 0.03 || lfFreq > 0.03);
}
|
||||
|
||||
module.exports = {
|
||||
getEncoding,
|
||||
checkIfText,
|
||||
}
|
||||
126
server/core/Reader/ReaderStorage.js
Normal file
126
server/core/Reader/ReaderStorage.js
Normal file
@@ -0,0 +1,126 @@
|
||||
const SQL = require('sql-template-strings');
|
||||
const _ = require('lodash');
|
||||
|
||||
const ConnManager = require('../../db/ConnManager');//singleton
|
||||
|
||||
let instance = null;
|
||||
|
||||
//singleton
|
||||
/**
 * Revisioned key/value storage for the reader, kept in the `storage` table
 * (columns id, rev, time, data) with an in-memory cache of revisions.
 * Singleton: every `new ReaderStorage()` returns the same instance.
 */
class ReaderStorage {
    constructor() {
        if (!instance) {
            this.connManager = new ConnManager();
            this.storagePool = this.connManager.pool.readerStorage;
            this.periodicCleanCache(3*3600*1000);//once every 3 hours

            instance = this;
        }

        return instance;
    }

    /**
     * Dispatches a storage action.
     * @param {Object} act - {action: 'check'|'get'|'set', items: Object, force?}
     * @returns {Promise<Object>} result of checkItems/getItems/setItems
     * @throws {Error} when act.items is not an object or the action is unknown
     */
    async doAction(act) {
        if (!_.isObject(act.items))
            throw new Error('items is not an object');

        let result = {};
        switch (act.action) {
            case 'check':
                result = await this.checkItems(act.items);
                break;
            case 'get':
                result = await this.getItems(act.items);
                break;
            case 'set':
                result = await this.setItems(act.items, act.force);
                break;
            default:
                throw new Error('Unknown action');
        }

        return result;
    }

    /**
     * Returns the stored revision of every id in `items` (0 when absent),
     * served from the cache when possible.
     * @param {Object} items - only the keys (ids) are used
     * @returns {Promise<{state: string, items: Object}>} items: id -> {rev}
     */
    async checkItems(items) {
        let result = {state: 'success', items: {}};

        const dbh = await this.storagePool.get();
        try {
            for (const id of Object.keys(items)) {
                if (this.cache[id]) {
                    result.items[id] = this.cache[id];
                } else {
                    const rows = await dbh.all(SQL`SELECT rev FROM storage WHERE id = ${id}`);
                    const rev = (rows.length && rows[0].rev ? rows[0].rev : 0);
                    result.items[id] = {rev};
                    this.cache[id] = result.items[id];
                }
            }
        } finally {
            dbh.ret();
        }

        return result;
    }

    /**
     * Reads revision and data of every id in `items` from the database.
     * @param {Object} items - only the keys (ids) are used
     * @returns {Promise<{state: string, items: Object}>} items: id -> {rev, data};
     *     rev is 0 and data is '' for a missing row
     */
    async getItems(items) {
        let result = {state: 'success', items: {}};

        const dbh = await this.storagePool.get();
        try {
            for (const id of Object.keys(items)) {
                const rows = await dbh.all(SQL`SELECT rev, data FROM storage WHERE id = ${id}`);
                const rev = (rows.length && rows[0].rev ? rows[0].rev : 0);
                const data = (rows.length && rows[0].data ? rows[0].data : '');
                result.items[id] = {rev, data};
            }
        } finally {
            dbh.ret();
        }

        return result;
    }

    /**
     * Stores all items in one transaction.
     * Unless `force` is set, every item's rev must be exactly the stored
     * rev + 1; otherwise nothing is written and the current revisions are
     * returned with state 'reject'.
     * @param {Object} items - id -> {rev, data}, data must be a string
     * @param {boolean} [force] - skip the revision check
     * @returns {Promise<Object>} {state: 'success'} or {state: 'reject', items}
     * @throws {Error} when some data is not a string, or on a database error
     */
    async setItems(items, force) {
        let check = await this.checkItems(items);

        //check the revisions first
        for (const id of Object.keys(items)) {
            if (!_.isString(items[id].data))
                throw new Error('items.data is not a string');

            if (!force && check.items[id].rev + 1 !== items[id].rev)
                return {state: 'reject', items: check.items};
        }

        const dbh = await this.storagePool.get();
        try {
            //BEGIN is inside the outer try so that the handle goes back to
            //the pool even when the transaction cannot be started
            await dbh.run('BEGIN');
            try {
                const newRev = {};
                for (const id of Object.keys(items)) {
                    await dbh.run(SQL`INSERT OR REPLACE INTO storage (id, rev, time, data) VALUES (${id}, ${items[id].rev}, strftime('%s','now'), ${items[id].data})`);
                    newRev[id] = {rev: items[id].rev};
                }
                await dbh.run('COMMIT');

                Object.assign(this.cache, newRev);
            } catch (e) {
                await dbh.run('ROLLBACK');
                throw e;
            }
        } finally {
            dbh.ret();
        }

        return {state: 'success'};
    }

    /**
     * Empties the revision cache now and again every `timeout` milliseconds.
     * @param {number} timeout - interval in milliseconds
     */
    periodicCleanCache(timeout) {
        this.cache = {};

        setTimeout(() => {
            this.periodicCleanCache(timeout);
        }, timeout);
    }
}
|
||||
|
||||
module.exports = ReaderStorage;
|
||||
175
server/core/Reader/ReaderWorker.js
Normal file
175
server/core/Reader/ReaderWorker.js
Normal file
@@ -0,0 +1,175 @@
|
||||
const fs = require('fs-extra');
|
||||
const path = require('path');
|
||||
|
||||
const WorkerState = require('../WorkerState');//singleton
|
||||
const FileDownloader = require('../FileDownloader');
|
||||
const FileDecompressor = require('../FileDecompressor');
|
||||
const BookConverter = require('./BookConverter');
|
||||
|
||||
const utils = require('../utils');
|
||||
const log = new (require('../AppLogger'))().log;//singleton
|
||||
|
||||
let instance = null;
|
||||
|
||||
//singleton
|
||||
/**
 * Loads a book by url (downloaded, or previously uploaded as "file://<name>"),
 * unpacks it, converts it to fb2 and publishes the gzipped result in the
 * public tmp directory. Singleton: every `new ReaderWorker(config)` returns
 * the same instance.
 */
class ReaderWorker {
    /**
     * @param {Object} config - reads tempDir, publicDir, uploadDir,
     *     maxTempPublicDirSize, maxUploadPublicDirSize and branch
     */
    constructor(config) {
        if (!instance) {
            this.config = Object.assign({}, config);

            this.config.tempDownloadDir = `${config.tempDir}/download`;
            fs.ensureDirSync(this.config.tempDownloadDir);

            this.config.tempPublicDir = `${config.publicDir}/tmp`;
            fs.ensureDirSync(this.config.tempPublicDir);

            this.workerState = new WorkerState();
            this.down = new FileDownloader();
            this.decomp = new FileDecompressor();
            this.bookConverter = new BookConverter(this.config);

            this.periodicCleanDir(this.config.tempPublicDir, this.config.maxTempPublicDirSize, 60*60*1000);//once an hour
            this.periodicCleanDir(this.config.uploadDir, this.config.maxUploadPublicDirSize, 60*60*1000);//once an hour

            instance = this;
        }

        return instance;
    }

    /**
     * Runs the download -> decompress -> convert pipeline and reports
     * progress and the outcome through wState. Errors are not thrown from
     * the pipeline; they end up in wState as {state: 'error', error}.
     * @param {Object} opts - reads opts.url; the whole object is passed to the converter
     * @param {Object} wState - worker state control (set/finish)
     */
    async loadBook(opts, wState) {
        const url = opts.url;
        let decompDir = '';
        let downloadedFilename = '';
        let isUploaded = false;
        let convertFilename = '';
        try {
            wState.set({state: 'download', step: 1, totalSteps: 3, url});

            const tempFilename = utils.randomHexString(30);
            const tempFilename2 = utils.randomHexString(30);
            const decompDirname = utils.randomHexString(30);

            if (url.indexOf('file://') != 0) {//download
                const downdata = await this.down.load(url, (progress) => {
                    wState.set({progress});
                });

                downloadedFilename = `${this.config.tempDownloadDir}/${tempFilename}`;
                await fs.writeFile(downloadedFilename, downdata);
            } else {//uploaded file
                const notFoundMessage = 'Файл не найден на сервере (возможно был удален как устаревший). Пожалуйста, загрузите файл с диска на сервер заново.';

                //saveFile() hands out "file://<name>" with a bare file name;
                //anything with a path component ("../x", "a/b") must not be
                //resolved against uploadDir
                const uploadedName = url.substr(7);
                const isBareName = (uploadedName !== '' && uploadedName !== '.' && uploadedName !== '..'
                    && path.basename(uploadedName) === uploadedName);
                if (!isBareName)
                    throw new Error(notFoundMessage);

                //set before anything can fail, so the cleanup below never deletes an upload
                isUploaded = true;
                downloadedFilename = `${this.config.uploadDir}/${uploadedName}`;
                if (!await fs.pathExists(downloadedFilename))
                    throw new Error(notFoundMessage);
                await utils.touchFile(downloadedFilename);
            }
            wState.set({progress: 100});

            //decompress
            wState.set({state: 'decompress', step: 2, progress: 0});
            decompDir = `${this.config.tempDownloadDir}/${decompDirname}`;
            let decompFiles = {};
            try {
                decompFiles = await this.decomp.decompressNested(downloadedFilename, decompDir);
            } catch (e) {
                if (this.config.branch == 'development')
                    console.error(e);
                throw new Error('Ошибка распаковки');
            }
            wState.set({progress: 100});

            //convert to fb2
            wState.set({state: 'convert', step: 3, progress: 0});
            convertFilename = `${this.config.tempDownloadDir}/${tempFilename2}`;
            await this.bookConverter.convertToFb2(decompFiles, convertFilename, opts, progress => {
                wState.set({progress});
            });

            //gzip the file into tmp unless one with the same sha256 name is already there
            const compFilename = await this.decomp.gzipFileIfNotExists(convertFilename, `${this.config.tempPublicDir}`);

            wState.set({progress: 100});

            //finish
            const finishFilename = path.basename(compFilename);
            wState.finish({path: `/tmp/${finishFilename}`});

        } catch (e) {
            if (this.config.branch == 'development')
                console.error(e);
            wState.set({state: 'error', error: e.message});
        } finally {
            //clean
            if (decompDir)
                await fs.remove(decompDir);
            if (downloadedFilename && !isUploaded)
                await fs.remove(downloadedFilename);
            if (convertFilename)
                await fs.remove(convertFilename);
        }
    }

    /**
     * Starts loadBook in the background.
     * @param {Object} opts - see loadBook
     * @returns {*} the worker id under which progress is reported
     */
    loadBookUrl(opts) {
        const workerId = this.workerState.generateWorkerId();
        const wState = this.workerState.getControl(workerId);
        wState.set({state: 'start'});

        //not awaited: the caller gets the id at once and follows the worker state
        this.loadBook(opts, wState);

        return workerId;
    }

    /**
     * Moves an uploaded file into uploadDir under its sha256 hex hash; when
     * such a file already exists it is touched and the new copy is removed.
     * @param {Object} file - reads file.path
     * @returns {Promise<string>} `file://<hash>`, usable as opts.url in loadBook
     */
    async saveFile(file) {
        const hash = await utils.getFileHash(file.path, 'sha256', 'hex');
        const outFilename = `${this.config.uploadDir}/${hash}`;

        if (!await fs.pathExists(outFilename)) {
            await fs.move(file.path, outFilename);
        } else {
            await utils.touchFile(outFilename);
            await fs.remove(file.path);
        }

        return `file://${hash}`;
    }

    /**
     * Removes the least recently modified files of `dir` until the total
     * size of its files is not above maxSize, then schedules itself again
     * in `timeout` milliseconds. Subdirectories are ignored.
     * @param {string} dir - directory to clean
     * @param {number} maxSize - size limit in bytes
     * @param {number} timeout - delay before the next run, milliseconds
     */
    async periodicCleanDir(dir, maxSize, timeout) {
        try {
            log(`Start clean dir: ${dir}, maxSize=${maxSize}`);
            const list = await fs.readdir(dir);

            let size = 0;
            let files = [];
            for (const name of list) {
                const stat = await fs.stat(`${dir}/${name}`);
                if (!stat.isDirectory()) {
                    size += stat.size;
                    files.push({name, stat});
                }
            }
            log(`found ${files.length} files in dir ${dir}`);

            //oldest first
            files.sort((a, b) => a.stat.mtimeMs - b.stat.mtimeMs);

            let i = 0;
            while (i < files.length && size > maxSize) {
                const file = files[i];
                log(`rm ${dir}/${file.name}`);
                await fs.remove(`${dir}/${file.name}`);
                size -= file.stat.size;
                i++;
            }
            log(`removed ${i} files`);
        } catch(e) {
            //NOTE(review): LM_ERR is not declared in this file — presumably a global set by AppLogger; confirm
            log(LM_ERR, e.message);
        } finally {
            setTimeout(() => {
                this.periodicCleanDir(dir, maxSize, timeout);
            }, timeout);
        }
    }
}
|
||||
|
||||
module.exports = ReaderWorker;
|
||||
Reference in New Issue
Block a user