Изменение путей к модулям

This commit is contained in:
Book Pauk
2019-10-29 14:45:55 +07:00
parent c33e91d5d0
commit 5bb9949440
17 changed files with 12 additions and 11 deletions

View File

@@ -0,0 +1,147 @@
const fs = require('fs-extra');
const iconv = require('iconv-lite');
const chardet = require('chardet');
const he = require('he');
const textUtils = require('./textUtils');
const utils = require('../../utils');
//number of execConverter() calls currently in flight; shared by every converter instance of this module
let execConverterCounter = 0;

/**
 * Common base of the book converters: keeps the paths of the external tools,
 * runs them with a limit on simultaneous runs, and provides helpers for
 * decoding input data and serializing a node tree into FB2 markup.
 */
class ConvertBase {
    /**
     * @param {object} config - application config; `dataDir` is read here, `branch` and
     *   `useExternalBookConverter` are read by methods of this class and its subclasses.
     */
    constructor(config) {
        this.config = config;

        this.calibrePath = `${config.dataDir}/calibre/ebook-convert`;
        this.sofficePath = '/usr/bin/soffice';
        this.pdfToHtmlPath = '/usr/bin/pdftohtml';
    }

    /**
     * Conversion entry point, meant to be overridden by subclasses.
     * The base implementation does nothing and resolves to undefined.
     */
    async run(data, opts) {// eslint-disable-line no-unused-vars
        //override
    }

    /**
     * Verifies that all three external tools exist on disk.
     * @throws {Error} naming the first tool that is missing.
     */
    async checkExternalConverterPresent() {
        if (!await fs.pathExists(this.calibrePath))
            throw new Error('Внешний конвертер calibre не найден');

        if (!await fs.pathExists(this.sofficePath))
            throw new Error('Внешний конвертер LibreOffice не найден');

        if (!await fs.pathExists(this.pdfToHtmlPath))
            throw new Error('Внешний конвертер pdftohtml не найден');
    }

    /**
     * Runs an external converter through utils.spawnProcess.
     * @param {string} path - executable to run.
     * @param {string[]} args - command line arguments.
     * @param {Function} [onData] - passed through to utils.spawnProcess.
     * @throws {Error} when more than 10 runs are in flight, when the process exits with a
     *   non-zero code, or when spawnProcess rejects.
     */
    async execConverter(path, args, onData) {
        //an Error is passed through untouched: wrapping it in `new Error(e)` would turn
        //its message into "Error: <message>"
        const toError = (value) => (value instanceof Error ? value : new Error(value));

        execConverterCounter++;
        try {
            if (execConverterCounter > 10)
                throw new Error('Слишком большая очередь конвертирования. Пожалуйста, попробуйте позже.');

            const result = await utils.spawnProcess(path, {args, onData});
            if (result.code != 0) {
                let error = result.code;
                //full process output is exposed only on the development branch
                if (this.config.branch == 'development')
                    error = `exec: ${path}, stdout: ${result.stdout}, stderr: ${result.stderr}`;
                throw new Error(`Внешний конвертер завершился с ошибкой: ${error}`);
            }
        } catch(e) {
            const status = (e ? e.status : undefined);
            if (status == 'killed') {
                throw new Error('Слишком долгое ожидание конвертера');
            } else if (status == 'error') {
                throw toError(e.error);
            } else {
                throw toError(e);
            }
        } finally {
            //the slot is released on success and on every failure path
            execConverterCounter--;
        }
    }

    /**
     * Decodes raw data using the encoding reported by textUtils.getEncoding.
     * When that is 'ISO-8859-5', chardet is asked about the first 20000 bytes and the
     * first candidate whose name does not contain 'ISO-8859' is used instead.
     * @returns the iconv-decoded string, or `data` itself when the encoding is utf-8.
     */
    decode(data) {
        let selected = textUtils.getEncoding(data);

        if (selected == 'ISO-8859-5') {
            const charsetAll = chardet.detectAll(data.slice(0, 20000));
            for (const charset of charsetAll) {
                if (charset.name.indexOf('ISO-8859') < 0) {
                    selected = charset.name;
                    break;
                }
            }
        }

        if (selected.toLowerCase() != 'utf-8')
            return iconv.decode(data, selected);
        else
            return data;
    }

    /** Replaces every `&nbsp;`, tab, CR and LF with a single space. */
    repSpaces(text) {
        return text.replace(/&nbsp;|[\t\n\r]/g, ' ');
    }

    /** Decodes entities with `he` and escapes the result again, so already-escaped text is not escaped twice. */
    escapeEntities(text) {
        return he.escape(he.decode(text));
    }

    /**
     * Wraps the serialized node tree into the XML declaration and the FictionBook root element.
     * @param {*} fb2 - node tree accepted by formatFb2Node.
     * @returns {string} the FB2 document text.
     */
    formatFb2(fb2) {
        let out = '<?xml version="1.0" encoding="utf-8"?>';
        out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">';
        out += this.formatFb2Node(fb2);
        out += '</FictionBook>';
        return out;
    }

    /**
     * Recursively serializes a node.
     *  - array: every element is serialized in order (the `name` is not applied to elements);
     *  - string: emitted as text, wrapped into `<name>` when a name is given;
     *  - object: `_n` overrides the tag name, `_attrs` holds attributes, `_t` holds text,
     *    `_a` holds children; other keys starting with `_` are ignored, every remaining
     *    key is serialized as a child named after the key;
     *  - null/undefined: contributes an empty string.
     * An attribute-less `p` with a blank body is emitted as `<empty-line/>`.
     * Attribute values are written as is, without escaping.
     * @param {*} node
     * @param {string} [name] - tag name used when the node does not carry `_n`.
     * @returns {string}
     */
    formatFb2Node(node, name) {
        //a missing child must not break serialization of the whole book
        if (node === null || node === undefined)
            return '';

        let out = '';

        if (Array.isArray(node)) {
            for (const n of node) {
                out += this.formatFb2Node(n);
            }
        } else if (typeof node == 'string') {
            if (name)
                out += `<${name}>${this.repSpaces(node)}</${name}>`;
            else
                out += this.repSpaces(node);
        } else {
            if (node._n)
                name = node._n;

            let attrs = '';
            if (node._attrs) {
                for (let attrName in node._attrs) {
                    attrs += ` ${attrName}="${node._attrs[attrName]}"`;
                }
            }

            let tOpen = '';
            let tBody = '';
            let tClose = '';
            if (name)
                tOpen += `<${name}${attrs}>`;
            //called through the prototype so that nodes created without one are supported too
            if (Object.prototype.hasOwnProperty.call(node, '_t'))
                tBody += this.repSpaces(node._t);

            for (let nodeName in node) {
                //service keys are skipped, except `_a` which carries the child list
                if (nodeName && nodeName[0] == '_' && nodeName != '_a')
                    continue;

                const n = node[nodeName];
                tBody += this.formatFb2Node(n, nodeName);
            }

            if (name)
                tClose += `</${name}>`;

            if (attrs == '' && name == 'p' && tBody.trim() == '')
                out += '<empty-line/>'
            else
                out += `${tOpen}${tBody}${tClose}`;
        }
        return out;
    }
}
module.exports = ConvertBase;

View File

@@ -0,0 +1,33 @@
const fs = require('fs-extra');
const path = require('path');
const ConvertDocX = require('./ConvertDocX');
/**
 * Converter for sources whose detected extension is 'msi'
 * (presumably how the file detector reports legacy Word .doc containers — TODO confirm).
 * The file is first turned into docx by LibreOffice, then handled by the docx pipeline.
 */
class ConvertDoc extends ConvertDocX {
    /**
     * @returns a truthy value when the external converter is enabled and the
     *   source file type extension is 'msi'; a falsy value otherwise.
     */
    check(data, opts) {
        const {inputFiles} = opts;
        const enabled = this.config.useExternalBookConverter;
        const fileType = enabled && inputFiles.sourceFileType;
        return fileType && fileType.ext == 'msi';
    }

    /**
     * @returns {Promise<Buffer|false>} contents of the produced fb2 file,
     *   or false when this converter does not apply.
     */
    async run(data, opts) {
        const applicable = this.check(data, opts);
        if (!applicable)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;
        const {filesDir, sourceFile} = inputFiles;

        //all intermediate files live next to the unpacked input and share one base name
        const base = `${filesDir}/${path.basename(sourceFile)}`;
        const docFile = `${base}.doc`;
        const docxFile = `${base}.docx`;
        const fb2File = `${base}.fb2`;

        await fs.copy(sourceFile, docFile);

        //step 1: doc -> docx with LibreOffice
        const sofficeArgs = ['--headless', '--convert-to', 'docx', '--outdir', filesDir, docFile];
        await this.execConverter(this.sofficePath, sofficeArgs);

        //step 2: docx -> fb2 with the parent implementation
        return await super.convert(docxFile, fb2File, callback);
    }
}
module.exports = ConvertDoc;

View File

@@ -0,0 +1,49 @@
const fs = require('fs-extra');
const path = require('path');
const ConvertBase = require('./ConvertBase');
/**
 * Converter for docx sources: a zip archive that contains '[Content_Types].xml'.
 * The conversion itself is delegated to calibre.
 */
class ConvertDocX extends ConvertBase {
    /**
     * @returns {boolean} true when the external converter is enabled, the source is
     *   a zip archive and one of its entries is '[Content_Types].xml'.
     */
    check(data, opts) {
        const {inputFiles} = opts;
        const isZip = this.config.useExternalBookConverter &&
            inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'zip';
        if (!isZip)
            return false;

        //look for the file '[Content_Types].xml'
        for (const entry of inputFiles.files) {
            if (entry.path == '[Content_Types].xml')
                return true;
        }
        return false;
    }

    /**
     * Runs calibre on docxFile and reads the resulting fb2 file.
     * @param {string} docxFile - input path.
     * @param {string} fb2File - output path.
     * @param {Function} callback - called with a progress value on every converter data event.
     * @returns {Promise<Buffer>} contents of fb2File.
     */
    async convert(docxFile, fb2File, callback) {
        let progress = 0;
        //the real progress is unknown: the value grows by 5 and drops back to 50 after reaching 100
        const onConverterData = () => {
            if (progress < 100)
                progress += 5;
            else
                progress = 50;
            callback(progress);
        };

        await this.execConverter(this.calibrePath, [docxFile, fb2File], onConverterData);

        return await fs.readFile(fb2File);
    }

    /**
     * @returns {Promise<Buffer|false>} contents of the produced fb2 file,
     *   or false when this converter does not apply.
     */
    async run(data, opts) {
        const applicable = this.check(data, opts);
        if (!applicable)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;
        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const docxFile = `${base}.docx`;
        const fb2File = `${base}.fb2`;

        //calibre gets a copy that carries the proper extension
        await fs.copy(inputFiles.sourceFile, docxFile);

        return await this.convert(docxFile, fb2File, callback);
    }
}
module.exports = ConvertDocX;

View File

@@ -0,0 +1,49 @@
const fs = require('fs-extra');
const path = require('path');
const ConvertBase = require('./ConvertBase');
/**
 * Converter for epub sources: a zip archive whose 'mimetype' entry
 * contains 'application/epub+zip'. The conversion is delegated to calibre.
 */
class ConvertEpub extends ConvertBase {
    /**
     * @returns {Promise<boolean>} true when the external converter is enabled, the source
     *   is a zip archive and its first 'mimetype' entry holds the epub mime type.
     */
    async check(data, opts) {
        const {inputFiles} = opts;
        const isZip = this.config.useExternalBookConverter &&
            inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'zip';
        if (!isZip)
            return false;

        //look for the file 'mimetype'; only the first such entry is examined
        for (const entry of inputFiles.files) {
            if (entry.path != 'mimetype')
                continue;

            const content = await fs.readFile(`${inputFiles.filesDir}/${entry.path}`);
            return content.toString().trim() == 'application/epub+zip';
        }
        return false;
    }

    /**
     * @returns {Promise<Buffer|false>} contents of the produced fb2 file,
     *   or false when this converter does not apply.
     */
    async run(data, opts) {
        const applicable = await this.check(data, opts);
        if (!applicable)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;
        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const epubFile = `${base}.epub`;
        const fb2File = `${base}.fb2`;

        //calibre gets a copy that carries the proper extension
        await fs.copy(inputFiles.sourceFile, epubFile);

        let progress = 0;
        //the real progress is unknown: the value grows by 5 and drops back to 50 after reaching 100
        const onConverterData = () => {
            if (progress < 100)
                progress += 5;
            else
                progress = 50;
            callback(progress);
        };
        await this.execConverter(this.calibrePath, [epubFile, fb2File], onConverterData);

        return await fs.readFile(fb2File);
    }
}
module.exports = ConvertEpub;

View File

@@ -0,0 +1,41 @@
const ConvertBase = require('./ConvertBase');
const iconv = require('iconv-lite');
/**
 * Converter for sources that already are FB2: the data is passed through,
 * re-encoded to utf-8 when the XML declaration names another encoding.
 */
class ConvertFb2 extends ConvertBase {
    /**
     * @returns a truthy value when the detected type is xml and the data
     *   contains '<FictionBook'; a falsy value otherwise.
     */
    check(data, opts) {
        const {dataType} = opts;

        return (dataType && dataType.ext == 'xml' && data.toString().indexOf('<FictionBook') >= 0);
    }

    /**
     * @returns {Promise<Buffer|false>} the book data in utf-8,
     *   or false when this converter does not apply.
     */
    async run(data, opts) {
        if (!this.check(data, opts))
            return false;

        return this.checkEncoding(data);
    }

    /**
     * Looks at the `<?xml version="1.0" ... ?>` declaration and, when its `encoding`
     * attribute is not utf-8, decodes the data with iconv and rewrites the attribute
     * to `encoding="utf-8"`.
     * @param {Buffer} data
     * @returns {Buffer} `data` itself when no re-encoding is needed, a new Buffer otherwise.
     */
    checkEncoding(data) {
        const left = data.indexOf('<?xml version="1.0"');
        if (left < 0)
            return data;

        const right = data.indexOf('?>', left);
        if (right < 0)
            return data;

        const head = data.slice(left, right + 2).toString();
        //lazy match bounded by the opening quote: a greedy `.*` would run up to the last
        //quote of the declaration and swallow following attributes such as standalone="no"
        const m = head.match(/encoding=(["'])(.*?)\1/);
        if (!m)
            return data;

        const encoding = m[2].toLowerCase();
        if (encoding == 'utf-8')
            return data;

        const decoded = iconv.decode(data, encoding);
        return Buffer.from(decoded.toString().replace(m[0], 'encoding="utf-8"'));
    }
}
module.exports = ConvertFb2;

View File

@@ -0,0 +1,299 @@
const ConvertBase = require('./ConvertBase');
const sax = require('./sax');
const textUtils = require('./textUtils');
/**
 * Converts HTML/XML markup or plain text into FB2 text.
 * The input is walked with sax.parseSync, collected into paragraphs and then
 * serialized with formatFb2().
 */
class ConvertHtml extends ConvertBase {
/**
 * Decides whether this converter applies.
 * @returns {{isText: boolean}|false} {isText: false} for a detected html/xml type,
 *   {isText: true} when textUtils.checkIfText(data) is truthy, false otherwise.
 */
check(data, opts) {
const {dataType} = opts;
if (dataType && (dataType.ext == 'html' || dataType.ext == 'xml'))
return {isText: false};
//maybe this is plain text?
if (textUtils.checkIfText(data)) {
return {isText: true};
}
return false;
}
/**
 * Builds the FB2 text.
 * Options read here: `skipCheck` (skip check() and take `isText` from opts),
 * `isText` (force paragraph detection by indentation), `cutTitle` (do not put
 * the title text into the body).
 * @returns {Promise<string|false>} FB2 text, or false when check() rejects the data.
 */
async run(data, opts) {
let isText = false;
if (!opts.skipCheck) {
const checkResult = this.check(data, opts);
if (!checkResult)
return false;
isText = checkResult.isText;
} else {
isText = opts.isText;
}
let {cutTitle} = opts;
//skeleton of the output document; `pars` is attached to the body only after parsing
let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo};
let pars = [];
let body = {_n: 'body', section: {_a: []}};
let binary = [];
let fb2 = [desc, body, binary];
//parser state shared by the handlers below
let title = '';
let inTitle = false;
let inImage = false;
let image = {};
let bold = false;
let italic = false;
//spaceCounter[n] = number of non-blank lines that start with exactly n spaces
let spaceCounter = [];
//drops CR/LF and turns every tab into a space
const repCrLfTab = (text) => text.replace(/[\n\r]/g, '').replace(/\t/g, ' ');
const newParagraph = () => {
pars.push({_n: 'p', _t: ''});
};
//appends text to the last paragraph, creating the first one on demand
const growParagraph = (text) => {
if (!pars.length)
newParagraph();
const l = pars.length;
pars[l - 1]._t += text;
//count the text indents so that paragraphs can be detected later
const lines = text.split('\n');
for (let line of lines) {
if (line.trim() == '')
continue;
line = repCrLfTab(line);
let l = 0;
while (l < line.length && line[l] == ' ') {
l++;
}
if (!spaceCounter[l])
spaceCounter[l] = 0;
spaceCounter[l]++;
}
};
//tags that start a new paragraph; entries beginning with '/' are matched against closing tags
const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
//text handler; cutCounter is non-zero while the parser is inside one of the innerCut tags
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
text = this.escapeEntities(text);
if (!cutCounter && !(cutTitle && inTitle)) {
let tOpen = (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
let tClose = (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
growParagraph(`${tOpen}${text}${tClose}`);
}
//only the first text node seen inside a title is kept as the book title
if (inTitle && !title)
title = text;
//the text inside <fb2-image> is the image payload: it goes to `binary`, the body gets a reference to it
if (inImage) {
image._t = text;
binary.push(image);
pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
newParagraph();
}
};
//opening tag handler: paragraph breaks, bold/italic flags, title and image tracking
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) {
if (newPara.has(tag))
newParagraph();
switch (tag) {
case 'i':
case 'em':
italic = true;
break;
case 'b':
case 'strong':
case 'h1':
case 'h2':
case 'h3':
bold = true;
break;
}
}
if (tag == 'title' || tag == 'cut-title') {
inTitle = true;
//a <cut-title> tag turns title cutting on regardless of opts.cutTitle
if (tag == 'cut-title')
cutTitle = true;
}
if (tag == 'fb2-image') {
inImage = true;
const attrs = sax.getAttrsSync(tail);
image = {_n: 'binary', _attrs: {id: attrs.name.value, 'content-type': attrs.type.value}, _t: ''};
}
};
//closing tag handler, mirrors onStartNode
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) {
if (newPara.has('/' + tag))
newParagraph();
switch (tag) {
case 'i':
case 'em':
italic = false;
break;
case 'b':
case 'strong':
case 'h1':
case 'h2':
case 'h3':
bold = false;
break;
}
}
if (tag == 'title' || tag == 'cut-title')
inTitle = false;
if (tag == 'fb2-image')
inImage = false;
};
let buf = this.decode(data).toString();
sax.parseSync(buf, {
onStartNode, onEndNode, onTextNode,
innerCut: new Set(['head', 'script', 'style', 'binary', 'fb2-image'])
});
titleInfo['book-title'] = title;
//looks like plain text, it has to be split into paragraphs
if (isText || pars.length < buf.length/2000) {
//total = number of counted lines, count = number of distinct indent widths plus one
let total = 0;
let count = 1;
for (let i = 0; i < spaceCounter.length; i++) {
const sc = (spaceCounter[i] ? spaceCounter[i] : 0);
if (sc) count++;
total += sc;
}
//d = number of indent widths that occur more often than the average
let d = 0;
const mid = total/count;
for (let i = 0; i < spaceCounter.length; i++) {
const sc = (spaceCounter[i] ? spaceCounter[i] : 0);
if (sc > mid) d++;
}
let i = 0;
//if the spread is not too large, detect paragraphs
if (d < 10 && spaceCounter.length) {
//take the largest indent width used by at least 1/20 of the counted lines
total /= 20;
i = spaceCounter.length - 1;
while (i > 0 && (!spaceCounter[i] || spaceCounter[i] < total)) i--;
}
//a line indented by at least parIndent spaces starts a new paragraph
const parIndent = (i > 0 ? i : 0);
let newPars = [];
const newPar = () => {
newPars.push({_n: 'p', _t: ''});
};
const growPar = (text) => {
if (!newPars.length)
newPar();
const l = newPars.length;
newPars[l - 1]._t += text;
}
i = 0;
for (const par of pars) {
//non-paragraph nodes (images) are carried over unchanged
if (par._n != 'p') {
newPars.push(par);
continue;
}
if (i > 0)
newPar();
i++;
let j = 0;
const lines = par._t.split('\n');
for (let line of lines) {
line = repCrLfTab(line);
let l = 0;
while (l < line.length && line[l] == ' ') {
l++;
}
if (l >= parIndent) {
if (j > 0)
newPar();
j++;
}
growPar(line.trim() + ' ');
}
}
body.section._a[0] = newPars;
} else {
body.section._a[0] = pars;
}
//remove the excess and make valid fb2, since splitting into paragraphs breaks tags
bold = false;
italic = false;
pars = body.section._a[0];
for (let i = 0; i < pars.length; i++) {
if (pars[i]._n != 'p')
continue;
pars[i]._t = this.repSpaces(pars[i]._t).trim();
if (pars[i]._t.indexOf('<') >= 0) {
const t = pars[i]._t;
//the paragraph is re-parsed and every text piece is wrapped into its own balanced tags;
//bold/italic are not reset between paragraphs, so an unclosed tag carries over to the next one
let a = [];
const onTextNode = (text) => {
let tOpen = (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
let tClose = (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
a.push(`${tOpen}${text}${tClose}`);
}
const onStartNode = (tag) => {
if (tag == 'strong')
bold = true;
if (tag == 'emphasis')
italic = true;
}
const onEndNode = (tag) => {
if (tag == 'strong')
bold = false;
if (tag == 'emphasis')
italic = false;
}
sax.parseSync(t, { onStartNode, onEndNode, onTextNode });
pars[i]._t = '';
pars[i]._a = a;
}
}
return this.formatFb2(fb2);
}
}
module.exports = ConvertHtml;

View File

@@ -0,0 +1,37 @@
const fs = require('fs-extra');
const path = require('path');
const ConvertBase = require('./ConvertBase');
/**
 * Converter for mobi sources; the conversion is delegated to calibre.
 */
class ConvertMobi extends ConvertBase {
    /**
     * @returns a promise of a truthy value when the external converter is enabled
     *   and the source file type extension is 'mobi'; of a falsy value otherwise.
     */
    async check(data, opts) {
        const {inputFiles} = opts;
        const enabled = this.config.useExternalBookConverter;
        const fileType = enabled && inputFiles.sourceFileType;
        return (fileType && fileType.ext == 'mobi');
    }

    /**
     * @returns {Promise<Buffer|false>} contents of the produced fb2 file,
     *   or false when this converter does not apply.
     */
    async run(data, opts) {
        const applicable = await this.check(data, opts);
        if (!applicable)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;
        const base = `${inputFiles.filesDir}/${path.basename(inputFiles.sourceFile)}`;
        const mobiFile = `${base}.mobi`;
        const fb2File = `${base}.fb2`;

        //calibre gets a copy that carries the proper extension
        await fs.copy(inputFiles.sourceFile, mobiFile);

        let progress = 0;
        //the real progress is unknown: the value grows by 5 and drops back to 50 after reaching 100
        const onConverterData = () => {
            if (progress < 100)
                progress += 5;
            else
                progress = 50;
            callback(progress);
        };
        await this.execConverter(this.calibrePath, [mobiFile, fb2File], onConverterData);

        return await fs.readFile(fb2File);
    }
}
module.exports = ConvertMobi;

View File

@@ -0,0 +1,219 @@
const fs = require('fs-extra');
const path = require('path');
const sax = require('./sax');
const utils = require('../../utils');
const ConvertHtml = require('./ConvertHtml');
/**
 * Converter for pdf sources. pdftohtml produces an XML description of the pages,
 * which is turned into indented plain text with embedded <fb2-image> tags and
 * then handed to ConvertHtml.run() for the final FB2 generation.
 */
class ConvertPdf extends ConvertHtml {
    /**
     * @returns a truthy value when the external converter is enabled and the
     *   source file type extension is 'pdf'; a falsy value otherwise.
     */
    check(data, opts) {
        const {inputFiles} = opts;

        return this.config.useExternalBookConverter &&
            inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'pdf';
    }

    /**
     * @param {*} notUsed - the raw data is ignored, the source file is read by pdftohtml.
     * @param {object} opts - `inputFiles` and `callback` are read here.
     * @returns {Promise<string|false>} FB2 text, or false when this converter does not apply.
     */
    async run(notUsed, opts) {
        if (!this.check(notUsed, opts))
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;

        const outFile = `${inputFiles.filesDir}/${utils.randomHexString(10)}.xml`;

        //convert to xml
        let perc = 0;
        await this.execConverter(this.pdfToHtmlPath, ['-c', '-s', '-xml', inputFiles.sourceFile, outFile], () => {
            //the real progress is unknown: the value grows by 10 and drops back to 40 after reaching 80
            perc = (perc < 80 ? perc + 10 : 40);
            callback(perc);
        });
        callback(80);

        const data = await fs.readFile(outFile);
        callback(90);

        //parse xml
        let lines = [];//output sequence: text lines and image entries, in reading order
        let images = [];//images waiting to be placed, kept sorted by `top`
        let loading = [];//promises of the image files being read
        let inText = false;
        let bold = false;
        let italic = false;
        let title = '';
        let prevTop = 0;
        let i = -1;//index of the current entry in `lines`
        let titleCount = 0;

        //reads a jpg/png image file and fills its base64 data, mime type and name;
        //images of any other extension keep empty data and type
        const loadImage = async(image) => {
            const src = path.parse(image.src);
            let type = 'unknown';
            switch (src.ext) {
                case '.jpg': type = 'image/jpeg'; break;
                case '.png': type = 'image/png'; break;
            }
            if (type != 'unknown') {
                image.data = (await fs.readFile(image.src)).toString('base64');
                image.type = type;
                image.name = src.base;
            }
        }

        //moves every pending image located above curTop into `lines`
        const putImage = (curTop) => {
            if (!isNaN(curTop) && images.length) {
                while (images.length && images[0].top < curTop) {
                    i++;
                    lines[i] = images[0];
                    images.shift();
                }
            }
        }

        const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (!cutCounter && inText) {
                let tOpen = (bold ? '<b>' : '');
                tOpen += (italic ? '<i>' : '');
                let tClose = (italic ? '</i>' : '');
                tClose += (bold ? '</b>' : '');

                lines[i].text += `${tOpen}${text}${tClose} `;
                //the title is made of the first two non-blank text pieces joined with ' - '
                if (titleCount < 2 && text.trim() != '') {
                    title += text + (titleCount ? '' : ' - ');
                    titleCount++;
                }
            }
        };

        const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (!cutCounter) {
                if (inText) {
                    switch (tag) {
                        case 'i':
                            italic = true;
                            break;
                        case 'b':
                            bold = true;
                            break;
                    }
                }

                if (tag == 'text' && !inText) {
                    let attrs = sax.getAttrsSync(tail);
                    const line = {
                        text: '',
                        top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10),
                        left: parseInt((attrs.left && attrs.left.value ? attrs.left.value : null), 10),
                        width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
                        height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
                    };

                    if (line.width != 0 || line.height != 0) {
                        inText = true;
                        //text fragments whose `top` differs by 3 or less are glued into one line.
                        //A new line is also forced when there is no text line to append to:
                        //nothing has been stored yet (the very first fragment may lie within 3
                        //of the initial prevTop) or the last stored entry is an image.
                        const noTextLine = (i < 0 || lines[i].isImage);
                        if (noTextLine || isNaN(line.top) || isNaN(prevTop) || (Math.abs(prevTop - line.top) > 3)) {
                            putImage(line.top);
                            i++;
                            lines[i] = line;
                        }
                        prevTop = line.top;
                    }
                }

                if (tag == 'image') {
                    const attrs = sax.getAttrsSync(tail);
                    const src = (attrs.src && attrs.src.value ? attrs.src.value : '');
                    if (src) {
                        const image = {
                            isImage: true,
                            src,
                            data: '',
                            type: '',
                            top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0,
                        };
                        loading.push(loadImage(image));
                        images.push(image);
                        images.sort((a, b) => a.top - b.top)
                    }
                }

                //a new page begins: place all images that are still pending
                if (tag == 'page') {
                    putImage(100000);
                }
            }
        };

        const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (inText) {
                switch (tag) {
                    case 'i':
                        italic = false;
                        break;
                    case 'b':
                        bold = false;
                        break;
                }
            }

            if (tag == 'text')
                inText = false;
        };

        let buf = this.decode(data).toString();
        sax.parseSync(buf, {
            onStartNode, onEndNode, onTextNode
        });

        putImage(100000);

        await Promise.all(loading);

        //find paragraphs and indents: every distinct `left` value is mapped to its rank
        const indents = [];
        for (const line of lines) {
            if (line.isImage)
                continue;
            if (!isNaN(line.left)) {
                indents[line.left] = 1;
            }
        }

        let j = 0;
        for (let i = 0; i < indents.length; i++) {
            if (indents[i]) {
                j++;
                indents[i] = j;
            }
        }
        indents[0] = 0;

        //build the text
        let text = `<title>${title}</title>`;
        let concat = '';
        let sp = '';
        for (const line of lines) {
            if (line.isImage) {
                text += `<fb2-image type="${line.type}" name="${line.name}">${line.data}</fb2-image>`;
                continue;
            }

            //the indent is taken from the line that starts a hyphenated sequence
            if (concat == '') {
                const left = line.left || 0;
                sp = ' '.repeat(indents[left]);
            }

            let t = line.text.trim();
            if (t.substr(-1) == '-') {
                //a trailing hyphen means a word break: drop it and glue with the next line
                t = t.substr(0, t.length - 1);
                concat += t;
            } else {
                text += sp + concat + t + "\n";
                concat = '';
            }
        }
        if (concat)
            text += sp + concat + "\n";

        return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true});
    }
}
module.exports = ConvertPdf;

View File

@@ -0,0 +1,33 @@
const fs = require('fs-extra');
const path = require('path');
const ConvertDocX = require('./ConvertDocX');
/**
 * Converter for rtf sources. The file is first turned into docx by LibreOffice,
 * then handled by the docx pipeline.
 */
class ConvertRtf extends ConvertDocX {
    /**
     * @returns a truthy value when the external converter is enabled and the
     *   source file type extension is 'rtf'; a falsy value otherwise.
     */
    check(data, opts) {
        const {inputFiles} = opts;
        const enabled = this.config.useExternalBookConverter;
        const fileType = enabled && inputFiles.sourceFileType;
        return fileType && fileType.ext == 'rtf';
    }

    /**
     * @returns {Promise<Buffer|false>} contents of the produced fb2 file,
     *   or false when this converter does not apply.
     */
    async run(data, opts) {
        const applicable = this.check(data, opts);
        if (!applicable)
            return false;
        await this.checkExternalConverterPresent();

        const {inputFiles, callback} = opts;
        const {filesDir, sourceFile} = inputFiles;

        //all intermediate files live next to the unpacked input and share one base name
        const base = `${filesDir}/${path.basename(sourceFile)}`;
        const rtfFile = `${base}.rtf`;
        const docxFile = `${base}.docx`;
        const fb2File = `${base}.fb2`;

        await fs.copy(sourceFile, rtfFile);

        //step 1: rtf -> docx with LibreOffice
        const sofficeArgs = ['--headless', '--convert-to', 'docx', '--outdir', filesDir, rtfFile];
        await this.execConverter(this.sofficePath, sofficeArgs);

        //step 2: docx -> fb2 with the parent implementation
        return await super.convert(docxFile, fb2File, callback);
    }
}
module.exports = ConvertRtf;

View File

@@ -0,0 +1,281 @@
const _ = require('lodash');
const URL = require('url').URL;
const sax = require('./sax');
const ConvertBase = require('./ConvertBase');
/**
 * Converts html pages of samlib.ru, budclub.ru and zhurnal.lib.ru into FB2 text.
 * The work text is located by the html comments that surround it on the page.
 */
class ConvertSamlib extends ConvertBase {
/**
 * @returns {{hostname: string}|false} the page hostname when the detected type is html
 *   and the url belongs to one of the supported hosts, false otherwise.
 */
check(data, opts) {
const {url, dataType} = opts;
const parsedUrl = new URL(url);
if (dataType && dataType.ext == 'html' &&
(parsedUrl.hostname == 'samlib.ru' ||
parsedUrl.hostname == 'budclub.ru' ||
parsedUrl.hostname == 'zhurnal.lib.ru')) {
return {hostname: parsedUrl.hostname};
}
return false;
}
/**
 * @returns {Promise<string|false>} FB2 text; false when opts.enableSitesFilter is not set,
 *   when check() rejects the page, or when the text start marker was not found.
 */
async run(data, opts) {
if (!opts.enableSitesFilter)
return false;
const checkResult = this.check(data, opts);
if (!checkResult)
return false;
const {hostname} = checkResult;
//skeleton of the output document
let titleInfo = {};
let desc = {_n: 'description', 'title-info': titleInfo};
let pars = [];
let body = {_n: 'body', section: {_a: pars}};
let fb2 = [desc, body];
//parser state shared by the handlers below
let inSubtitle = false;
//NOTE(review): unlike the other flags this one starts as true — confirm it is intended
let inJustify = true;
let inImage = false;
let isFirstPara = false;
//path of open elements ('/html/body/...'), tracked only outside of the work text
let path = '';
let tag = '';// eslint-disable-line no-unused-vars
let inText = false;
let textFound = false;
//current node of the output tree; the root has no `_p` (parent) link
let node = {_a: pars};
let inPara = false;
let italic = false;
let bold = false;
//appends a child element to the current node and descends into it
const openTag = (name, attrs) => {
if (name == 'p')
inPara = true;
let n = {_n: name, _attrs: attrs, _a: [], _p: node};
node._a.push(n);
node = n;
};
//goes up the tree until an element with the given name has been left or the root is reached
const closeTag = (name) => {
if (name == 'p')
inPara = false;
if (node._p) {
const exact = (node._n == name);
node = node._p;
if (!exact)
closeTag(name);
}
};
//adds text to the current node; at the root a paragraph is opened first, blank text is dropped there
const growParagraph = (text) => {
if (!node._p) {
if (text.trim() != '')
openTag('p');
else
return;
}
if (node._n == 'p' && node._a.length == 0)
text = text.trimLeft();
node._a.push({_t: text});
};
//opening tag handler: outside of the work text only the element path is tracked
const onStartNode = (elemName, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (elemName == '')
return;
if (!inText) {
path += '/' + elemName;
tag = elemName;
} else {
switch (elemName) {
case 'li':
case 'p':
case 'dd':
case 'br':
//the first paragraph break inside a subtitle does not open a new paragraph
if (!(inSubtitle && isFirstPara)) {
if (inPara)
closeTag('p');
openTag('p');
}
isFirstPara = false;
break;
case 'h1':
case 'h2':
case 'h3':
if (inPara)
closeTag('p');
openTag('p');
bold = true;
break;
case 'i':
case 'em':
italic = true;
break;
case 'b':
case 'strong':
bold = true;
break;
case 'div':
if (inPara)
closeTag('p');
if (tail.indexOf('align="center"') >= 0) {
openTag('subtitle');
inSubtitle = true;
isFirstPara = true;
}
if (tail.indexOf('align="justify"') >= 0) {
openTag('p');
inJustify = true;
}
break;
case 'img': {
if (inPara)
closeTag('p');
const attrs = sax.getAttrsSync(tail);
if (attrs.src && attrs.src.value) {
let href = attrs.src.value;
//a site-relative address is made absolute
if (href[0] == '/')
href = `http://${hostname}${href}`;
openTag('image', {'l:href': href});
inImage = true;
}
break;
}
}
}
};
//closing tag handler, mirrors onStartNode
const onEndNode = (elemName, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!inText) {
//cut the path back to the element being closed; if it is not on the path, keep the path as it was
const oldPath = path;
let t = '';
do {
let i = path.lastIndexOf('/');
t = path.substr(i + 1);
path = path.substr(0, i);
} while (t != elemName && path);
if (t != elemName) {
path = oldPath;
}
let i = path.lastIndexOf('/');
tag = path.substr(i + 1);
} else {
switch (elemName) {
case 'li':
case 'p':
case 'dd':
closeTag('p');
break;
case 'h1':
case 'h2':
case 'h3':
closeTag('p');
bold = false;
break;
case 'i':
case 'em':
italic = false;
break;
case 'b':
case 'strong':
bold = false;
break;
case 'div':
if (inSubtitle) {
closeTag('subtitle');
inSubtitle = false;
isFirstPara = false;
}
if (inJustify) {
closeTag('p');
inJustify = false;
}
break;
case 'img':
if (inImage)
closeTag('image');
inImage = false;
break;
}
}
};
//html comments of the page mark where the work text starts and ends
const onComment = (text) => {// eslint-disable-line no-unused-vars
if (text == '--------- Собственно произведение -------------') {
inText = true;
textFound = true;
}
if (text == '-----------------------------------------------')
inText = false;
};
//text handler: fills the title and author from fixed element paths, everything else goes to the body
const onTextNode = (text) => {// eslint-disable-line no-unused-vars
//whitespace-only text collapses to one space, or to nothing when it has no space character
if (text && text.trim() == '')
text = (text.indexOf(' ') >= 0 ? ' ' : '');
if (!text)
return;
text = this.escapeEntities(text);
switch (path) {
case '/html/body/center/h2':
titleInfo['book-title'] = text;
return;
case '/html/body/div/h3':
if (!titleInfo.author)
titleInfo.author = {};
//the first ':' is removed, then the words are taken as last, first and middle name
text = text.replace(':', '').trim().split(' ');
if (text[0])
titleInfo.author['last-name'] = text[0];
if (text[1])
titleInfo.author['first-name'] = text[1];
if (text[2])
titleInfo.author['middle-name'] = text[2];
return;
}
let tOpen = (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
let tClose = (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
if (inText)
growParagraph(`${tOpen}${text}${tClose}`);
};
sax.parseSync(this.decode(data).toString().replace(/&nbsp;/g, ' '), {
onStartNode, onEndNode, onTextNode, onComment,
innerCut: new Set(['head', 'script', 'style'])
});
//the text was not found on the page, it could not be processed correctly
if (!textFound)
return false;
const title = (titleInfo['book-title'] ? titleInfo['book-title'] : '');
let author = '';
if (titleInfo.author) {
author = _.compact([
(titleInfo.author['last-name'] ? titleInfo.author['last-name'] : ''),
(titleInfo.author['first-name'] ? titleInfo.author['first-name'] : ''),
(titleInfo.author['middle-name'] ? titleInfo.author['middle-name'] : ''),
]).join(' ');
}
//the body starts with a title block: author, empty line, title, empty line
pars.unshift({_n: 'title', _a: [
{_n: 'p', _t: author}, {_n: 'p', _t: ''},
{_n: 'p', _t: title}, {_n: 'p', _t: ''},
]})
return this.formatFb2(fb2);
}
}
module.exports = ConvertSamlib;

View File

@@ -0,0 +1,71 @@
const URL = require('url').URL;
const ConvertHtml = require('./ConvertHtml');
//Per-host extraction rules used by ConvertSites, keyed by the url hostname:
//  converter - name of the ConvertSites method that extracts the text,
//  begin/end - literal markers in the page source that surround the text to keep.
const sitesFilter = {
'www.fanfiction.net': {
converter: 'cutter',
begin: `<div class='storytext xcontrast_txt nocopy' id='storytext'>`,
end: `<div style='height:5px'></div><div style='clear:both;text-align:right;'>`,
},
'archiveofourown.org': {
converter: 'cutter',
begin: `<!-- BEGIN section where work skin applies -->`,
end: `<!-- END work skin -->`,
}
};
/**
 * Converter for html pages of the hosts listed in sitesFilter: the part of the page
 * that holds the text is cut out and handed to ConvertHtml.run().
 */
class ConvertSites extends ConvertHtml {
    /**
     * @returns {{hostname: string}|false} the page hostname when the detected type is html
     *   and sitesFilter has a rule for the host, false otherwise.
     */
    check(data, opts) {
        const {url, dataType} = opts;

        const parsedUrl = new URL(url);
        if (dataType && dataType.ext == 'html') {
            //own keys only: a hostname such as 'constructor' must not match inherited properties
            if (Object.prototype.hasOwnProperty.call(sitesFilter, parsedUrl.hostname))
                return {hostname: parsedUrl.hostname};
        }

        return false;
    }

    /**
     * @returns {Promise<string|false>} FB2 text; false when opts.enableSitesFilter is not set,
     *   when check() rejects the page, or when the text could not be cut out.
     */
    async run(data, opts) {
        if (!opts.enableSitesFilter)
            return false;

        const checkResult = this.check(data, opts);
        if (!checkResult)
            return false;

        const {hostname} = checkResult;

        let text = this.decode(data).toString();

        //the rule names the method of this class that does the extraction
        text = this[sitesFilter[hostname].converter](text, sitesFilter[hostname]);

        if (text === false)
            return false;

        return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true});
    }

    /**
     * Extracts the first `<title>...</title>` of the page.
     * @returns {string} a `<title>` element with the trimmed title text (empty when not found).
     */
    getTitle(text) {
        let title = '';
        const m = text.match(/<title>([\s\S]*?)<\/title>/);
        if (m)
            title = m[1];

        return `<title>${title.trim()}</title>`;
    }

    /**
     * Cuts out the text located between the `begin` and `end` markers and appends the page title.
     * @param {string} text - page source.
     * @param {{begin: string, end: string}} opts - literal markers.
     * @returns {string|false} the fragment followed by the title element; false when the begin
     *   marker is missing, when no end marker follows it, or when nothing lies between them.
     */
    cutter(text, opts) {
        const title = this.getTitle(text);

        //the marker position is checked before the marker length is added,
        //otherwise "not found" (-1) would turn into a valid-looking offset
        const beginPos = text.indexOf(opts.begin);
        if (beginPos < 0)
            return false;

        const l = beginPos + opts.begin.length;
        //only an end marker located after the begin marker is relevant
        const r = text.indexOf(opts.end, l);
        if (r < 0 || r <= l)
            return false;

        return text.substring(l, r) + title;
    }
}
module.exports = ConvertSites;

View File

@@ -0,0 +1,59 @@
const fs = require('fs-extra');
const FileDetector = require('../../FileDetector');
//order matters: the converters are tried in exactly this sequence
const convertClassFactory = [
    require('./ConvertEpub'),
    require('./ConvertPdf'),
    require('./ConvertRtf'),
    require('./ConvertDocX'),
    require('./ConvertDoc'),
    require('./ConvertMobi'),
    require('./ConvertFb2'),
    require('./ConvertSamlib'),
    require('./ConvertSites'),
    require('./ConvertHtml'),
];

/**
 * Front end of the converters: detects the input type and tries every
 * converter in turn until one of them produces a result.
 */
class BookConverter {
    /**
     * @param {object} config - passed to the constructor of every converter.
     */
    constructor(config) {
        this.detector = new FileDetector();

        //one instance per converter class, in the order of convertClassFactory
        this.convertFactory = convertClassFactory.map((ConvertClass) => new ConvertClass(config));
    }

    /**
     * Converts inputFiles.selectedFile and writes the result to outputFile.
     * When no converter accepts the file and inputFiles.nesting is set,
     * the conversion is retried on the nested input.
     * @param {object} inputFiles - description of the input; `selectedFile` and `nesting` are read here.
     * @param {string} outputFile - path of the file to write.
     * @param {object} opts - options passed on to the converters; `url` is used in an error message.
     * @param {Function} callback - progress callback, called with 100 at the end.
     * @returns the truthy result of the converter that succeeded.
     * @throws {Error} when no converter produced a result.
     */
    async convertToFb2(inputFiles, outputFile, opts, callback) {
        const selectedFileType = await this.detector.detectFile(inputFiles.selectedFile);
        const data = await fs.readFile(inputFiles.selectedFile);

        const extraOpts = {inputFiles, callback, dataType: selectedFileType};
        const convertOpts = Object.assign({}, opts, extraOpts);

        //stop at the first converter that returns something truthy
        let result = false;
        for (let idx = 0; idx < this.convertFactory.length && !result; idx++) {
            result = await this.convertFactory[idx].run(data, convertOpts);
        }

        if (result) {
            await fs.writeFile(outputFile, result);
        } else if (inputFiles.nesting) {
            result = await this.convertToFb2(inputFiles.nesting, outputFile, opts, callback);
        }

        if (!result) {
            const message = (selectedFileType
                ? `Этот формат файла не поддерживается: ${selectedFileType.mime}`
                : `Не удалось определить формат файла: ${opts.url}`);
            throw new Error(message);
        }

        callback(100);
        return result;
    }
}
module.exports = BookConverter;

View File

@@ -0,0 +1,359 @@
/**
 * Minimal tolerant SAX-like scanner for XML/HTML text.
 *
 * Handlers (all optional, passed in `options`):
 *   onStartNode(tag, tail, singleTag, cutCounter, cutTag) - opening or self-closing tag;
 *       `tag` is lower-cased, `tail` is the rest of the tag starting at the first whitespace;
 *   onEndNode(tag, tail, singleTag, cutCounter, cutTag)   - closing tag, also called right
 *       after onStartNode for a self-closing tag;
 *   onTextNode(text, cutCounter, cutTag)                  - text between tags;
 *   onCdata(text, cutCounter, cutTag)                     - contents of <![CDATA[ ... ]]>;
 *   onComment(text, cutCounter, cutTag)                   - contents of <!-- ... -->;
 *   onProgress(percent)                                   - progress, 100 at the end.
 * `innerCut` is a Set of tag names; `cutCounter` is non-zero for everything reported
 * between the start and the end of such a tag, `cutTag` holds its name.
 *
 * Unterminated input is not an error: the remainder is reported as text, or as
 * comment/CDATA contents when the unterminated construct is a comment or a CDATA section.
 *
 * @param {string} xstr - text to scan.
 * @param {object} options - handlers and `innerCut`.
 */
function parseSync(xstr, options) {
    const dummy = () => {};
    const {onStartNode: _onStartNode = dummy,
        onEndNode: _onEndNode = dummy,
        onTextNode: _onTextNode = dummy,
        onCdata: _onCdata = dummy,
        onComment: _onComment = dummy,
        onProgress: _onProgress = dummy,
        innerCut = new Set()
    } = options;

    let i = 0;
    const len = xstr.length;
    const progStep = len/20;
    let nextProg = 0;

    let cutCounter = 0;
    let cutTag = '';
    let inCdata;
    let inComment;
    //position of the last character of the opening delimiter ('<', '<!--' or '<![CDATA[')
    let leftData = 0;
    while (i < len) {
        inCdata = false;
        inComment = false;
        let singleTag = false;

        let left = xstr.indexOf('<', i);
        if (left < 0)
            break;
        leftData = left;

        if (left < len - 2 && xstr[left + 1] == '!') {
            if (xstr[left + 2] == '-') {
                if (xstr.startsWith('<!--', left)) {
                    inComment = true;
                    leftData = left + 3;
                }
            }

            if (!inComment && xstr[left + 2] == '[') {
                if (xstr.startsWith('<![CDATA[', left)) {
                    inCdata = true;
                    leftData = left + 8;
                }
            }
        }

        if (left != i) {
            const text = xstr.substr(i, left - i);
            _onTextNode(text, cutCounter, cutTag);
            //the text is consumed: if the construct that follows turns out to be
            //unterminated, the tail reported after the loop must not repeat it
            i = left;
        }

        let right = null;
        let rightData = null;
        if (inCdata) {
            rightData = xstr.indexOf(']]>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else if (inComment) {
            rightData = xstr.indexOf('-->', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else {
            rightData = xstr.indexOf('>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData;
            if (xstr[right - 1] === '/') {
                singleTag = true;
                rightData--;
            }
        }

        let tagData = xstr.substr(leftData + 1, rightData - leftData - 1);

        if (inCdata) {
            _onCdata(tagData, cutCounter, cutTag);
        } else if (inComment) {
            _onComment(tagData, cutCounter, cutTag);
        } else {
            let tag = '';
            let tail = '';
            //the tag name ends at the first whitespace of any kind (space, tab, line break)
            const firstSpace = tagData.search(/\s/);
            if (firstSpace >= 0) {
                tail = tagData.substr(firstSpace);
                tag = tagData.substr(0, firstSpace);
            } else {
                tag = tagData;
            }
            tag = tag.toLowerCase();

            let endTag = (singleTag ? tag : '');
            if (tag === '' || tag[0] !== '/') {
                _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
            } else {
                endTag = tag.substr(1);
            }

            if (endTag)
                _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);

            //cut mode starts after the start handler and ends after the end handler,
            //so both handlers of the cut tag itself see the outer counter value
            if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                if (!cutCounter)
                    cutTag = tag;
                cutCounter++;
            }

            if (cutTag === endTag) {
                cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
                if (!cutCounter)
                    cutTag = '';
            }
        }

        if (right >= nextProg) {
            _onProgress(Math.round(right/(len + 1)*100));
            nextProg += progStep;
        }
        i = right + 1;
    }

    //report what is left after the last complete construct
    if (i < len) {
        if (inCdata) {
            //the payload starts right after the opening delimiter, as in the regular path
            _onCdata(xstr.substr(leftData + 1), cutCounter, cutTag);
        } else if (inComment) {
            _onComment(xstr.substr(leftData + 1), cutCounter, cutTag);
        } else {
            _onTextNode(xstr.substr(i), cutCounter, cutTag);
        }
    }

    _onProgress(100);
}
//asynchronous copy of parseSync
//it is made by replacing "_on" => "await _on" after the while
/**
 * Asynchronous scanner for XML/HTML text: every handler call is awaited,
 * so handlers may be async functions.
 *
 * Handlers (all optional, passed in `options`):
 *   onStartNode(tag, tail, singleTag, cutCounter, cutTag) - opening or self-closing tag;
 *       `tag` is lower-cased, `tail` is the rest of the tag starting at the first whitespace;
 *   onEndNode(tag, tail, singleTag, cutCounter, cutTag)   - closing tag, also called right
 *       after onStartNode for a self-closing tag;
 *   onTextNode(text, cutCounter, cutTag)                  - text between tags;
 *   onCdata(text, cutCounter, cutTag)                     - contents of <![CDATA[ ... ]]>;
 *   onComment(text, cutCounter, cutTag)                   - contents of <!-- ... -->;
 *   onProgress(percent)                                   - progress, 100 at the end.
 * `innerCut` is a Set of tag names; `cutCounter` is non-zero for everything reported
 * between the start and the end of such a tag, `cutTag` holds its name.
 *
 * Unterminated input is not an error: the remainder is reported as text, or as
 * comment/CDATA contents when the unterminated construct is a comment or a CDATA section.
 *
 * @param {string} xstr - text to scan.
 * @param {object} options - handlers and `innerCut`.
 * @returns {Promise<void>}
 */
async function parse(xstr, options) {
    const dummy = () => {};
    const {onStartNode: _onStartNode = dummy,
        onEndNode: _onEndNode = dummy,
        onTextNode: _onTextNode = dummy,
        onCdata: _onCdata = dummy,
        onComment: _onComment = dummy,
        onProgress: _onProgress = dummy,
        innerCut = new Set()
    } = options;

    let i = 0;
    const len = xstr.length;
    const progStep = len/20;
    let nextProg = 0;

    let cutCounter = 0;
    let cutTag = '';
    let inCdata;
    let inComment;
    //position of the last character of the opening delimiter ('<', '<!--' or '<![CDATA[')
    let leftData = 0;
    while (i < len) {
        inCdata = false;
        inComment = false;
        let singleTag = false;

        let left = xstr.indexOf('<', i);
        if (left < 0)
            break;
        leftData = left;

        if (left < len - 2 && xstr[left + 1] == '!') {
            if (xstr[left + 2] == '-') {
                if (xstr.startsWith('<!--', left)) {
                    inComment = true;
                    leftData = left + 3;
                }
            }

            if (!inComment && xstr[left + 2] == '[') {
                if (xstr.startsWith('<![CDATA[', left)) {
                    inCdata = true;
                    leftData = left + 8;
                }
            }
        }

        if (left != i) {
            const text = xstr.substr(i, left - i);
            await _onTextNode(text, cutCounter, cutTag);
            //the text is consumed: if the construct that follows turns out to be
            //unterminated, the tail reported after the loop must not repeat it
            i = left;
        }

        let right = null;
        let rightData = null;
        if (inCdata) {
            rightData = xstr.indexOf(']]>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else if (inComment) {
            rightData = xstr.indexOf('-->', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData + 2;
        } else {
            rightData = xstr.indexOf('>', leftData + 1);
            if (rightData < 0)
                break;
            right = rightData;
            if (xstr[right - 1] === '/') {
                singleTag = true;
                rightData--;
            }
        }

        let tagData = xstr.substr(leftData + 1, rightData - leftData - 1);

        if (inCdata) {
            await _onCdata(tagData, cutCounter, cutTag);
        } else if (inComment) {
            await _onComment(tagData, cutCounter, cutTag);
        } else {
            let tag = '';
            let tail = '';
            //the tag name ends at the first whitespace of any kind (space, tab, line break)
            const firstSpace = tagData.search(/\s/);
            if (firstSpace >= 0) {
                tail = tagData.substr(firstSpace);
                tag = tagData.substr(0, firstSpace);
            } else {
                tag = tagData;
            }
            tag = tag.toLowerCase();

            let endTag = (singleTag ? tag : '');
            if (tag === '' || tag[0] !== '/') {
                await _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
            } else {
                endTag = tag.substr(1);
            }

            if (endTag)
                await _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);

            //cut mode starts after the start handler and ends after the end handler,
            //so both handlers of the cut tag itself see the outer counter value
            if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                if (!cutCounter)
                    cutTag = tag;
                cutCounter++;
            }

            if (cutTag === endTag) {
                cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
                if (!cutCounter)
                    cutTag = '';
            }
        }

        if (right >= nextProg) {
            await _onProgress(Math.round(right/(len + 1)*100));
            nextProg += progStep;
        }
        i = right + 1;
    }

    //report what is left after the last complete construct
    if (i < len) {
        if (inCdata) {
            //the payload starts right after the opening delimiter, as in the regular path
            await _onCdata(xstr.substr(leftData + 1), cutCounter, cutTag);
        } else if (inComment) {
            await _onComment(xstr.substr(leftData + 1), cutCounter, cutTag);
        } else {
            await _onTextNode(xstr.substr(i), cutCounter, cutTag);
        }
    }

    await _onProgress(100);
}
/**
 * Parses the attribute part of a tag (the `tail` passed to the parser's node
 * callbacks) into an object.
 *
 * Supported forms: `name="value"`, `name='value'`, `name=value` (ends at the next
 * space) and bare `name` (empty value). Tabs and line breaks are treated as spaces.
 * A namespace prefix is split off the name: `xlink:href="x"` is stored under the
 * key `href` with `ns: 'xlink'`.
 *
 * @param {string} tail - raw attribute string, e.g. ` class="a" id=b`
 * @returns {Object<string, {value: string, ns: string}>} attributes keyed by local name
 */
function getAttrsSync(tail) {
    let result = {};
    let name = '';
    let value = '';
    let vOpen = '';// what opened the current value: '"', '\'' or ' ' for an unquoted one
    let inName = false;
    let inValue = false;
    let waitValue = false;
    let waitEq = false;

    // stores the attribute collected so far (if any) and resets the state machine
    const pushResult = () => {
        if (name != '') {
            let ns = '';
            if (name.indexOf(':') >= 0) {
                [ns, name] = name.split(':');
            }

            result[name] = {value, ns};
        }
        name = '';
        value = '';
        vOpen = '';
        inName = false;
        inValue = false;
        waitValue = false;
        waitEq = false;
    };

    const source = tail.replace(/[\t\n\r]/g, ' ');
    for (let i = 0; i < source.length; i++) {
        const c = source.charAt(i);
        if (c == ' ') {
            if (inValue) {
                // spaces belong to a quoted value, but terminate an unquoted one
                if (vOpen == '"' || vOpen == '\'')
                    value += c;
                else
                    pushResult();
            } else if (inName) {
                waitEq = true;
                inName = false;
            }
        } else if (!inValue && c == '=') {
            waitEq = false;
            waitValue = true;
            inName = false;
        } else if (c == '"') {
            if (inValue) {
                // inside a single-quoted value a double quote is ordinary data
                if (vOpen == '\'')
                    value += c;
                else
                    pushResult();
            } else if (waitValue) {
                inValue = true;
                vOpen = '"';
            }
        } else if (c == '\'' && inValue && vOpen == '\'') {
            // closing single quote
            pushResult();
        } else if (c == '\'' && !inValue && waitValue) {
            // opening single quote; anywhere else an apostrophe is an ordinary character
            inValue = true;
            vOpen = '\'';
        } else if (inValue) {
            value += c;
        } else if (inName) {
            name += c;
        } else if (waitEq) {
            // previous name had no "=": store it as a bare attribute and start a new name
            pushResult();
            inName = true;
            name = c;
        } else if (waitValue) {
            // first character of an unquoted value
            waitValue = false;
            inValue = true;
            vOpen = ' ';
            value = c;
        } else {
            inName = true;
            name = c;
        }
    }
    if (name != '')
        pushResult();

    return result;
}
// Public API: the synchronous and asynchronous parser entry points
// plus the attribute-string parser.
module.exports = {
parseSync,
getAttrsSync,
parse
}

View File

@@ -0,0 +1,110 @@
/**
 * Guesses the encoding of a (presumably Russian) text by byte statistics.
 *
 * Every byte >= 128 adds a score to each candidate code page whose ranges contain
 * it; ranges are weighted with `lowerCase` (3) or `upperCase` (1) - presumably the
 * places where the code page keeps lowercase/uppercase Cyrillic letters. A lead
 * byte 0xD0/0xD1 followed by a UTF-8 continuation byte scores for UTF-8 only.
 *
 * Buffers longer than 15000 bytes are sampled: after more than 3000 counted
 * non-ASCII bytes the scan jumps ahead by about `len/2 - 6000` bytes.
 *
 * @param {Buffer|Uint8Array|number[]} buf - bytes to analyse
 * @param {boolean} [returnAll] - return the whole ranking instead of one name
 * @returns {string|Array<{codePage: string, c: number, totalChecked: number}>}
 *   with `returnAll`: all candidates sorted by score `c`, best first;
 *   otherwise the best code page name if its score is positive and greater than
 *   half of the number of inspected bytes, else 'ISO-8859-5'
 */
function getEncoding(buf, returnAll) {
    const lowerCase = 3;
    const upperCase = 1;

    const codePage = {
        'k': 'koi8-r',
        'w': 'Windows-1251',
        'd': 'cp866',
        'i': 'ISO-8859-5',
        'm': 'maccyrillic',
        'u': 'utf-8',
    };

    let charsets = {
        'k': 0,
        'w': 0,
        'd': 0,
        'i': 0,
        'm': 0,
        'u': 0,
    };

    const len = buf.length;
    const blockSize = (len > 5*3000 ? 3000 : len);
    let counter = 0;
    let i = 0;
    let totalChecked = 0;
    while (i < len) {
        const char = buf[i];
        const nextChar = (i < len - 1 ? buf[i + 1] : 0);
        totalChecked++;
        i++;
        //non-russian characters
        if (char < 128 || char > 256)
            continue;
        //UTF-8
        // continuation bytes span 0x80..0xBF (128..191); the upper bound must be
        // inclusive, otherwise "п" (D0 BF) is scored for the single-byte code pages
        if ((char == 208 || char == 209) && nextChar >= 128 && nextChar <= 191)
            charsets['u'] += lowerCase;
        else {
            //CP866
            if ((char > 159 && char < 176) || (char > 223 && char < 242)) charsets['d'] += lowerCase;
            if ((char > 127 && char < 160)) charsets['d'] += upperCase;

            //KOI8-R
            if ((char > 191 && char < 223)) charsets['k'] += lowerCase;
            if ((char > 222 && char < 256)) charsets['k'] += upperCase;

            //WIN-1251
            if (char > 223 && char < 256) charsets['w'] += lowerCase;
            if (char > 191 && char < 224) charsets['w'] += upperCase;

            //MAC
            if (char > 221 && char < 255) charsets['m'] += lowerCase;
            if (char > 127 && char < 160) charsets['m'] += upperCase;

            //ISO-8859-5
            if (char > 207 && char < 240) charsets['i'] += lowerCase;
            if (char > 175 && char < 208) charsets['i'] += upperCase;
        }

        // sampling of big buffers: skip ahead once a block of non-ASCII bytes was counted
        counter++;
        if (counter > blockSize) {
            counter = 0;
            i += Math.round(len/2 - 2*blockSize);
        }
    }

    let sorted = Object.keys(charsets).map(function(key) {
        return { codePage: codePage[key], c: charsets[key], totalChecked };
    });

    sorted.sort((a, b) => b.c - a.c);

    if (returnAll)
        return sorted;
    else if (sorted[0].c > 0 && sorted[0].c > sorted[0].totalChecked/2)
        return sorted[0].codePage;
    else
        return 'ISO-8859-5';
}
/**
 * Heuristically decides whether a buffer looks like text.
 *
 * The buffer is accepted as text when the best encoding candidate reported by
 * getEncoding() scores above 90% of the inspected bytes, when it is shorter than
 * 1000 bytes, or when spaces (> 10%), CR (> 3%) or LF (> 3%) are frequent enough.
 *
 * @param {Buffer|Uint8Array|number[]} buf - bytes to analyse
 * @returns {boolean} true if the content is considered text
 */
function checkIfText(buf) {
    const best = getEncoding(buf, true)[0];
    if (best.c > best.totalChecked*0.9)
        return true;

    const size = buf.length;
    // small buffers are always accepted, no need to count anything
    if (size < 1000)
        return true;

    const hits = {space: 0, cr: 0, lf: 0};
    for (let pos = 0; pos < size; pos++) {
        const byte = buf[pos];
        if (byte == 32)
            hits.space++;
        else if (byte == 13)
            hits.cr++;
        else if (byte == 10)
            hits.lf++;
    }

    const denominator = size + 1;
    if (hits.space/denominator > 0.1)
        return true;
    if (hits.cr/denominator > 0.03)
        return true;
    return hits.lf/denominator > 0.03;
}
// Public API: encoding detection and the "is this text" heuristic.
module.exports = {
getEncoding,
checkIfText,
}

View File

@@ -0,0 +1,126 @@
const SQL = require('sql-template-strings');
const _ = require('lodash');
const ConnManager = require('../../db/ConnManager');//singleton
let instance = null;
/**
 * Singleton storage of reader items kept in the `storage` table (id, rev, time, data).
 *
 * Items are versioned: a write is accepted only when its revision is exactly one
 * greater than the stored one (unless forced). Revisions that were already looked
 * up are cached in memory; the cache is dropped every 3 hours.
 *
 * `new ReaderStorage()` always returns the same instance.
 */
class ReaderStorage {
    constructor() {
        if (!instance) {
            this.connManager = new ConnManager();
            this.storagePool = this.connManager.pool.readerStorage;
            this.periodicCleanCache(3*3600*1000);// once every 3 hours

            instance = this;
        }

        return instance;
    }

    /**
     * Dispatches a storage request.
     *
     * @param {Object} act - request
     * @param {string} act.action - 'check', 'get' or 'set'
     * @param {Object} act.items - items keyed by id
     * @param {boolean} [act.force] - for 'set': skip the revision check
     * @returns {Promise<Object>} result of checkItems/getItems/setItems
     * @throws {Error} if `act.items` is not an object or the action is unknown
     */
    async doAction(act) {
        if (!_.isObject(act.items))
            throw new Error('items is not an object');

        let result = {};
        switch (act.action) {
            case 'check':
                result = await this.checkItems(act.items);
                break;
            case 'get':
                result = await this.getItems(act.items);
                break;
            case 'set':
                result = await this.setItems(act.items, act.force);
                break;
            default:
                throw new Error('Unknown action');
        }

        return result;
    }

    /**
     * Returns the stored revision of every requested item (0 if it is not stored).
     *
     * @param {Object} items - object whose keys are the item ids
     * @returns {Promise<{state: string, items: Object}>} `items[id] = {rev}`
     */
    async checkItems(items) {
        let result = {state: 'success', items: {}};

        const dbh = await this.storagePool.get();
        try {
            for (const id of Object.keys(items)) {
                // ids come from the client: only own cache entries count, so that ids
                // like "constructor" do not hit properties inherited from Object.prototype
                if (Object.prototype.hasOwnProperty.call(this.cache, id) && this.cache[id]) {
                    result.items[id] = this.cache[id];
                } else {
                    const rows = await dbh.all(SQL`SELECT rev FROM storage WHERE id = ${id}`);
                    const rev = (rows.length && rows[0].rev ? rows[0].rev : 0);
                    result.items[id] = {rev};
                    this.cache[id] = result.items[id];
                }
            }
        } finally {
            dbh.ret();
        }

        return result;
    }

    /**
     * Reads revision and data of every requested item straight from the database.
     *
     * @param {Object} items - object whose keys are the item ids
     * @returns {Promise<{state: string, items: Object}>} `items[id] = {rev, data}`;
     *   missing items are reported as `{rev: 0, data: ''}`
     */
    async getItems(items) {
        let result = {state: 'success', items: {}};

        const dbh = await this.storagePool.get();
        try {
            for (const id of Object.keys(items)) {
                const rows = await dbh.all(SQL`SELECT rev, data FROM storage WHERE id = ${id}`);
                const rev = (rows.length && rows[0].rev ? rows[0].rev : 0);
                const data = (rows.length && rows[0].data ? rows[0].data : '');
                result.items[id] = {rev, data};
            }
        } finally {
            dbh.ret();
        }

        return result;
    }

    /**
     * Stores items in one transaction.
     *
     * @param {Object} items - `items[id] = {rev, data}`, `data` must be a string
     * @param {boolean} [force] - write even if the revisions do not follow each other
     * @returns {Promise<Object>} `{state: 'success'}`, or
     *   `{state: 'reject', items}` with the currently stored revisions when some
     *   item's `rev` is not "stored rev + 1"
     * @throws {Error} if some `data` is not a string or the database write fails
     */
    async setItems(items, force) {
        let check = await this.checkItems(items);

        // first make sure the revisions match
        for (const id of Object.keys(items)) {
            if (!_.isString(items[id].data))
                throw new Error('items.data is not a string');

            if (!force && check.items[id].rev + 1 !== items[id].rev)
                return {state: 'reject', items: check.items};
        }

        const dbh = await this.storagePool.get();
        try {
            // BEGIN is inside try/finally so that the connection goes back to the pool
            // even if the transaction cannot be started
            await dbh.run('BEGIN');
            try {
                const newRev = {};
                for (const id of Object.keys(items)) {
                    await dbh.run(SQL`INSERT OR REPLACE INTO storage (id, rev, time, data) VALUES (${id}, ${items[id].rev}, strftime('%s','now'), ${items[id].data})`);
                    newRev[id] = {rev: items[id].rev};
                }
                await dbh.run('COMMIT');

                // update the cache only after a successful commit
                Object.assign(this.cache, newRev);
            } catch (e) {
                try {
                    await dbh.run('ROLLBACK');
                } catch (rollbackError) {
                    // deliberately not rethrown: the error that caused the rollback
                    // is the one the caller needs to see
                }
                throw e;
            }
        } finally {
            dbh.ret();
        }

        return {state: 'success'};
    }

    /**
     * Drops the revision cache now and re-schedules itself.
     *
     * @param {number} timeout - period in milliseconds
     */
    periodicCleanCache(timeout) {
        this.cache = {};

        setTimeout(() => {
            this.periodicCleanCache(timeout);
        }, timeout);
    }
}
// The constructor returns the shared instance, so every `new ReaderStorage()` yields the same object.
module.exports = ReaderStorage;

View File

@@ -0,0 +1,175 @@
const fs = require('fs-extra');
const path = require('path');
const WorkerState = require('../WorkerState');//singleton
const FileDownloader = require('../FileDownloader');
const FileDecompressor = require('../FileDecompressor');
const BookConverter = require('./BookConverter');
const utils = require('../utils');
const log = new (require('../AppLogger'))().log;//singleton
let instance = null;
/**
 * Singleton worker that turns a book (downloaded by URL or previously uploaded)
 * into a gzipped fb2 file in the public temp directory.
 *
 * Pipeline of loadBook(): download/locate -> decompress -> convert to fb2 -> gzip.
 * The constructor also starts hourly clean-ups of the public temp and upload
 * directories. `new ReaderWorker(config)` always returns the same instance.
 */
class ReaderWorker {
    /**
     * @param {Object} config - application config; a shallow copy is stored and
     *   extended with `tempDownloadDir` and `tempPublicDir` (both are created)
     */
    constructor(config) {
        if (!instance) {
            this.config = Object.assign({}, config);

            this.config.tempDownloadDir = `${config.tempDir}/download`;
            fs.ensureDirSync(this.config.tempDownloadDir);

            this.config.tempPublicDir = `${config.publicDir}/tmp`;
            fs.ensureDirSync(this.config.tempPublicDir);

            this.workerState = new WorkerState();
            this.down = new FileDownloader();
            this.decomp = new FileDecompressor();
            this.bookConverter = new BookConverter(this.config);

            this.periodicCleanDir(this.config.tempPublicDir, this.config.maxTempPublicDirSize, 60*60*1000);// once an hour
            this.periodicCleanDir(this.config.uploadDir, this.config.maxUploadPublicDirSize, 60*60*1000);// once an hour

            instance = this;
        }

        return instance;
    }

    /**
     * Runs the whole load pipeline for one book and reports progress, the result
     * path or the error through `wState`. Errors of the pipeline are not thrown.
     *
     * @param {Object} opts - request options; `opts.url` is either a remote URL or
     *   `file://<name>` as returned by saveFile(); `opts` is also passed to the converter
     * @param {Object} wState - worker state control (set/finish)
     * @returns {Promise<void>}
     */
    async loadBook(opts, wState) {
        const url = opts.url;
        let decompDir = '';
        let downloadedFilename = '';
        let isUploaded = false;
        let convertFilename = '';
        try {
            wState.set({state: 'download', step: 1, totalSteps: 3, url});

            const tempFilename = utils.randomHexString(30);
            const tempFilename2 = utils.randomHexString(30);
            const decompDirname = utils.randomHexString(30);

            if (url.indexOf('file://') != 0) {//download
                const downdata = await this.down.load(url, (progress) => {
                    wState.set({progress});
                });

                downloadedFilename = `${this.config.tempDownloadDir}/${tempFilename}`;
                await fs.writeFile(downloadedFilename, downdata);
            } else {//uploaded file
                // set before anything can fail: the cleanup below must never delete
                // a file from the upload directory
                isUploaded = true;

                // saveFile() stores uploads as `${uploadDir}/${hash}`, so a valid name is a
                // plain file name; anything containing a path (e.g. "../x") is rejected to
                // keep the request inside the upload directory
                const uploadedName = url.substr(7);
                const isPlainName = (uploadedName !== '' && uploadedName !== '.' && uploadedName !== '..'
                    && path.basename(uploadedName) === uploadedName);

                downloadedFilename = `${this.config.uploadDir}/${uploadedName}`;
                if (!isPlainName || !await fs.pathExists(downloadedFilename))
                    throw new Error('Файл не найден на сервере (возможно был удален как устаревший). Пожалуйста, загрузите файл с диска на сервер заново.');
                // refresh mtime so that the periodic clean-up treats the file as recently used
                await utils.touchFile(downloadedFilename);
            }
            wState.set({progress: 100});

            //decompress
            wState.set({state: 'decompress', step: 2, progress: 0});
            decompDir = `${this.config.tempDownloadDir}/${decompDirname}`;
            let decompFiles = {};
            try {
                decompFiles = await this.decomp.decompressNested(downloadedFilename, decompDir);
            } catch (e) {
                if (this.config.branch == 'development')
                    console.error(e);
                throw new Error('Ошибка распаковки');
            }
            wState.set({progress: 100});

            // conversion to fb2
            wState.set({state: 'convert', step: 3, progress: 0});
            convertFilename = `${this.config.tempDownloadDir}/${tempFilename2}`;
            await this.bookConverter.convertToFb2(decompFiles, convertFilename, opts, progress => {
                wState.set({progress});
            });

            // gzip the file into tmp unless a file with the same sha256-based name is already there
            const compFilename = await this.decomp.gzipFileIfNotExists(convertFilename, `${this.config.tempPublicDir}`);

            wState.set({progress: 100});

            //finish
            const finishFilename = path.basename(compFilename);
            wState.finish({path: `/tmp/${finishFilename}`});

        } catch (e) {
            if (this.config.branch == 'development')
                console.error(e);
            wState.set({state: 'error', error: e.message});
        } finally {
            //clean
            if (decompDir)
                await fs.remove(decompDir);
            if (downloadedFilename && !isUploaded)
                await fs.remove(downloadedFilename);
            if (convertFilename)
                await fs.remove(convertFilename);
        }
    }

    /**
     * Starts loadBook() in the background.
     *
     * @param {Object} opts - see loadBook()
     * @returns {*} id of the worker whose state reflects the progress
     */
    loadBookUrl(opts) {
        const workerId = this.workerState.generateWorkerId();
        const wState = this.workerState.getControl(workerId);
        wState.set({state: 'start'});

        // loadBook reports pipeline errors through wState itself; this handler only
        // prevents an unhandled rejection if its error reporting or cleanup fails
        this.loadBook(opts, wState).catch((e) => {
            // LM_ERR is not defined in this file - presumably a global log level, as in periodicCleanDir
            log(LM_ERR, e.message);
        });

        return workerId;
    }

    /**
     * Moves an uploaded file into the upload directory under its sha256 hex hash.
     * If such a file already exists, its mtime is refreshed and the new copy is removed.
     *
     * @param {Object} file - uploaded file descriptor; only `file.path` is used
     * @returns {Promise<string>} `file://<hash>` url accepted by loadBook()
     */
    async saveFile(file) {
        const hash = await utils.getFileHash(file.path, 'sha256', 'hex');
        const outFilename = `${this.config.uploadDir}/${hash}`;

        if (!await fs.pathExists(outFilename)) {
            await fs.move(file.path, outFilename);
        } else {
            await utils.touchFile(outFilename);
            await fs.remove(file.path);
        }

        return `file://${hash}`;
    }

    /**
     * Removes the oldest files (by mtime) of a directory until the total size of its
     * files does not exceed `maxSize`, then re-schedules itself. Subdirectories are
     * ignored. Errors are logged and do not stop the schedule.
     *
     * @param {string} dir - directory to clean
     * @param {number} maxSize - allowed total size in bytes
     * @param {number} timeout - delay before the next run in milliseconds
     * @returns {Promise<void>}
     */
    async periodicCleanDir(dir, maxSize, timeout) {
        try {
            log(`Start clean dir: ${dir}, maxSize=${maxSize}`);
            const list = await fs.readdir(dir);

            let size = 0;
            let files = [];
            for (const name of list) {
                const stat = await fs.stat(`${dir}/${name}`);
                if (!stat.isDirectory()) {
                    size += stat.size;
                    files.push({name, stat});
                }
            }
            log(`found ${files.length} files in dir ${dir}`);

            // oldest first
            files.sort((a, b) => a.stat.mtimeMs - b.stat.mtimeMs);

            let i = 0;
            while (i < files.length && size > maxSize) {
                const file = files[i];
                log(`rm ${dir}/${file.name}`);
                await fs.remove(`${dir}/${file.name}`);
                size -= file.stat.size;
                i++;
            }
            log(`removed ${i} files`);
        } catch(e) {
            log(LM_ERR, e.message);
        } finally {
            setTimeout(() => {
                this.periodicCleanDir(dir, maxSize, timeout);
            }, timeout);
        }
    }
}
// The constructor returns the shared instance, so every `new ReaderWorker(config)` yields the same object.
module.exports = ReaderWorker;