diff --git a/client/api/misc.js b/client/api/misc.js index 04fe804b..79d56d0e 100644 --- a/client/api/misc.js +++ b/client/api/misc.js @@ -6,7 +6,9 @@ const api = axios.create({ class Misc { async loadConfig() { - const response = await api.post('/config', {params: ['name', 'version', 'mode', 'maxUploadFileSize', 'branch']}); + const response = await api.post('/config', {params: [ + 'name', 'version', 'mode', 'maxUploadFileSize', 'useExternalBookConverter', 'branch', + ]}); return response.data; } } diff --git a/client/api/reader.js b/client/api/reader.js index 5b1b8ab4..071a9689 100644 --- a/client/api/reader.js +++ b/client/api/reader.js @@ -44,7 +44,7 @@ class Reader { await sleep(refreshPause); i++; - if (i > 30*1000/refreshPause) {//30 сек ждем телодвижений воркера + if (i > 120*1000/refreshPause) {//2 мин ждем телодвижений воркера throw new Error('Слишком долгое время ожидания'); } //проверка воркера diff --git a/client/components/Reader/LoaderPage/LoaderPage.vue b/client/components/Reader/LoaderPage/LoaderPage.vue index c0ac9167..58515f5d 100644 --- a/client/components/Reader/LoaderPage/LoaderPage.vue +++ b/client/components/Reader/LoaderPage/LoaderPage.vue @@ -3,8 +3,11 @@
{{ title }} Добро пожаловать! - Поддерживаются форматы: fb2, fb2.zip, html, txt + Поддерживаются форматы: fb2, html, txt + ...а также: rtf, doc, docx, и вскоре: pdf, epub, mobi + ...и распознается сжатие: zip, bz2, gz
+
@@ -15,8 +18,9 @@ Загрузить файл с диска
- Комментарии + Комментарии
+
Справка Помочь проекту @@ -38,7 +42,6 @@ class LoaderPage extends Vue { created() { this.commit = this.$store.commit; - this.config = this.$store.state.config; } mounted() { @@ -50,14 +53,22 @@ class LoaderPage extends Vue { } get title() { - if (this.config.mode == 'omnireader') + if (this.$store.state.config.mode == 'omnireader') return 'Omni Reader - браузерная онлайн-читалка.'; return 'Универсальная читалка книг и ресурсов интернета.'; } + get mode() { + return this.$store.state.config.mode; + } + get version() { - return `v${this.config.version}`; + return `v${this.$store.state.config.version}`; + } + + get isExternalConverter() { + return this.$store.state.config.useExternalBookConverter; } submitUrl() { diff --git a/docs/omnireader/readme.txt b/docs/omnireader/readme.txt index 0fcc4188..aed4bcaa 100644 --- a/docs/omnireader/readme.txt +++ b/docs/omnireader/readme.txt @@ -15,6 +15,18 @@ chgrp www-data /home/oldreader sudo -u www-data cp -r ./old/* /home/oldreader ### +### external converter +# calibre releases https://download.calibre-ebook.com/ +# download, unpack to data/calibre +# 3.39.1 +wget "https://download.calibre-ebook.com/3.39.1/calibre-3.39.1-x86_64.txz" +sudo -u www-data mkdir -p /home/liberama/data/calibre +sudo -u www-data tar xvf calibre-3.39.1-x86_64.txz -C /home/liberama/data/calibre + +apt install libreoffice +apt install poppler-utils +### + apt install nginx cp omnireader /etc/nginx/sites-available/omnireader diff --git a/package-lock.json b/package-lock.json index ad93ef29..97c61806 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "Liberama", - "version": "0.3.2", + "version": "0.4.7", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -3291,11 +3291,6 @@ "integrity": "sha1-8NZtA2cqglyxtzvbP+YjEMjlUrc=", "dev": true }, - "detect-file-type": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/detect-file-type/-/detect-file-type-0.2.0.tgz", - "integrity": "sha512-RO/pUyKObaN3KZFVAxTzq6zUIweCu45deaUj2ZpbA1wCrmHeHYHpCXT6ZMLtlMtYSgEbpGuCxzQz8a9IuwOfNw==" - }, "detect-indent": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-4.0.0.tgz", diff --git a/package.json b/package.json index 7ca81322..be8f684c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "Liberama", - "version": "0.4.7", + "version": "0.5.0", "engines": { "node": ">=10.0.0" }, @@ -62,7 +62,6 @@ "decompress": "^4.2.0", "decompress-bzip2": "^4.0.0", "decompress-gz": "0.0.1", - "detect-file-type": "^0.2.0", "element-ui": "^2.4.11", "express": "^4.16.4", "fg-loadcss": "^2.1.0", diff --git a/server/config/base.js b/server/config/base.js index 0ec65f87..4789cdc1 100644 --- a/server/config/base.js +++ b/server/config/base.js @@ -21,6 +21,8 @@ module.exports = { maxTempPublicDirSize: 512*1024*1024,//512Мб maxUploadPublicDirSize: 200*1024*1024,//100Мб + useExternalBookConverter: false, + servers: [ { serverName: '1', diff --git a/server/config/configSaver.js b/server/config/configSaver.js index f39b920f..f4844a5c 100644 --- a/server/config/configSaver.js +++ b/server/config/configSaver.js @@ -5,7 +5,8 @@ const propsToSave = [ 'maxUploadFileSize', 'maxTempPublicDirSize', 'maxUploadPublicDirSize', - + 'useExternalBookConverter', + 'servers', ]; diff --git a/server/core/BookConverter/ConvertBase.js b/server/core/BookConverter/ConvertBase.js index 460d91c3..ca78b442 100644 --- a/server/core/BookConverter/ConvertBase.js +++ b/server/core/BookConverter/ConvertBase.js @@ -1,16 +1,54 @@ +const fs = require('fs-extra'); const iconv = require('iconv-lite'); const chardet = require('chardet'); + const textUtils = require('./textUtils'); +const utils = require('../utils'); class ConvertBase { constructor(config) { this.config = config; + + this.calibrePath = `${config.dataDir}/calibre/ebook-convert`; + this.sofficePath = '/usr/bin/soffice'; + this.pdfToHtmlPath = '/usr/bin/pdftohtml'; } - run(data, opts) {// eslint-disable-line no-unused-vars + async run(data, opts) {// eslint-disable-line no-unused-vars //override } + async checkExternalConverterPresent() { + if (!await fs.pathExists(this.calibrePath)) + throw new Error('Внешний конвертер calibre не найден'); + + if (!await fs.pathExists(this.sofficePath)) + throw new Error('Внешний конвертер LibreOffice не найден'); + + if (!await fs.pathExists(this.pdfToHtmlPath)) + throw new Error('Внешний конвертер pdftohtml не найден'); + } + + async execConverter(path, args, onData) { + try { + const result = await utils.spawnProcess(path, {args, onData}); + if (result.code != 0) { + let error = result.code; + if (this.config.branch == 'development') + error = `exec: ${path}, stdout: ${result.stdout}, stderr: ${result.stderr}`; + throw new Error(`Внешний конвертер завершился с ошибкой: ${error}`); + } + } catch(e) { + if (e.status == 'killed') { + throw new Error('Слишком долгое ожидание конвертера'); + } else if (e.status == 'error') { + throw new Error(e.error); + } else { + throw new Error(e); + } + } + } + decode(data) { let selected = textUtils.getEncoding(data); diff --git a/server/core/BookConverter/ConvertDoc.js b/server/core/BookConverter/ConvertDoc.js new file mode 100644 index 00000000..d3571b0f --- /dev/null +++ b/server/core/BookConverter/ConvertDoc.js @@ -0,0 +1,33 @@ +const fs = require('fs-extra'); +const path = require('path'); + +const ConvertDocX = require('./ConvertDocX'); + +class ConvertDoc extends ConvertDocX { + check(data, opts) { + const {inputFiles} = opts; + + return this.config.useExternalBookConverter && + inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'msi'; + } + + async run(data, opts) { + if (!this.check(data, opts)) + return false; + await this.checkExternalConverterPresent(); + + const {inputFiles, callback} = opts; + + const outFile = `${inputFiles.fileListDir}/${path.basename(inputFiles.sourceFile)}`; + const docFile = `${outFile}.doc`; + const docxFile = `${outFile}.docx`; + const fb2File = `${outFile}.fb2`; + + await fs.copy(inputFiles.sourceFile, docFile); + await this.execConverter(this.sofficePath, ['--headless', '--convert-to', 'docx', '--outdir', inputFiles.fileListDir, docFile]); + + return await super.convert(docxFile, fb2File, callback); + } +} + +module.exports = ConvertDoc; diff --git a/server/core/BookConverter/ConvertDocX.js b/server/core/BookConverter/ConvertDocX.js new file mode 100644 index 00000000..8978475f --- /dev/null +++ b/server/core/BookConverter/ConvertDocX.js @@ -0,0 +1,50 @@ +const fs = require('fs-extra'); +const path = require('path'); + +const ConvertBase = require('./ConvertBase'); + +class ConvertDocX extends ConvertBase { + check(data, opts) { + const {inputFiles} = opts; + + if (this.config.useExternalBookConverter && + inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'zip') { + //ищем файл '[Content_Types].xml' + for (const file of inputFiles.fileList) { + if (file == '[Content_Types].xml') { + return true; + } + } + } + + return false; + } + + async convert(docxFile, fb2File, callback) { + let perc = 0; + await this.execConverter(this.calibrePath, [docxFile, fb2File], () => { + perc = (perc < 100 ? perc + 5 : 50); + callback(perc); + }); + + return await fs.readFile(fb2File); + } + + async run(data, opts) { + if (!this.check(data, opts)) + return false; + await this.checkExternalConverterPresent(); + + const {inputFiles, callback} = opts; + + const outFile = `${inputFiles.fileListDir}/${path.basename(inputFiles.sourceFile)}`; + const docxFile = `${outFile}.docx`; + const fb2File = `${outFile}.fb2`; + + await fs.copy(inputFiles.sourceFile, docxFile); + + return await this.convert(docxFile, fb2File, callback); + } +} + +module.exports = ConvertDocX; diff --git a/server/core/BookConverter/ConvertFb2.js b/server/core/BookConverter/ConvertFb2.js index e4a84866..ec492a0f 100644 --- a/server/core/BookConverter/ConvertFb2.js +++ b/server/core/BookConverter/ConvertFb2.js @@ -3,12 +3,12 @@ const iconv = require('iconv-lite'); class ConvertFb2 extends ConvertBase { check(data, opts) { - const {fileType} = opts; + const {dataType} = opts; - return (fileType && fileType.ext == 'xml' && data.toString().indexOf('= 0); + return (dataType && dataType.ext == 'xml' && data.toString().indexOf('= 0); } - run(data, opts) { + async run(data, opts) { if (!this.check(data, opts)) return false; diff --git a/server/core/BookConverter/ConvertHtml.js b/server/core/BookConverter/ConvertHtml.js index fa85937d..9596cff8 100644 --- a/server/core/BookConverter/ConvertHtml.js +++ b/server/core/BookConverter/ConvertHtml.js @@ -4,9 +4,9 @@ const textUtils = require('./textUtils'); class ConvertHtml extends ConvertBase { check(data, opts) { - const {fileType} = opts; + const {dataType} = opts; - if (fileType && (fileType.ext == 'html' || fileType.ext == 'xml')) + if (dataType && (dataType.ext == 'html' || dataType.ext == 'xml')) return {isText: false}; //может это чистый текст? @@ -17,7 +17,7 @@ class ConvertHtml extends ConvertBase { return false; } - run(data, opts) { + async run(data, opts) { const checkResult = this.check(data, opts); if (!checkResult) return false; diff --git a/server/core/BookConverter/ConvertRtf.js b/server/core/BookConverter/ConvertRtf.js new file mode 100644 index 00000000..6e6eda31 --- /dev/null +++ b/server/core/BookConverter/ConvertRtf.js @@ -0,0 +1,33 @@ +const fs = require('fs-extra'); +const path = require('path'); + +const ConvertDocX = require('./ConvertDocX'); + +class ConvertRtf extends ConvertDocX { + check(data, opts) { + const {inputFiles} = opts; + + return this.config.useExternalBookConverter && + inputFiles.sourceFileType && inputFiles.sourceFileType.ext == 'rtf'; + } + + async run(data, opts) { + if (!this.check(data, opts)) + return false; + await this.checkExternalConverterPresent(); + + const {inputFiles, callback} = opts; + + const outFile = `${inputFiles.fileListDir}/${path.basename(inputFiles.sourceFile)}`; + const rtfFile = `${outFile}.rtf`; + const docxFile = `${outFile}.docx`; + const fb2File = `${outFile}.fb2`; + + await fs.copy(inputFiles.sourceFile, rtfFile); + await this.execConverter(this.sofficePath, ['--headless', '--convert-to', 'docx', '--outdir', inputFiles.fileListDir, rtfFile]); + + return await super.convert(docxFile, fb2File, callback); + } +} + +module.exports = ConvertRtf; diff --git a/server/core/BookConverter/ConvertSamlib.js b/server/core/BookConverter/ConvertSamlib.js index 40ebd10b..474fe0a1 100644 --- a/server/core/BookConverter/ConvertSamlib.js +++ b/server/core/BookConverter/ConvertSamlib.js @@ -6,19 +6,20 @@ const ConvertBase = require('./ConvertBase'); class ConvertSamlib extends ConvertBase { check(data, opts) { - const {url} = opts; + const {url, dataType} = opts; const parsedUrl = new URL(url); - if (parsedUrl.hostname == 'samlib.ru' || + if (dataType && dataType.ext == 'html' && + (parsedUrl.hostname == 'samlib.ru' || parsedUrl.hostname == 'budclub.ru' || - parsedUrl.hostname == 'zhurnal.lib.ru') { + parsedUrl.hostname == 'zhurnal.lib.ru')) { return {hostname: parsedUrl.hostname}; } return false; } - run(data, opts) { + async run(data, opts) { const checkResult = this.check(data, opts); if (!checkResult) return false; diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js index 8d9a6fd3..6249b911 100644 --- a/server/core/BookConverter/index.js +++ b/server/core/BookConverter/index.js @@ -3,6 +3,9 @@ const FileDetector = require('../FileDetector'); //порядок важен const convertClassFactory = [ + require('./ConvertRtf'), + require('./ConvertDocX'), + require('./ConvertDoc'), require('./ConvertFb2'), require('./ConvertSamlib'), require('./ConvertHtml'), @@ -18,22 +21,37 @@ class BookConverter { } } - async convertToFb2(inputFile, outputFile, url, callback) { - const fileType = await this.detector.detectFile(inputFile); + async convertToFb2(inputFiles, outputFile, url, callback) { + const selectedFileType = await this.detector.detectFile(inputFiles.selectedFile); + const data = await fs.readFile(inputFiles.selectedFile); + + let selectedFileType2 = null; + let data2 = null; + if (inputFiles.nesting) { + selectedFileType2 = await this.detector.detectFile(inputFiles.nesting.selectedFile); + data2 = await fs.readFile(inputFiles.nesting.selectedFile); + } - const data = await fs.readFile(inputFile); let result = false; for (const convert of this.convertFactory) { - result = convert.run(data, {inputFile, url, callback, fileType}); + result = await convert.run(data, {inputFiles, url, callback, dataType: selectedFileType}); if (result) { await fs.writeFile(outputFile, result); break; } + + if (inputFiles.nesting) { + result = await convert.run(data2, {inputFiles: inputFiles.nesting, url, callback, dataType: selectedFileType2}); + if (result) { + await fs.writeFile(outputFile, result); + break; + } + } } if (!result) { - if (fileType) - throw new Error(`Этот формат файла не поддерживается: ${fileType.mime}`); + if (selectedFileType) + throw new Error(`Этот формат файла не поддерживается: ${selectedFileType.mime}`); else { throw new Error(`Не удалось определить формат файла: ${url}`); } diff --git a/server/core/FileDecompressor.js b/server/core/FileDecompressor.js index 46b5eafe..9948147b 100644 --- a/server/core/FileDecompressor.js +++ b/server/core/FileDecompressor.js @@ -17,8 +17,17 @@ class FileDecompressor { async decompressFile(filename, outputDir) { const fileType = await this.detector.detectFile(filename); - if (!fileType || !(fileType.ext == 'zip' || fileType.ext == 'bz2' || fileType.ext == 'gz')) - return filename; + let result = { + sourceFile: filename, + sourceFileType: fileType, + selectedFile: filename, + fileListDir: outputDir, + fileList: [] + }; + + if (!fileType || !(fileType.ext == 'zip' || fileType.ext == 'bz2' || fileType.ext == 'gz')) { + return result; + } //дурной decompress, поэтому в 2 этапа //этап 1 @@ -44,20 +53,29 @@ class FileDecompressor { } } - let result = filename; + let sel = filename; + let fileList = []; let max = 0; if (files.length) { //ищем файл с максимальным размером for (let file of files) { + fileList.push(file.path); if (file.data.length > max) { - result = `${outputDir}/${file.path}`; + sel = `${outputDir}/${file.path}`; max = file.data.length; } } } //дурной decompress - if (result != filename) - await fs.chmod(result, 0o664); + if (sel != filename) + await fs.chmod(sel, 0o664); + + result.selectedFile = sel; + result.fileList = fileList; + + if (sel != filename) { + result.nesting = await this.decompressFile(sel, `${outputDir}/${utils.randomHexString(10)}`); + } return result; } diff --git a/server/core/FileDetector.js b/server/core/FileDetector.js deleted file mode 100644 index 692465aa..00000000 --- a/server/core/FileDetector.js +++ /dev/null @@ -1,57 +0,0 @@ -const detect = require('detect-file-type'); - -//html -detect.addSignature( - { - "type": "html", - "ext": "html", - "mime": "text/html", - "rules": [ - { "type": "or", "rules": - [ - { "type": "equal", "end": 5, "bytes": "3c68746d6c" }, - { "type": "equal", "end": 10, "bytes": "3c00680074006d006c00" }, - - { "type": "equal", "end": 9, "bytes": "3c21646f6374797065" }, - { "type": "equal", "end": 5, "bytes": "3c626f6479" }, - { "type": "equal", "end": 5, "bytes": "3c68656164" }, - { "type": "equal", "end": 7, "bytes": "3c696672616d65" }, - { "type": "equal", "end": 4, "bytes": "3c696d67" }, - { "type": "equal", "end": 7, "bytes": "3c6f626a656374" }, - { "type": "equal", "end": 7, "bytes": "3c736372697074" }, - { "type": "equal", "end": 6, "bytes": "3c7461626c65" }, - { "type": "equal", "end": 6, "bytes": "3c7469746c65" }, - ] - } - ] - } -); - -//xml 3c 3f 78 6d 6c 20 76 65 72 73 69 6f 6e 3d 22 31 2e 30 22 -detect.addSignature( - { - "type": "xml", - "ext": "xml", - "mime": "application/xml", - "rules": [ - { "type": "or", "rules": - [ - { "type": "equal", "end": 19, "bytes": "3c3f786d6c2076657273696f6e3d22312e3022" }, - ] - } - ] - } -); - -class FileDetector { - detectFile(filename) { - return new Promise((resolve, reject) => { - detect.fromFile(filename, (err, result) => { - if (err) reject(err); - resolve(result); - }); - }); - } -} - -module.exports = FileDetector; \ No newline at end of file diff --git a/server/core/FileDetector/index.js b/server/core/FileDetector/index.js new file mode 100644 index 00000000..21baa109 --- /dev/null +++ b/server/core/FileDetector/index.js @@ -0,0 +1,274 @@ +const fs = require('fs'); +const signatures = require('./signatures.json'); + +class FileDetector { + detectFile(filename) { + return new Promise((resolve, reject) => { + this.fromFile(filename, 2000, (err, result) => { + if (err) reject(err); + resolve(result); + }); + }); + } + + //все, что ниже, взято здесь: https://github.com/dimapaloskin/detect-file-type + fromFile(filePath, bufferLength, callback) { + if (typeof bufferLength === 'function') { + callback = bufferLength; + bufferLength = undefined; + } + + this.getFileSize(filePath, (err, fileSize) => { + if (err) { + return callback(err); + } + + fs.open(filePath, 'r', (err, fd) => { + if (err) { + return callback(err); + } + + let bufferSize = bufferLength; + if (!bufferSize) { + bufferSize = 500; + } + + if (fileSize < bufferSize) { + bufferSize = fileSize; + } + + const buffer = Buffer.alloc(bufferSize); + + fs.read(fd, buffer, 0, bufferSize, 0, (err) => { + fs.close(fd); + + if (err) { + return callback(err); + } + + this.fromBuffer(buffer, callback); + }); + }); + }); + } + + fromBuffer(buffer, callback) { + let result = null; + + const invalidSignaturesList = this.validateSigantures(); + if (invalidSignaturesList.length) { + return callback(invalidSignaturesList); + } + + signatures.every((signature) => { + if (this.detect(buffer, signature.rules)) { + result = { + ext: signature.ext, + mime: signature.mime + }; + + if (signature.iana) + result.iana = signature.iana; + + return false; + } + return true; + }); + + callback(null, result); + } + + detect(buffer, receivedRules, type) { + if (!type) { + type = 'and'; + } + + const rules = [...receivedRules]; + + let isDetected = true; + rules.every((rule) => { + if (rule.type === 'equal') { + const slicedHex = buffer.slice(rule.start || 0, rule.end || buffer.length).toString('hex'); + isDetected = (slicedHex === rule.bytes); + return this.isReturnFalse(isDetected, type); + } + + if (rule.type === 'notEqual') { + const slicedHex = buffer.slice(rule.start || 0, rule.end || buffer.length).toString('hex'); + isDetected = !(slicedHex === rule.bytes); + return this.isReturnFalse(isDetected, type); + } + + if (rule.type === 'contains') { + const slicedHex = buffer.slice(rule.start || 0, rule.end || buffer.length).toString('hex'); + if (typeof rule.bytes === 'string') { + rule.bytes = [rule.bytes]; + } + + rule.bytes.every((bytes) => { + isDetected = (slicedHex.indexOf(bytes) !== -1); + return isDetected; + }); + + return this.isReturnFalse(isDetected, type); + } + + if (rule.type === 'notContains') { + const slicedHex = buffer.slice(rule.start || 0, rule.end || buffer.length).toString('hex'); + if (typeof rule.bytes === 'string') { + rule.bytes = [rule.bytes]; + } + + rule.bytes.every((bytes) => { + isDetected = (slicedHex.indexOf(bytes) === -1); + return isDetected; + }); + + return this.isReturnFalse(isDetected, type); + } + + if (rule.type === 'or') { + isDetected = this.detect(buffer, rule.rules, 'or'); + return this.isReturnFalse(isDetected, type); + } + + if (rule.type === 'and') { + isDetected = this.detect(buffer, rule.rules, 'and'); + return this.isReturnFalse(isDetected, type); + } + + return true; + }); + + return isDetected; + } + + isReturnFalse(isDetected, type) { + if (!isDetected && type === 'and') { + return false; + } + + if (isDetected && type === 'or') { + return false; + } + + return true; + } + + validateRuleType(rule) { + const types = ['or', 'and', 'contains', 'notContains', 'equal', 'notEqual']; + return (types.indexOf(rule.type) !== -1); + } + + validateSigantures() { + let invalidSignatures = signatures.map((signature) => { + return this.validateSignature(signature); + }); + + invalidSignatures = this.cleanArray(invalidSignatures); + + if (invalidSignatures.length) { + return invalidSignatures; + } + + return true; + } + + validateSignature(signature) { + if (!('type' in signature)) { + return { + message: 'signature does not contain "type" field', + signature + }; + } + + if (!('ext' in signature)) { + return { + message: 'signature does not contain "ext" field', + signature + }; + } + + if (!('mime' in signature)) { + return { + message: 'signature does not contain "mime" field', + signature + }; + } + + if (!('rules' in signature)) { + return { + message: 'signature does not contain "rules" field', + signature + }; + } + + const invalidRules = this.validateRules(signature.rules); + + if (invalidRules && invalidRules.length) { + return { + message: 'signature has invalid rule', + signature, + rules: invalidRules + } + } + } + + validateRules(rules) { + let invalidRules = rules.map((rule) => { + let isRuleTypeValid = this.validateRuleType(rule); + + if (!isRuleTypeValid) { + return { + message: 'rule type does not supported', + rule + }; + } + + if ((rule.type === 'or' || rule.type === 'and') && !('rules' in rule)) { + return { + message: 'rule should contains "rules" field', + rule + }; + } + + if (rule.type === 'or' || rule.type === 'and') { + return this.validateRules(rule.rules); + } + + return false; + }); + + invalidRules = this.cleanArray(invalidRules); + + if (invalidRules.length) { + return invalidRules; + } + } + + cleanArray(actual) { + let newArray = new Array(); + for (let i = 0; i < actual.length; i++) { + if (actual[i]) { + newArray.push(actual[i]); + } + } + return newArray; + } + + addSignature(signature) { + signatures.push(signature); + } + + getFileSize(filePath, callback) { + fs.stat(filePath, (err, stat) => { + if (err) { + return callback(err); + } + + return callback(null, stat.size); + }); + } +} + +module.exports = FileDetector; \ No newline at end of file diff --git a/server/core/FileDetector/signatures.json b/server/core/FileDetector/signatures.json new file mode 100644 index 00000000..691df440 --- /dev/null +++ b/server/core/FileDetector/signatures.json @@ -0,0 +1,727 @@ +[ + { + "type": "jpg", + "ext": "jpg", + "mime": "image/jpeg", + "rules": [ + { "type": "equal", "start": 0, "end": 2, "bytes": "ffd8" } + ] + }, + + { + "type": "png", + "ext": "png", + "mime": "image/png", + "rules": [ + { "type": "equal", "start": 0,"end": 4, "bytes": "89504e47" } + ] + }, + + { + "type": "gif", + "ext": "gif", + "mime": "image/gif", + "rules": [ + { "type": "equal", "start": 0,"end": 3, "bytes": "474946" } + ] + }, + + { + "type": "bmp", + "ext": "bmp", + "mime": "image/bmp", + "rules": [ + { "type": "equal", "start": 0,"end": 2, "bytes": "424d" } + ] + }, + { + "type": "webp", + "ext": "webp", + "mime": "image/webp", + "rules": [ + { "type": "equal", "start": 8,"end": 12, "bytes": "57454250" } + ] + }, + + { + "type": "tif", + "ext": "tif", + "mime": "image/tiff", + "rules": [ + { "type": "and", "rules": + [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 0, "end": 4, "bytes": "49492a00" }, + { "type": "equal", "start": 0, "end": 4, "bytes": "4d4d002a" } + ] + }, + { "type": "notEqual", "start": 8, "end": 10, "bytes": "4352" } + ] + } + ] + }, + + { + "type": "cr2", + "ext": "cr2", + "mime": "image/x-canon-cr2", + "rules": [ + { "type": "and", "rules": + [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 0, "end": 4, "bytes": "49492a00" }, + { "type": "equal", "start": 0, "end": 4, "bytes": "4d4d002a" } + ] + }, + { "type": "equal", "start": 8, "end": 10, "bytes": "4352" } + ] + } + ] + }, + + { + "type": "jxr", + "ext": "jxr", + "mime": "image/vnd.ms-photo", + "rules": [ + { "type": "equal", "start": 0, "end": 3, "bytes": "4949bc" } + ] + }, + + { + "type": "psd", + "ext": "psd", + "mime": "image/vnd.adobe.photoshop", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "38425053" } + ] + }, + + { + "type": "flif", + "ext": "flif", + "mime": "image/flif", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "464c4946" } + ] + }, + + { + "type": "zip", + "ext": "zip", + "mime": "application/zip", + "rules": [ + { "type": "equal", "start": 0, "end": 2, "bytes": "504b" }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 2, "end": 3, "bytes": "03" }, + { "type": "equal", "start": 2, "end": 3, "bytes": "05" }, + { "type": "equal", "start": 2, "end": 3, "bytes": "07" } + ] + }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 3, "end": 4, "bytes": "04" }, + { "type": "equal", "start": 3, "end": 4, "bytes": "06" }, + { "type": "equal", "start": 3, "end": 4, "bytes": "08" } + ] + }, + { "type": "notEqual", "start": 36, "end": 58, "bytes": "70656170706c69636174696f6e2f657075622b7a6970" }, + { "type": "notEqual", "start": 30, "end": 50, "bytes": "4d4554412d494e462f6d6f7a696c6c612e727361" } + ] + }, + + { + "type": "epub", + "ext": "epub", + "mime": "application/epub+zip", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "504b0304" }, + { "type": "equal", "start": 36, "end": 58, "bytes": "70656170706c69636174696f6e2f657075622b7a6970" } + ] + }, + + { + "type": "xpi", + "ext": "xpi", + "mime": "application/x-xpinstall", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "504b0304" }, + { "type": "equal", "start": 30, "end": 50, "bytes": "4d4554412d494e462f6d6f7a696c6c612e727361" } + ] + }, + + { + "type": "tar", + "ext": "tar", + "mime": "application/x-tar", + "rules": [ + { "type": "equal", "start": 257, "end": 262, "bytes": "7573746172" } + ] + }, + { + "type": "rar", + "ext": "rar", + "mime": "application/x-rar-compressed", + "rules": [ + { "type": "equal", "start": 0, "end": 6, "bytes": "526172211a07" }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 6, "end": 7, "bytes": "00" }, + { "type": "equal", "start": 6, "end": 7, "bytes": "01" } + ] + } + ] + }, + { + "type": "gz", + "ext": "gz", + "mime": "application/gzip", + "rules": [ + { "type": "equal", "start": 0, "end": 3, "bytes": "1f8b08" } + ] + }, + { + "type": "bz2", + "ext": "bz2", + "mime": "application/x-bzip2", + "rules": [ + { "type": "equal", "start": 0, "end": 3, "bytes": "425a68" } + ] + }, + { + "type": "7z", + "ext": "7z", + "mime": "application/x-7z-compressed", + "rules": [ + { "type": "equal", "start": 0, "end": 6, "bytes": "377abcaf271c" } + ] + }, + { + "type": "dmg", + "ext": "dmg", + "mime": "application/x-apple-diskimage", + "rules": [ + { "type": "equal", "start": 0, "end": 2, "bytes": "7801" } + ] + }, + + { + "type": "mp4", + "ext": "mp4", + "mime": "video/mp4", + "rules": [ + { "type": "or", "rules": + [ + { "type": "and", "rules": + [ + { "type": "equal", "start": 0, "end": 3, "bytes": "000000" }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 3, "end": 4, "bytes": "18" }, + { "type": "equal", "start": 3, "end": 4, "bytes": "20" } + ] + }, + { "type": "equal", "start": 4, "end": 8, "bytes": "66747970" } + ] + }, + { "type": "equal", "start": 0, "end": 4, "bytes": "33677035" }, + { "type": "and", "rules": + [ + { "type": "equal", "start": 0, "end": 11, "bytes": "0000001c667479706d7034" }, + { "type": "equal", "start": 16, "end": 28, "bytes": "6d7034316d70343269736f6d" } + ] + }, + { "type": "equal", "start": 0, "end": 12, "bytes": "0000001c6674797069736f6d" }, + { "type": "equal", "start": 0, "end": 16, "bytes": "0000001c667479706d70343200000000" } + ] + } + ] + }, + + { + "type": "m4v", + "ext": "m4v", + "mime": "video/x-m4v", + "rules": [ + { "type": "equal", "start": 0, "end": 11, "bytes": "0000001c667479704d3456" } + ] + }, + + { + "type": "mid", + "ext": "mid", + "mime": "audio/midi", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "4d546864" } + ] + }, + + { + "type": "mkv", + "ext": "mkv", + "mime": "video/x-matroska", + "rules": [ + { "type": "equal", "start": 31, "end": 39, "bytes": "6d6174726f736b61" } + ] + }, + + { + "type": "webm", + "ext": "webm", + "mime": "video/webm", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "1a45dfa3" }, + { "type": "notEqual", "start": 31, "end": 39, "bytes": "6d6174726f736b61" } + ] + }, + + { + "type": "wmv", + "ext": "wmv", + "mime": "video/x-ms-wmv", + "rules": [ + { "type": "equal", "start": 0, "end": 10, "bytes": "3026b2758e66cf11a6d9" } + ] + }, + + { + "type": "mpg", + "ext": "mpg", + "mime": "video/mpeg", + "rules": [ + { "type": "equal", "start": 0, "end": 3, "bytes": "000001" }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 3, "end": 4, "bytes": "b0"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b1"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b2"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b3"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b4"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b5"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b6"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b7"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b8"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "b9"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "ba"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "bb"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "bc"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "bd"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "be"}, + { "type": "equal", "start": 3, "end": 4, "bytes": "bf"} + ] + } + ] + }, + + { + "type": "mp3", + "ext": "mp3", + "mime": "audio/mpeg", + "rules": [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 0, "end": 3, "bytes": "494433" }, + { "type": "equal", "start": 0, "end": 2, "bytes": "fffb" } + ] + } + ] + }, + + { + "type": "m4a", + "ext": "m4a", + "mime": "audio/m4a", + "rules": [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 4, "end": 11, "bytes": "667479704d3441" }, + { "type": "equal", "start": 0, "end": 4, "bytes": "4d344120" } + ] + } + ] + }, + + { + "type": "opus", + "ext": "opus", + "mime": "audio/opus", + "rules": [ + { "type": "equal", "start": 28, "end": 36, "bytes": "4f70757348656164" } + ] + }, + + { + "type": "ogg", + "ext": "ogg", + "mime": "audio/ogg", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "4f676753" }, + { "type": "notEqual", "start": 28, "end": 36, "bytes": "4f70757348656164" } + ] + }, + + { + "type": "flac", + "ext": "flac", + "mime": "audio/x-flac", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "664c6143" } + ] + }, + + { + "type": "wav", + "ext": "wav", + "mime": "audio/x-wav", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "52494646" }, + { "type": "equal", "start": 8, "end": 12, "bytes": "57415645" } + ] + }, + + { + "type": "amr", + "ext": "amr", + "mime": "audio/amr", + "rules": [ + { "type": "equal", "start": 0, "end": 6, "bytes": "2321414d520a" } + ] + }, + + { + "type": "pdf", + "ext": "pdf", + "mime": "application/pdf", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "25504446" } + ] + }, + + { + "type": "exe", + "ext": "exe", + "mime": "application/x-msdownload", + "iana": "application/vnd.microsoft.portable-executable", + "rules": [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 0, "end": 2, "bytes": "4d5a" }, + { "type": "equal", "start": 0, "end": 2, "bytes": "4d7a" }, + { "type": "equal", "start": 0, "end": 2, "bytes": "6d7a" }, + { "type": "equal", "start": 0, "end": 2, "bytes": "6d5a" } + ] + } + ] + }, + + { + "type": "swf", + "ext": "swf", + "mime": "application/x-shockwave-flash", + "iana": "application/vnd.adobe.flash.movie", + "rules": [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 0, "end": 1, "bytes": "43" }, + { "type": "equal", "start": 0, "end": 1, "bytes": "46" } + ] + }, + { "type": "equal", "start": 1, "end": 3, "bytes": "5753" } + ] + }, + + { + "type": "rtf", + "ext": "rtf", + "mime": "application/rtf", + "rules": [ + { "type": "equal", "start": 0, "end": 5, "bytes": "7b5c727466" } + ] + }, + + { + "type": "mov", + "ext": "mov", + "mime": "video/quicktime", + "rules": [ + { "type": "equal", "start": 0, "end": 8, "bytes": "0000001466747970" } + ] + }, + + { + "type": "avi", + "ext": "avi", + "mime": "video/x-msvideo", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "52494646" }, + { "type": "equal", "start": 8, "end": 11, "bytes": "415649" } + ] + }, + + { + "type": "woff", + "ext": "woff", + "mime": "application/font-woff", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "774f4646" }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 4, "end": 8, "bytes": "00010000" }, + { "type": "equal", "start": 4, "end": 8, "bytes": "4f54544f" } + ] + } + ] + }, + + { + "type": "woff2", + "ext": "woff2", + "mime": "application/font-woff", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "774f4632" }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 4, "end": 8, "bytes": "00010000" }, + { "type": "equal", "start": 4, "end": 8, "bytes": "4f54544f" } + ] + } + ] + }, + + { + "type": "eot", + "ext": "eot", + "mime": "application/octet-stream", + "rules": [ + { "type": "equal", "start": 34, "end": 36, "bytes": "4c50" }, + { "type": "or", "rules": + [ + { "type": "equal", "start": 8, "end": 11, "bytes": "000001" }, + { "type": "equal", "start": 8, "end": 11, "bytes": "010002" }, + { "type": "equal", "start": 8, "end": 11, "bytes": "020002" } + ] + } + ] + }, + + { + "type": "ttf", + "ext": "ttf", + "mime": "application/font-sfnt", + "rules": [ + { "type": "equal", "start": 0, "end": 5, "bytes": "0001000000" } + ] + }, + + { + "type": "otf", + "ext": "otf", + "mime": "application/font-sfnt", + "rules": [ + { "type": "equal", "start": 0, "end": 5, "bytes": "4f54544f00" } + ] + }, + + { + "type": "ico", + "ext": "ico", + "mime": "application/x-icon", + "iana": "image/vnd.microsoft.icon", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "00000100" } + ] + }, + + { + "type": "flv", + "ext": "flv", + "mime": "application/x-flv", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "464c5601" } + ] + }, + + { + "type": "ps", + "ext": "ps", + "mime": "application/postscript", + "rules": [ + { "type": "equal", "start": 0, "end": 2, "bytes": "2521" } + ] + }, + + { + "type": "xz", + "ext": "xz", + "mime": "application/x-xz", + "rules": [ + { "type": "equal", "start": 0, "end": 6, "bytes": "fd377a585a00" } + ] + }, + + { + "type": "sqlite", + "ext": "sqlite", + "mime": "application/x-sqlite3", + "iana": "application/vnd.sqlite3", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "53514c69" } + ] + }, + + { + "type": "nes", + "ext": "nes", + "mime": "application/x-nintendo-nes-rom", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "4e45531a" } + ] + }, + + { + "type": "crx", + "ext": "crx", + "mime": "application/x-google-chrome-extension", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "43723234" } + ] + }, + + { + "type": "cab", + "ext": "cab", + "mime": "application/vnd.ms-cab-compressed", + "rules": [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 0, "end": 4, "bytes": "4d534346" }, + { "type": "equal", "start": 0, "end": 4, "bytes": "49536328" } + ] + } + ] + }, + + { + "type": "ar", + "ext": "ar", + "mime": "application/x-unix-archive", + "rules": [ + { "type": "equal", "start": 0, "end": 7, "bytes": "213c617263683e" }, + { "type": "notEqual", "start": 0, "end": 21, "bytes": "213c617263683e0a64656269616e2d62696e617279" } + ] + }, + + { + "type": "deb", + "ext": "deb", + "mime": "application/x-deb", + "rules": [ + { "type": "equal", "start": 0, "end": 21, "bytes": "213c617263683e0a64656269616e2d62696e617279" } + ] + }, + + { + "type": "rpm", + "ext": "rpm", + "mime": "application/x-rpm", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "edabeedb" } + ] + }, + + { + "type": "Z", + "ext": "Z", + "mime": "application/x-compress", + "rules": [ + { "type": "or", "rules": + [ + { "type": "equal", "start": 0, "end": 2, "bytes": "1fa0" }, + { "type": "equal", "start": 0, "end": 2, "bytes": "1f9d" } + ] + } + ] + }, + + { + "type": "lz", + "ext": "lz", + "mime": "application/x-lzip", + "rules": [ + { "type": "equal", "start": 0, "end": 4, "bytes": "4c5a4950" } + ] + }, + + { + "type": "msi", + "ext": "msi", + "mime": "application/x-msi", + "rules": [ + { "type": "equal", "start": 0, "end": 8, "bytes": "d0cf11e0a1b11ae1" } + ] + }, + + { + "type": "svg", + "ext": "svg", + "mime": "image/svg+xml", + "rules": [ + { "type": "contains", "bytes": "3c737667" } + ] + }, + + { + "type": "html", + "ext": "html", + "mime": "text/html", + "rules": [ + { "type": "or", "rules": + [ + { "type": "contains", "bytes": "3c68746d6c" }, + { "type": "contains", "bytes": "3c00680074006d006c00" }, + { "type": "equal", "end": 5, "bytes": "3c68746d6c" }, + { "type": "equal", "end": 10, "bytes": "3c00680074006d006c00" }, + { "type": "equal", "end": 9, "bytes": "3c21646f6374797065" }, + { "type": "equal", "end": 5, "bytes": "3c626f6479" }, + { "type": "equal", "end": 5, "bytes": "3c68656164" }, + { "type": "equal", "end": 7, "bytes": "3c696672616d65" }, + { "type": "equal", "end": 4, "bytes": "3c696d67" }, + { "type": "equal", "end": 7, "bytes": "3c6f626a656374" }, + { "type": "equal", "end": 7, "bytes": "3c736372697074" }, + { "type": "equal", "end": 6, "bytes": "3c7461626c65" }, + { "type": "equal", "end": 6, "bytes": "3c7469746c65" } + ] + } + ] + }, + + { + "type": "docx", + "ext": "docx", + "mime": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "rules": [ + { "type": "or", "rules": + [ + { "type": "contains", "bytes": "6170706c69636174696f6e2f766e642e6f70656e786d6c666f726d6174732d6f6666696365646f63756d656e74" } + ] + } + ] + }, + + { + "type": "xml", + "ext": "xml", + "mime": "application/xml", + "rules": [ + { "type": "or", "rules": + [ + { "type": "equal", "end": 19, "bytes": "3c3f786d6c2076657273696f6e3d22312e3022" } + ] + } + ] + } + +] diff --git a/server/core/ReaderWorker.js b/server/core/ReaderWorker.js index 5692f999..ed05a93d 100644 --- a/server/core/ReaderWorker.js +++ b/server/core/ReaderWorker.js @@ -63,13 +63,13 @@ class ReaderWorker { //decompress wState.set({state: 'decompress', step: 2, progress: 0}); decompDir = `${this.config.tempDownloadDir}/${decompDirname}`; - const decompFilename = await this.decomp.decompressFile(downloadedFilename, decompDir); + const decompFiles = await this.decomp.decompressFile(downloadedFilename, decompDir); wState.set({progress: 100}); //конвертирование в fb2 wState.set({state: 'convert', step: 3, progress: 0}); convertFilename = `${this.config.tempDownloadDir}/${tempFilename2}`; - await this.bookConverter.convertToFb2(decompFilename, convertFilename, url, progress => { + await this.bookConverter.convertToFb2(decompFiles, convertFilename, url, progress => { wState.set({progress}); }); @@ -83,8 +83,9 @@ class ReaderWorker { wState.finish({path: `/tmp/${finishFilename}`}); } catch (e) { + if (this.config.branch == 'development') + console.error(e); wState.set({state: 'error', error: (errMes ? errMes : e.message)}); - } finally { //clean if (decompDir) diff --git a/server/core/utils.js b/server/core/utils.js index f37ef205..b134cf79 100644 --- a/server/core/utils.js +++ b/server/core/utils.js @@ -1,3 +1,4 @@ +const { spawn } = require('child_process'); const fs = require('fs-extra'); const crypto = require('crypto'); @@ -13,8 +14,48 @@ async function touchFile(filename) { await fs.utimes(filename, Date.now()/1000, Date.now()/1000); } +function spawnProcess(cmd, opts) { + let {args, killAfter, onData} = opts; + killAfter = (killAfter ? killAfter : 120*1000); + onData = (onData ? onData : () => {}); + args = (args ? args : []); + + return new Promise(async(resolve, reject) => { + let resolved = false; + const proc = spawn(cmd, args, {detached: true}); + + let stdout = ''; + proc.stdout.on('data', (data) => { + stdout += data; + onData(data); + }); + + let stderr = ''; + proc.stderr.on('data', (data) => { + stderr += data; + onData(data); + }); + + proc.on('close', (code) => { + resolved = true; + resolve({status: 'close', code, stdout, stderr}); + }); + + proc.on('error', (error) => { + reject({status: 'error', error, stdout, stderr}); + }); + + await sleep(killAfter); + if (!resolved) { + process.kill(proc.pid); + reject({status: 'killed', stdout, stderr}); + } + }); +} + module.exports = { sleep, randomHexString, - touchFile + touchFile, + spawnProcess }; \ No newline at end of file diff --git a/server/index.js b/server/index.js index 04da3b09..e2cdd9bf 100644 --- a/server/index.js +++ b/server/index.js @@ -90,7 +90,7 @@ async function main() { try { await main(); } catch (e) { - console.error(e.message); + console.error(e); process.exit(1); } })(); \ No newline at end of file