From 42ae088df33e13e814ed576389713c8e3ba89951 Mon Sep 17 00:00:00 2001
From: Book Pauk
Date: Sat, 12 Jan 2019 17:38:21 +0700
Subject: [PATCH] =?UTF-8?q?=D0=9F=D1=80=D0=BE=D0=BC=D0=B5=D0=B6=D1=83?=
 =?UTF-8?q?=D1=82=D0=BE=D1=87=D0=BD=D1=8B=D0=B9=20=D0=BA=D0=BE=D0=BC=D0=BC?=
 =?UTF-8?q?=D0=B8=D1=82,=20=D0=B7=D0=B0=D0=B3=D1=80=D1=83=D0=B7=D0=BA?=
 =?UTF-8?q?=D0=B0=20=D0=B8=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82?=
 =?UTF-8?q?=D0=BA=D0=B0=20=D1=84=D0=B0=D0=B9=D0=BB=D0=B0=20=D0=BA=D0=BD?=
 =?UTF-8?q?=D0=B8=D0=B3=D0=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 server/core/FileDecompressor.js | 52 ++++++++++++++++++++++++++++++
 server/core/FileDetector.js     | 57 +++++++++++++++++++++++++++++++++
 server/core/ReaderWorker.js     | 30 +++++++++++++++--
 3 files changed, 137 insertions(+), 2 deletions(-)
 create mode 100644 server/core/FileDecompressor.js
 create mode 100644 server/core/FileDetector.js

diff --git a/server/core/FileDecompressor.js b/server/core/FileDecompressor.js
new file mode 100644
index 00000000..eaa6488b
--- /dev/null
+++ b/server/core/FileDecompressor.js
@@ -0,0 +1,52 @@
+const fs = require('fs-extra');
+const decompress = require('decompress');
+const FileDetector = require('./FileDetector');
+
+/**
+ * Unpacks a downloaded archive and picks the extracted file to work with.
+ */
+class FileDecompressor {
+    constructor() {
+        this.detector = new FileDetector();
+    }
+
+    /**
+     * Extracts `filename` into `outputDir` when it is detected as a zip or
+     * bz2 file and returns the path of the largest extracted file.
+     *
+     * @param {string} filename - path of the file to inspect
+     * @param {string} outputDir - directory the archive is extracted into
+     * @returns {Promise<string>} path of the largest extracted regular file,
+     *     or `filename` itself when it is not an archive or nothing was extracted
+     */
+    async decompressFile(filename, outputDir) {
+        const fileType = await this.detector.detectFile(filename);
+
+        // detect-file-type reports an unrecognised file as a null result
+        if (!fileType || (fileType.ext !== 'zip' && fileType.ext !== 'bz2'))
+            return filename;
+
+        const files = await decompress(filename, outputDir);
+
+        let result = filename;
+        let max = 0;
+        // look for the file with the largest size
+        for (const file of files) {
+            // decompress() yields entry objects; directories and links are not candidates
+            if (file.type !== 'file')
+                continue;
+
+            // entry paths are relative to the extraction directory
+            const extracted = `${outputDir}/${file.path}`;
+            const stats = await fs.stat(extracted);
+            if (stats.size > max) {
+                result = extracted;
+                max = stats.size;
+            }
+        }
+
+        return result;
+    }
+}
+
+module.exports = FileDecompressor;
\ No newline at end of file
diff --git a/server/core/FileDetector.js b/server/core/FileDetector.js
new file mode 100644
index 00000000..806389c3
--- /dev/null
+++ b/server/core/FileDetector.js
@@ -0,0 +1,57 @@
+const detect = require('detect-file-type');
+
+//html: the signature bytes spell lowercase tag openers (decoded on each line)
+detect.addSignature({
+    "type": "html",
+    "ext": "html",
+    "mime": "text/html",
+    "rules": [
+        { "type": "or", "rules": [
+            { "type": "contains", "bytes": "3c68746d6c" }, // <html
+            { "type": "contains", "bytes": "3c00680074006d006c00" }, // <html as UTF-16LE
+
+            { "type": "contains", "bytes": "3c21646f6374797065" }, // <!doctype
+            { "type": "contains", "bytes": "3c626f6479" }, // <body
+            { "type": "contains", "bytes": "3c68656164" }, // <head
+            { "type": "contains", "bytes": "3c696672616d65" }, // <iframe
+            { "type": "contains", "bytes": "3c696d67" }, // <img
+            { "type": "contains", "bytes": "3c6f626a656374" }, // <object
+            { "type": "contains", "bytes": "3c736372697074" }, // <script
+            { "type": "contains", "bytes": "3c7461626c65" }, // <table
+            { "type": "contains", "bytes": "3c7469746c65" }, // <title
+        ] }
+    ]
+});
+
+//xml: <?xml version="1.0" (3c 3f 78 6d 6c 20 76 65 72 73 69 6f 6e 3d 22 31 2e 30 22)
+detect.addSignature({
+    "type": "xml",
+    "ext": "xml",
+    "mime": "application/xml",
+    "rules": [
+        { "type": "or", "rules": [
+            { "type": "contains", "bytes": "3c3f786d6c2076657273696f6e3d22312e3022" },
+        ] }
+    ]
+});
+
+/**
+ * Detects file types with detect-file-type plus the html/xml signatures above.
+ */
+class FileDetector {
+    /**
+     * @param {string} filename - path of the file to inspect
+     * @returns {Promise<Object|null>} whatever detect.fromFile() reports (per the
+     *     library docs, null when nothing matched); rejects with its error
+     */
+    detectFile(filename) {
+        return new Promise((resolve, reject) => {
+            detect.fromFile(filename, (err, result) => {
+                if (err) return reject(err); // settle once, never fall through to resolve()
+                resolve(result);
+            });
+        });
+    }
+}
+
+module.exports = FileDetector;
\ No newline at end of file
diff --git a/server/core/ReaderWorker.js b/server/core/ReaderWorker.js
index bc48554d..4208eabe 100644
--- a/server/core/ReaderWorker.js
+++ b/server/core/ReaderWorker.js
@@ -1,4 +1,7 @@
 const workerState = require('./workerState');
+const FileDetector = require('./FileDetector');
+const FileDecompressor = require('./FileDecompressor');
+//const BookParser = require('./BookParser');
 const utils = require('./utils');
 
 const fs = require('fs-extra');
@@ -12,6 +15,8 @@ class ReaderWorker {
         this.config = Object.assign({}, config);
this.config.tempDownloadDir = `${config.tempDir}/download`; fs.ensureDirSync(this.config.tempDownloadDir); + this.detector = new FileDetector(); + this.decomp = new FileDecompressor(); } async loadBook(url, wState) { @@ -21,6 +26,10 @@ class ReaderWorker { wState.set({state: 'download', step: 1, totalSteps: 3, url}); const tempFilename = utils.randomHexString(30); + const tempFilename2 = utils.randomHexString(30); + const decompDirname = utils.randomHexString(30); + + //download const d = download(url); d.on('downloadProgress', progress => { wState.set({progress: Math.round(progress.percent*100)}); @@ -29,9 +38,26 @@ class ReaderWorker { d.destroy(); } }); - await pipeline(d, fs.createWriteStream(`${this.config.tempDownloadDir}/${tempFilename}`)); + const downloadedFilename = `${this.config.tempDownloadDir}/${tempFilename}`; + await pipeline(d, fs.createWriteStream(downloadedFilename)); + + //decompress + wState.set({state: 'decompress', step: 2, progress: 0}); + const decompDir = `${this.config.tempDownloadDir}/${decompDirname}`; + const decompFilename = await this.decomp.decompressFile(downloadedFilename, decompDir); + wState.set({progress: 100}); - wState.finish({step: 3, file: tempFilename}); + //parse book + const fileType = await this.detector.detectFile(decompFilename); + if (fileType.ext == 'html' || fileType.ext == 'xml') { + //parse + } + + //clean + await fs.remove(decompDir); + await fs.remove(downloadedFilename); + + wState.finish({step: 3, file: tempFilename, fileType: fileType}); } catch (e) { wState.set({state: 'error', error: (errMes ? errMes : e.message)}); }