Промежуточный коммит, загрузка и обработка файла книги

This commit is contained in:
Book Pauk
2019-01-12 17:38:21 +07:00
parent 6aeb589e16
commit 42ae088df3
3 changed files with 120 additions and 2 deletions

View File

@@ -0,0 +1,35 @@
const fs = require('fs-extra');
const decompress = require('decompress');
const FileDetector = require('./FileDetector');
/**
 * Unpacks a downloaded archive and selects the extracted file that should be
 * processed further.
 */
class FileDecompressor {
    constructor() {
        this.detector = new FileDetector();
    }

    /**
     * Decompresses `filename` into `outputDir` when the detector reports it as
     * a `zip` or `bz2` file, and returns the path of the largest extracted file.
     *
     * @param {string} filename - Path of the (possibly compressed) input file.
     * @param {string} outputDir - Directory the archive is extracted into.
     * @returns {Promise<string>} Path of the largest extracted regular file, or
     *     `filename` itself when the input is not a recognised archive or the
     *     archive yielded no files.
     */
    async decompressFile(filename, outputDir) {
        const fileType = await this.detector.detectFile(filename);
        // Guard against a missing detection result (no signature matched):
        // such a file is treated as "not an archive" instead of throwing.
        if (!fileType || !(fileType.ext === 'zip' || fileType.ext === 'bz2')) {
            return filename;
        }

        const files = await decompress(filename, outputDir);

        let result = filename;
        let max = 0;
        // Look for the extracted file with the maximum size.
        for (const file of files) {
            // Entries that declare a non-file type (e.g. directories) are skipped.
            if (file.type && file.type !== 'file') {
                continue;
            }
            // Entries carry a path relative to outputDir, so build the on-disk
            // location before calling stat.
            const extractedPath = `${outputDir}/${file.path}`;
            const stats = await fs.stat(extractedPath);
            if (stats.size > max) {
                result = extractedPath;
                max = stats.size;
            }
        }
        return result;
    }
}
module.exports = FileDecompressor;

View File

@@ -0,0 +1,57 @@
const detect = require('detect-file-type');
// HTML signature: matches when any of the listed byte sequences (hex-encoded)
// is found in the inspected data.
detect.addSignature({
    type: 'html',
    ext: 'html',
    mime: 'text/html',
    rules: [
        {
            type: 'or',
            rules: [
                '3c68746d6c', // <html
                '3c00680074006d006c00', // <html with a zero byte after each character
                '3c21646f6374797065', // <!doctype
                '3c626f6479', // <body
                '3c68656164', // <head
                '3c696672616d65', // <iframe
                '3c696d67', // <img
                '3c6f626a656374', // <object
                '3c736372697074', // <script
                '3c7461626c65', // <table
                '3c7469746c65', // <title
            ].map((bytes) => ({type: 'contains', bytes})),
        },
    ],
});
// XML signature: the hex string below is the byte sequence
// 3c 3f 78 6d 6c 20 76 65 72 73 69 6f 6e 3d 22 31 2e 30 22,
// i.e. the ASCII text `<?xml version="1.0"`.
detect.addSignature({
    type: 'xml',
    ext: 'xml',
    mime: 'application/xml',
    rules: [
        {
            type: 'or',
            rules: [
                {type: 'contains', bytes: '3c3f786d6c2076657273696f6e3d22312e3022'},
            ],
        },
    ],
});
/**
 * Promise-based wrapper around the callback API `detect.fromFile`.
 */
class FileDetector {
    /**
     * Detects the type of the file at `filename`.
     *
     * @param {string} filename - Path of the file to inspect.
     * @returns {Promise<*>} Resolves with whatever `detect.fromFile` passes as
     *     its result; rejects with the error it reports.
     */
    detectFile(filename) {
        return new Promise((resolve, reject) => {
            const onDetected = (err, result) => {
                if (err) {
                    reject(err);
                    return;
                }
                resolve(result);
            };
            detect.fromFile(filename, onDetected);
        });
    }
}
module.exports = FileDetector;

View File

@@ -1,4 +1,7 @@
const workerState = require('./workerState');
const FileDetector = require('./FileDetector');
const FileDecompressor = require('./FileDecompressor');
//const BookParser = require('./BookParser');
const utils = require('./utils');
const fs = require('fs-extra');
@@ -12,6 +15,8 @@ class ReaderWorker {
this.config = Object.assign({}, config);
this.config.tempDownloadDir = `${config.tempDir}/download`;
fs.ensureDirSync(this.config.tempDownloadDir);
this.detector = new FileDetector();
this.decomp = new FileDecompressor();
}
async loadBook(url, wState) {
@@ -21,6 +26,10 @@ class ReaderWorker {
wState.set({state: 'download', step: 1, totalSteps: 3, url});
const tempFilename = utils.randomHexString(30);
const tempFilename2 = utils.randomHexString(30);
const decompDirname = utils.randomHexString(30);
//download
const d = download(url);
d.on('downloadProgress', progress => {
wState.set({progress: Math.round(progress.percent*100)});
@@ -29,9 +38,26 @@ class ReaderWorker {
d.destroy();
}
});
await pipeline(d, fs.createWriteStream(`${this.config.tempDownloadDir}/${tempFilename}`));
const downloadedFilename = `${this.config.tempDownloadDir}/${tempFilename}`;
await pipeline(d, fs.createWriteStream(downloadedFilename));
//decompress
wState.set({state: 'decompress', step: 2, progress: 0});
const decompDir = `${this.config.tempDownloadDir}/${decompDirname}`;
const decompFilename = await this.decomp.decompressFile(downloadedFilename, decompDir);
wState.set({progress: 100});
wState.finish({step: 3, file: tempFilename});
//parse book
const fileType = await this.detector.detectFile(decompFilename);
if (fileType.ext == 'html' || fileType.ext == 'xml') {
//parse
}
//clean
await fs.remove(decompDir);
await fs.remove(downloadedFilename);
wState.finish({step: 3, file: tempFilename, fileType: fileType});
} catch (e) {
wState.set({state: 'error', error: (errMes ? errMes : e.message)});
}