From 0d64d351847fc6d88419996fe5a88d0e36a8fd4b Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Tue, 22 Jan 2019 02:00:59 +0700 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B4=D0=B5=D0=BB=D0=B0=D0=BB=20?= =?UTF-8?q?=D0=B2=20=D0=BD=D0=B0=D1=87=D0=B0=D0=BB=D1=8C=D0=BD=D0=BE=D0=BC?= =?UTF-8?q?=20=D0=B2=D0=B8=D0=B4=D0=B5=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=B7=D0=BD=D0=B0=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5?= =?UTF-8?q?=D0=BA=D1=81=D1=82=D0=B0=20=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D1=86=D1=8B=20=D1=81=D0=B0=D0=BC=D0=BB=D0=B8=D0=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{easysax.js => easysaxmod.js} | 0 server/core/BookConverter/index.js | 99 +++++++++++++++---- 2 files changed, 79 insertions(+), 20 deletions(-) rename server/core/BookConverter/{easysax.js => easysaxmod.js} (100%) diff --git a/server/core/BookConverter/easysax.js b/server/core/BookConverter/easysaxmod.js similarity index 100% rename from server/core/BookConverter/easysax.js rename to server/core/BookConverter/easysaxmod.js diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js index 8b00b93e..128d637b 100644 --- a/server/core/BookConverter/index.js +++ b/server/core/BookConverter/index.js @@ -1,7 +1,11 @@ const fs = require('fs-extra'); -const FileDetector = require('../FileDetector'); const URL = require('url').URL; -const EasySAXParser = require('./easysax'); +const iconv = require('iconv-lite'); +const chardet = require('chardet'); +const _ = require('lodash'); + +const FileDetector = require('../FileDetector'); +const EasySAXParser = require('./easysaxmod'); class BookConverter { constructor() { @@ -12,9 +16,9 @@ class BookConverter { const fileType = await this.detector.detectFile(inputFile); if (fileType && (fileType.ext == 'html' || fileType.ext == 'xml')) { - const data = await fs.readFile(inputFile, 'utf8'); + const data = await fs.readFile(inputFile); - if (data.indexOf('= 0) { + if (data.toString().indexOf('= 0) { await fs.writeFile(outputFile, data); return; } @@ -26,7 +30,6 @@ class BookConverter { return; } - //Заглушка await fs.writeFile(outputFile, data); callback(100); @@ -39,12 +42,30 @@ class BookConverter { } async convertSamlib(data) { - let fb2 = [{parentName: 'description'}]; + let titleInfo = {}; + let desc = {_n: 'description', 'title-info': titleInfo}; + let pars = []; + let body = {_n: 'body', section: {_a: [pars]}}; + let fb2 = [desc, body]; + let path = ''; - let tag = ''; + let tag = '';// eslint-disable-line no-unused-vars let inText = false; + const newParagraph = () => { + pars.push({_n: 'p', _t: ''}); + }; + + const growParagraph = (text) => { + const l = pars.length; + if (l) { + if (pars[l - 1]._t == '') + text = text.trimLeft(); + pars[l - 1]._t += text; + } + }; + const parser = new EasySAXParser(); parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars @@ -57,7 +78,10 @@ class BookConverter { if (!inText) { path += '/' + elemName; tag = elemName; -console.log(path); + } else { + if (elemName == 'p' || elemName == 'dd') { + newParagraph(); + } } }); @@ -78,14 +102,28 @@ console.log(path); let i = path.lastIndexOf('/'); tag = path.substr(i + 1); - - console.log('cl', elemName); - console.log('tag', tag); - console.log(path); } }); parser.on('textNode', (text) => {// eslint-disable-line no-unused-vars + switch (path) { + case '/html/body/center/h2': + titleInfo['book-title'] = text; + return; + case '/html/body/div/h3': + if (!titleInfo.author) + titleInfo.author = {}; + text = text.replace(':', '').trim().split(' '); + if (text[0]) + titleInfo.author['last-name'] = text[0]; + if (text[1]) + titleInfo.author['first-name'] = text[1]; + if (text[2]) + titleInfo.author['middle-name'] = text[2]; + return; + } + if (inText) + growParagraph(text); }); parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars @@ -100,7 +138,22 @@ console.log(path); }); */ - await parser.parse(data); + await parser.parse(iconv.decode(data, chardet.detect(data))); + + const title = (titleInfo['book-title'] ? titleInfo['book-title'] : ''); + let author = ''; + if (titleInfo.author) { + author = _.compact([ + (titleInfo.author['last-name'] ? titleInfo.author['last-name'] : ''), + (titleInfo.author['first-name'] ? titleInfo.author['first-name'] : ''), + (titleInfo.author['middle-name'] ? titleInfo.author['middle-name'] : ''), + ]).join(' '); + } + + pars.unshift({_n: 'title', _a: [ + {_n: 'p', _t: author}, {_n: 'p', _t: ''}, + {_n: 'p', _t: title}, {_n: 'p', _t: ''}, + ]}) return this.formatFb2(fb2); } @@ -120,19 +173,25 @@ console.log(out); for (const n of node) { out += this.formatFb2Node(n); } + } else if (typeof node == 'string') { + out += `<${name}>${node}`; } else { - if (node.parentName) - name = node.parentName; + if (node._n) + name = node._n; if (!name) throw new Error(`malformed fb2 object`); out += `<${name}>`; - for (let nodeName in node) { - if (nodeName == 'parentName') - continue; + if (node.hasOwnProperty('_t')) { + out += node._t; + } else { + for (let nodeName in node) { + if (nodeName == '_n') + continue; - const n = node[nodeName]; - out += this.formatFb2Node(n, nodeName); + const n = node[nodeName]; + out += this.formatFb2Node(n, nodeName); + } } out += ``; }