Реструктуризация

This commit is contained in:
Book Pauk
2022-11-10 00:51:18 +07:00
parent ec6b72868b
commit 1d99472ca1
2 changed files with 130 additions and 23 deletions

View File

@@ -0,0 +1,102 @@
const fs = require('fs-extra');
const iconv = require('iconv-lite');
const textUtils = require('./textUtils');
const Fb2Parser = require('../fb2/Fb2Parser');
const utils = require('../utils');
/**
 * Helpers for reading FB2 book files: normalizing the text encoding of the raw
 * file bytes and extracting the book description and cover image.
 */
class Fb2Helper {
    /**
     * Normalizes raw FB2 bytes so that they can be read as UTF-8.
     *
     * Steps, in order:
     *  1. input detected as UTF-16 is re-encoded to UTF-8;
     *  2. if the data starts with a space, surrounding ASCII whitespace is removed;
     *  3. if the XML declaration names an encoding other than "utf-8", the data is
     *     decoded with iconv and the declaration is rewritten to encoding="utf-8".
     *
     * @param {Buffer} data - raw (already decompressed) file contents
     * @returns {Buffer} UTF-8 bytes; the input buffer itself (or a trimmed view that
     *     shares its memory) when no re-encoding was necessary
     */
    checkEncoding(data) {
        // Fix up UTF-16 input: convert it to UTF-8 bytes first.
        let encoding = textUtils.getEncoding(data);
        if (encoding.indexOf('UTF-16') === 0) {
            data = Buffer.from(iconv.decode(data, encoding));
            encoding = 'utf-8';
        }

        // Fix up stray leading spaces (all sorts of files turn up).
        // The trimming is done on bytes, not on a decoded string: at this point the
        // data may still be in a single-byte encoding such as windows-1251, and a
        // UTF-8 round trip through toString() would replace every non-ASCII byte
        // with U+FFFD and corrupt the text for good.
        if (data[0] === 32) {
            const isAsciiSpace = (byte) => byte === 32 || (byte >= 9 && byte <= 13);
            let start = 0;
            let end = data.length;
            while (start < end && isAsciiSpace(data[start])) {
                start++;
            }
            while (end > start && isAsciiSpace(data[end - 1])) {
                end--;
            }
            data = data.subarray(start, end);
        }

        // Final encoding fix-up, driven by the XML declaration.
        let left = data.indexOf('<?xml version="1.0"');
        if (left < 0) {
            left = data.indexOf('<?xml version=\'1.0\'');
        }
        if (left < 0) {
            return data;
        }

        const right = data.indexOf('?>', left);
        if (right < 0) {
            return data;
        }

        const head = data.subarray(left, right + 2).toString();
        const m = head.match(/encoding=['"](.*?)['"]/);
        if (!m) {
            return data;
        }

        const enc = m[1].toLowerCase();
        if (enc === 'utf-8') {
            return data;
        }

        // The declared encoding may not match the real encoding of the file, so it
        // is only trusted when the detector reported an ISO-8859 family encoding.
        if (encoding.indexOf('ISO-8859') >= 0) {
            encoding = enc;
        }

        const text = iconv.decode(data, encoding);
        return Buffer.from(text.replace(m[0], `encoding="utf-8"`));
    }

    /**
     * Reads a book file and extracts its description and cover image.
     *
     * The file contents are passed through utils.gunzipBuffer (helper not shown
     * here; presumably it decompresses gzip data) and checkEncoding before parsing.
     *
     * @param {string} bookFile - path of the book file to read
     * @returns {Promise<{desc: Object, cover: (Buffer|null), coverExt: string}>}
     *     desc is the parsed "description" node as an object; cover holds the
     *     base64-decoded bytes of the referenced binary, or null when none was
     *     found; coverExt is '.png' or '.jpg', or '' when the description has no
     *     cover image node
     */
    async getDescAndCover(bookFile) {
        let data = await fs.readFile(bookFile);
        data = await utils.gunzipBuffer(data);

        data = this.checkEncoding(data);

        const fb2 = new Fb2Parser();

        fb2.fromString(data.toString(), {
            lowerCase: true,
            // pick only nodes whose route does not start with the book body
            pickNode: route => route.indexOf('fictionbook/body') !== 0,
        });

        const desc = fb2.$$('description').toObject();
        const coverImage = fb2.inspector(desc).$('description/title-info/coverpage/image');

        let cover = null;
        let coverExt = '';
        if (coverImage) {
            const coverAttrs = coverImage.attrs();
            const href = coverAttrs['l:href'];
            let coverType = coverAttrs['content-type'];
            // 'image/jpg' and 'application/octet-stream' are treated as JPEG
            coverType = (coverType === 'image/jpg' || coverType === 'application/octet-stream' ? 'image/jpeg' : coverType);
            coverExt = (coverType === 'image/png' ? '.png' : '.jpg');

            if (href) {
                // the reference may be written as "#id"; binaries are matched by bare id
                const binaryId = (href[0] === '#' ? href.substring(1) : href);

                // find the binary node that holds the referenced image
                fb2.$$('binary').eachSelf(node => {
                    let attrs = node.attrs();
                    if (!attrs)
                        return;

                    attrs = Object.fromEntries(attrs);
                    if (attrs.id === binaryId) {
                        const textNode = new Fb2Parser(node.value);
                        const base64 = textNode.$self('*TEXT').value;

                        cover = (base64 ? Buffer.from(base64, 'base64') : null);
                    }
                });
            }
        }

        return {desc, cover, coverExt};
    }
}
module.exports = Fb2Helper;

View File

@@ -1,28 +1,6 @@
const XmlParser = require('../xml/XmlParser');
class Fb2Parser {
/**
 * Creates the parser with its own XmlParser instance, stored in this.xml.
 */
constructor() {
this.xml = new XmlParser();
}
/**
 * Delegates to this.xml.toString, passing options through unchanged,
 * and returns its result.
 */
toString(options) {
return this.xml.toString(options);
}
/**
 * Passes fb2String to this.xml.fromString and returns this parser,
 * so calls can be chained.
 */
fromString(fb2String) {
this.xml.fromString(fb2String);
return this;
}
/**
 * Delegates to this.xml.toObject, passing options through unchanged,
 * and returns its result.
 */
toObject(options) {
return this.xml.toObject(options);
}
/**
 * Passes fb2Object to this.xml.fromObject and returns this parser,
 * so calls can be chained.
 */
fromObject(fb2Object) {
this.xml.fromObject(fb2Object);
return this;
}
class Fb2Parser extends XmlParser {
bookInfo(fb2Object) {
if (!fb2Object)
fb2Object = this.toObject();
@@ -33,6 +11,33 @@ class Fb2Parser {
/**
 * Has an empty body here: accepts fb2Object, does nothing and returns undefined.
 * NOTE(review): presumably a placeholder for a list-style counterpart of
 * bookInfo - confirm the intended output before relying on it.
 */
bookInfoList(fb2Object) {
}
toHtml(xmlString) {
const substs = {
'<subtitle>': '<p><b>',
'</subtitle>': '</b></p>',
'<empty-line/>': '<br>',
'<strong>': '<b>',
'</strong>': '</b>',
'<emphasis>': '<i>',
'</emphasis>': '</i>',
'<stanza>': '<br>',
'</stanza>': '',
'<poem>': '<br>',
'</poem>': '',
'<cite>': '<i>',
'</cite>': '</i>',
'<table>': '<br>',
'</table>': '',
};
for (const [tag, s] of Object.entries(substs)) {
const r = new RegExp(`${tag}`, 'g');
xmlString = xmlString.replace(r, s);
}
return xmlString;
}
}
module.exports = Fb2Parser;