diff --git a/server/core/fb2/Fb2Helper.js b/server/core/fb2/Fb2Helper.js new file mode 100644 index 0000000..4bb07f3 --- /dev/null +++ b/server/core/fb2/Fb2Helper.js @@ -0,0 +1,102 @@ +const fs = require('fs-extra'); +const iconv = require('iconv-lite'); +const textUtils = require('./textUtils'); + +const Fb2Parser = require('../fb2/Fb2Parser'); +const utils = require('../utils'); + +class Fb2Helper { + checkEncoding(data) { + //Корректируем кодировку UTF-16 + let encoding = textUtils.getEncoding(data); + if (encoding.indexOf('UTF-16') == 0) { + data = Buffer.from(iconv.decode(data, encoding)); + encoding = 'utf-8'; + } + + //Корректируем пробелы, всякие файлы попадаются :( + if (data[0] == 32) { + data = Buffer.from(data.toString().trim()); + } + + //Окончательно корректируем кодировку + let result = data; + + let left = data.indexOf('= 0) { + const right = data.indexOf('?>', left); + if (right >= 0) { + const head = data.slice(left, right + 2).toString(); + const m = head.match(/encoding=['"](.*?)['"]/); + if (m) { + let enc = m[1].toLowerCase(); + if (enc != 'utf-8') { + //enc может не соответсвовать реальной кодировке файла, поэтому: + if (encoding.indexOf('ISO-8859') >= 0) { + encoding = enc; + } + + result = iconv.decode(data, encoding); + result = Buffer.from(result.toString().replace(m[0], `encoding="utf-8"`)); + } + } + } + } + + return result; + } + + async getDescAndCover(bookFile) { + let data = await fs.readFile(bookFile); + data = await utils.gunzipBuffer(data); + + data = this.checkEncoding(data); + + const fb2 = new Fb2Parser(); + + fb2.fromString(data.toString(), { + lowerCase: true, + pickNode: route => route.indexOf('fictionbook/body') !== 0, + }); + + const desc = fb2.$$('description').toObject(); + const coverImage = fb2.inspector(desc).$('description/title-info/coverpage/image'); + + let cover = null; + let coverExt = ''; + if (coverImage) { + const coverAttrs = coverImage.attrs(); + const href = coverAttrs['l:href']; + let coverType = coverAttrs['content-type']; + coverType = (coverType == 'image/jpg' || coverType == 'application/octet-stream' ? 'image/jpeg' : coverType); + coverExt = (coverType == 'image/png' ? '.png' : '.jpg'); + + if (href) { + const binaryId = (href[0] == '#' ? href.substring(1) : href); + + //найдем нужный image + fb2.$$('binary').eachSelf(node => { + let attrs = node.attrs(); + if (!attrs) + return; + attrs = Object.fromEntries(attrs); + + if (attrs.id === binaryId) { + const textNode = new Fb2Parser(node.value); + const base64 = textNode.$self('*TEXT').value; + + cover = (base64 ? Buffer.from(base64, 'base64') : null); + } + }); + } + } + + return {desc, cover, coverExt}; + } +} + +module.exports = Fb2Helper; \ No newline at end of file diff --git a/server/core/fb2/Fb2Parser.js b/server/core/fb2/Fb2Parser.js index 97708d2..13806bf 100644 --- a/server/core/fb2/Fb2Parser.js +++ b/server/core/fb2/Fb2Parser.js @@ -1,28 +1,6 @@ const XmlParser = require('../xml/XmlParser'); -class Fb2Parser { - constructor() { - this.xml = new XmlParser(); - } - - toString(options) { - return this.xml.toString(options); - } - - fromString(fb2String) { - this.xml.fromString(fb2String); - return this; - } - - toObject(options) { - return this.xml.toObject(options); - } - - fromObject(fb2Object) { - this.xml.fromObject(fb2Object); - return this; - } - +class Fb2Parser extends XmlParser { bookInfo(fb2Object) { if (!fb2Object) fb2Object = this.toObject(); @@ -33,6 +11,33 @@ class Fb2Parser { bookInfoList(fb2Object) { } + + toHtml(xmlString) { + const substs = { + '': '

', + '': '

', + '': '
', + '': '', + '': '', + '': '', + '': '', + '': '
', + '
': '', + '': '
', + '
': '', + '': '', + '': '', + '': '
', + '
': '', + }; + + for (const [tag, s] of Object.entries(substs)) { + const r = new RegExp(`${tag}`, 'g'); + xmlString = xmlString.replace(r, s); + } + + return xmlString; + } } module.exports = Fb2Parser; \ No newline at end of file