From dbb1bfe58798f13e5b490c4f260b34f10c18f503 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Tue, 24 Nov 2020 02:09:17 +0700 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B8=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD=D0=B0=D0=B2?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BA=D0=BE=D0=B4=D0=B8=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=D0=BA=D0=B8=20fb2-=D1=84=D0=B0=D0=B9=D0=BB=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/Reader/BookConverter/ConvertFb2.js | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/server/core/Reader/BookConverter/ConvertFb2.js b/server/core/Reader/BookConverter/ConvertFb2.js index 5c937996..7efa25c4 100644 --- a/server/core/Reader/BookConverter/ConvertFb2.js +++ b/server/core/Reader/BookConverter/ConvertFb2.js @@ -1,5 +1,6 @@ const ConvertBase = require('./ConvertBase'); const iconv = require('iconv-lite'); +const textUtils = require('./textUtils'); class ConvertFb2 extends ConvertBase { check(data, opts) { @@ -9,26 +10,46 @@ class ConvertFb2 extends ConvertBase { } async run(data, opts) { - if (!this.check(data, opts)) + let newData = data; + + //Корректируем кодировку, 16-битные кодировки должны стать utf-8 + const encoding = textUtils.getEncoding(newData); + if (encoding.indexOf('UTF-16') == 0) { + newData = Buffer.from(iconv.decode(newData, encoding)); + } + + if (!this.check(newData, opts)) return false; - return this.checkEncoding(data); + return this.checkEncoding(newData); } checkEncoding(data) { let result = data; - const left = data.indexOf('= 0) { const right = data.indexOf('?>', left); if (right >= 0) { const head = data.slice(left, right + 2).toString(); - const m = head.match(/encoding="(.*?)"/); + const m = head.match(/encoding=['"](.*?)['"]/); if (m) { let encoding = m[1].toLowerCase(); if (encoding != 'utf-8') { - result = iconv.decode(data, encoding); - result = Buffer.from(result.toString().replace(m[0], 'encoding="utf-8"')); + //encoding может не соответсвовать реальной кодировке файла, поэтому: + let calcEncoding = textUtils.getEncoding(data); + if (calcEncoding.indexOf('ISO-8859') >= 0) { + calcEncoding = encoding; + } + + result = iconv.decode(data, calcEncoding); + result = Buffer.from(result.toString().replace(m[0], `encoding=${q}utf-8${q}`)); } } }