Поправки распознавания кодировки fb2-файла

This commit is contained in:
Book Pauk
2020-11-24 02:09:17 +07:00
parent fe4b7a5a85
commit dbb1bfe587

View File

@@ -1,5 +1,6 @@
const ConvertBase = require('./ConvertBase'); const ConvertBase = require('./ConvertBase');
const iconv = require('iconv-lite'); const iconv = require('iconv-lite');
const textUtils = require('./textUtils');
class ConvertFb2 extends ConvertBase { class ConvertFb2 extends ConvertBase {
check(data, opts) { check(data, opts) {
@@ -9,26 +10,46 @@ class ConvertFb2 extends ConvertBase {
} }
// NOTE(review): this span is a side-by-side diff rendering — each line carries the old
// text followed by the new text, so it is not runnable exactly as written.
//
// run(data, opts), new column: starts from `newData = data`, asks
// textUtils.getEncoding() for the encoding name and, when that name begins with
// 'UTF-16', decodes the buffer with iconv and rebuilds it with Buffer.from(string)
// (Node encodes strings as UTF-8 by default). Returns false when this.check()
// rejects the (possibly re-encoded) data, otherwise the result of this.checkEncoding().
// Old column: checked `data` directly and returned this.checkEncoding(data).
async run(data, opts) { async run(data, opts) {
if (!this.check(data, opts)) let newData = data;
// Fix up the encoding: 16-bit encodings must become utf-8
const encoding = textUtils.getEncoding(newData);
// indexOf(...) == 0 means the encoding name starts with 'UTF-16'
if (encoding.indexOf('UTF-16') == 0) {
newData = Buffer.from(iconv.decode(newData, encoding));
}
if (!this.check(newData, opts))
return false; return false;
return this.checkEncoding(data); return this.checkEncoding(newData);
} }
checkEncoding(data) { checkEncoding(data) {
let result = data; let result = data;
const left = data.indexOf('<?xml version="1.0"'); let q = '"';
let left = data.indexOf('<?xml version="1.0"');
if (left < 0) {
left = data.indexOf('<?xml version=\'1.0\'');
q = '\'';
}
if (left >= 0) { if (left >= 0) {
const right = data.indexOf('?>', left); const right = data.indexOf('?>', left);
if (right >= 0) { if (right >= 0) {
const head = data.slice(left, right + 2).toString(); const head = data.slice(left, right + 2).toString();
const m = head.match(/encoding="(.*?)"/); const m = head.match(/encoding=['"](.*?)['"]/);
if (m) { if (m) {
let encoding = m[1].toLowerCase(); let encoding = m[1].toLowerCase();
if (encoding != 'utf-8') { if (encoding != 'utf-8') {
result = iconv.decode(data, encoding); //encoding может не соответствовать реальной кодировке файла, поэтому:
result = Buffer.from(result.toString().replace(m[0], 'encoding="utf-8"')); let calcEncoding = textUtils.getEncoding(data);
if (calcEncoding.indexOf('ISO-8859') >= 0) {
calcEncoding = encoding;
}
result = iconv.decode(data, calcEncoding);
result = Buffer.from(result.toString().replace(m[0], `encoding=${q}utf-8${q}`));
} }
} }
} }