Поправки распознавания кодировки fb2-файла
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
const ConvertBase = require('./ConvertBase');
|
const ConvertBase = require('./ConvertBase');
|
||||||
const iconv = require('iconv-lite');
|
const iconv = require('iconv-lite');
|
||||||
|
const textUtils = require('./textUtils');
|
||||||
|
|
||||||
class ConvertFb2 extends ConvertBase {
|
class ConvertFb2 extends ConvertBase {
|
||||||
check(data, opts) {
|
check(data, opts) {
|
||||||
@@ -9,26 +10,46 @@ class ConvertFb2 extends ConvertBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async run(data, opts) {
|
async run(data, opts) {
|
||||||
if (!this.check(data, opts))
|
let newData = data;
|
||||||
|
|
||||||
|
//Корректируем кодировку, 16-битные кодировки должны стать utf-8
|
||||||
|
const encoding = textUtils.getEncoding(newData);
|
||||||
|
if (encoding.indexOf('UTF-16') == 0) {
|
||||||
|
newData = Buffer.from(iconv.decode(newData, encoding));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!this.check(newData, opts))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return this.checkEncoding(data);
|
return this.checkEncoding(newData);
|
||||||
}
|
}
|
||||||
|
|
||||||
checkEncoding(data) {
|
checkEncoding(data) {
|
||||||
let result = data;
|
let result = data;
|
||||||
|
|
||||||
const left = data.indexOf('<?xml version="1.0"');
|
let q = '"';
|
||||||
|
let left = data.indexOf('<?xml version="1.0"');
|
||||||
|
if (left < 0) {
|
||||||
|
left = data.indexOf('<?xml version=\'1.0\'');
|
||||||
|
q = '\'';
|
||||||
|
}
|
||||||
|
|
||||||
if (left >= 0) {
|
if (left >= 0) {
|
||||||
const right = data.indexOf('?>', left);
|
const right = data.indexOf('?>', left);
|
||||||
if (right >= 0) {
|
if (right >= 0) {
|
||||||
const head = data.slice(left, right + 2).toString();
|
const head = data.slice(left, right + 2).toString();
|
||||||
const m = head.match(/encoding="(.*?)"/);
|
const m = head.match(/encoding=['"](.*?)['"]/);
|
||||||
if (m) {
|
if (m) {
|
||||||
let encoding = m[1].toLowerCase();
|
let encoding = m[1].toLowerCase();
|
||||||
if (encoding != 'utf-8') {
|
if (encoding != 'utf-8') {
|
||||||
result = iconv.decode(data, encoding);
|
//encoding может не соответсвовать реальной кодировке файла, поэтому:
|
||||||
result = Buffer.from(result.toString().replace(m[0], 'encoding="utf-8"'));
|
let calcEncoding = textUtils.getEncoding(data);
|
||||||
|
if (calcEncoding.indexOf('ISO-8859') >= 0) {
|
||||||
|
calcEncoding = encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = iconv.decode(data, calcEncoding);
|
||||||
|
result = Buffer.from(result.toString().replace(m[0], `encoding=${q}utf-8${q}`));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user