Улучшено определение кодировки

2023-03-17 13:39:50 +07:00
parent bf6cf4238a
commit 4b4f7bd697
2 changed files with 3 additions and 12 deletions
--- a/server/core/fb2/Fb2Helper.js
+++ b/server/core/fb2/Fb2Helper.js
@@ -35,7 +35,7 @@ class Fb2Helper {
                if (m) {
                    let enc = m[1].toLowerCase();
                    if (enc != 'utf-8') {
-                        //enc может не соответсвовать реальной кодировке файла, поэтому:
+                        //если кодировка не определена в getEncoding, используем enc
                        if (encoding.indexOf('ISO-8859') >= 0) {
                            encoding = enc;
                        }
--- a/server/core/fb2/textUtils.js
+++ b/server/core/fb2/textUtils.js
@@ -4,7 +4,7 @@ function getEncoding(buf) {
    let selected = getEncodingLite(buf);

    if (selected == 'ISO-8859-5' && buf.length > 10) {
-        const charsetAll = chardet.analyse(buf.slice(0, 20000));
+        const charsetAll = chardet.analyse(buf.slice(0, 100000));
        for (const charset of charsetAll) {
            if (charset.name.indexOf('ISO-8859') < 0) {
                selected = charset.name;
@@ -39,9 +39,7 @@ function getEncodingLite(buf, returnAll) {
        'u': 0,
    };

-    const len = buf.length;
-    const blockSize = (len > 5*3000 ? 3000 : len);
-    let counter = 0;
+    const len = (buf.length > 100000 ? 100000 : buf.length);
    let i = 0;
    let totalChecked = 0;
    while (i < len) {
@@ -76,13 +74,6 @@ function getEncodingLite(buf, returnAll) {
            if (char > 207 && char < 240) charsets['i'] += lowerCase;
            if (char > 175 && char < 208) charsets['i'] += upperCase;
        }
-
-        counter++;
-
-        if (counter > blockSize) {
-            counter = 0;
-            i += Math.round(len/2 - 2*blockSize);
-        }
    }

    let sorted = Object.keys(charsets).map(function(key) {