From 07c38d9a9f4cff02eba9a744ae2beae145c13cfc Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 23 Nov 2020 21:19:04 +0700 Subject: [PATCH 1/7] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20=D0=B4=D0=BB=D1=8F=20=D0=B6=D1=83=D1=80=D0=BD=D0=B0?= =?UTF-8?q?=D0=BB=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BE?= =?UTF-8?q?=D1=88=D0=B8=D0=B1=D0=BE=D0=BA=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D1=82=D0=B5=D1=80=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/Reader/BookConverter/ConvertBase.js | 4 +--- server/core/Reader/ReaderWorker.js | 11 ++++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/server/core/Reader/BookConverter/ConvertBase.js b/server/core/Reader/BookConverter/ConvertBase.js index 34aafe67..6ba20853 100644 --- a/server/core/Reader/BookConverter/ConvertBase.js +++ b/server/core/Reader/BookConverter/ConvertBase.js @@ -53,9 +53,7 @@ class ConvertBase { abort }); if (result.code != 0) { - let error = result.code; - if (this.config.branch == 'development') - error = `exec: ${path}, stdout: ${result.stdout}, stderr: ${result.stderr}`; + const error = `${result.code}|FORLOG|, exec: ${path}, args: ${args.join(' ')}, stdout: ${result.stdout}, stderr: ${result.stderr}`; throw new Error(`Внешний конвертер завершился с ошибкой: ${error}`); } } catch(e) { diff --git a/server/core/Reader/ReaderWorker.js b/server/core/Reader/ReaderWorker.js index 76ca0bfe..f0a30812 100644 --- a/server/core/Reader/ReaderWorker.js +++ b/server/core/Reader/ReaderWorker.js @@ -171,10 +171,15 @@ class ReaderWorker { } catch (e) { log(LM_ERR, e.stack); + let mes = e.message.split('|FORLOG|'); + if (mes[1]) + log(LM_ERR, mes[0] + mes[1]); log(LM_ERR, `downloadedFilename: ${downloadedFilename}`); - if (e.message == 'abort') - e.message = overLoadMes; - wState.set({state: 'error', error: e.message}); + + mes = mes[0]; + if (mes == 'abort') + mes = overLoadMes; + wState.set({state: 'error', error: mes}); } finally { //clean if (q) From a5c387a19ed48042a34fd07d45e6c91505e5d9e3 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 23 Nov 2020 21:35:45 +0700 Subject: [PATCH 2/7] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20=D1=81=D1=82=D1=80=D0=BE=D0=BA=D0=B8=20=D0=B7=D0=B0?= =?UTF-8?q?=D0=BF=D1=83=D1=81=D0=BA=D0=B0=20=D1=81=D0=B5=D1=80=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/beta/run_server.sh | 2 +- docs/omnireader.ru/cron_server.sh | 2 +- docs/omnireader.ru/start_server.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/beta/run_server.sh b/docs/beta/run_server.sh index b9ae79db..7c7bd60b 100755 --- a/docs/beta/run_server.sh +++ b/docs/beta/run_server.sh @@ -1,3 +1,3 @@ #!/bin/bash -sudo -H -u www-data /home/beta.liberama/liberama +sudo -H -u www-data bash -c "cd /var/www; /home/beta.liberama/liberama" diff --git a/docs/omnireader.ru/cron_server.sh b/docs/omnireader.ru/cron_server.sh index 5a887c31..3ad24ab8 100755 --- a/docs/omnireader.ru/cron_server.sh +++ b/docs/omnireader.ru/cron_server.sh @@ -1,7 +1,7 @@ #!/bin/bash if ! pgrep -x "liberama" > /dev/null ; then - sudo -H -u www-data /home/liberama/liberama + sudo -H -u www-data bash -c "cd /var/www; /home/liberama/liberama" else echo "Process 'liberama' already running" fi diff --git a/docs/omnireader.ru/start_server.sh b/docs/omnireader.ru/start_server.sh index f657a1ec..f4ba8447 100755 --- a/docs/omnireader.ru/start_server.sh +++ b/docs/omnireader.ru/start_server.sh @@ -1,4 +1,4 @@ #!/bin/bash -sudo -H -u www-data /home/liberama/liberama & +sudo -H -u www-data bash -c "cd /var/www; /home/liberama/liberama" & sudo service cron start From b65dcc5ade7c38adca553948d7b64cf005d00218 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 23 Nov 2020 21:49:19 +0700 Subject: [PATCH 3/7] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=20=D0=BF=D0=B0=D1=80=D0=B0=D0=BC=D0=B5=D1=82=D1=80=20-no?= =?UTF-8?q?drm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/Reader/BookConverter/ConvertPdf.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/core/Reader/BookConverter/ConvertPdf.js b/server/core/Reader/BookConverter/ConvertPdf.js index 397a1868..c28ec9ed 100644 --- a/server/core/Reader/BookConverter/ConvertPdf.js +++ b/server/core/Reader/BookConverter/ConvertPdf.js @@ -24,7 +24,7 @@ class ConvertPdf extends ConvertHtml { //конвертируем в xml let perc = 0; - await this.execConverter(this.pdfToHtmlPath, ['-c', '-s', '-xml', inputFiles.sourceFile, outFile], () => { + await this.execConverter(this.pdfToHtmlPath, ['-nodrm', '-c', '-s', '-xml', inputFiles.sourceFile, outFile], () => { perc = (perc < 80 ? perc + 10 : 40); callback(perc); }, abort); From d8df5d76e563473cb07e0b0b89c3b5154211f708 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 23 Nov 2020 21:57:52 +0700 Subject: [PATCH 4/7] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20=D1=80=D0=B8=D0=B4=D0=BC=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/omnireader.ru/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/omnireader.ru/README.md b/docs/omnireader.ru/README.md index 03d6c9f3..b5149069 100644 --- a/docs/omnireader.ru/README.md +++ b/docs/omnireader.ru/README.md @@ -27,9 +27,9 @@ sudo chown www-data.www-data /home/liberama ### external converter `calibre`, download from https://download.calibre-ebook.com/ ``` -wget "https://download.calibre-ebook.com/3.39.1/calibre-3.39.1-x86_64.txz" +wget "https://download.calibre-ebook.com/5.5.0/calibre-5.5.0-x86_64.txz" sudo -u www-data mkdir -p /home/liberama/data/calibre -sudo -u www-data tar xvf calibre-3.39.1-x86_64.txz -C /home/liberama/data/calibre +sudo -u www-data tar xvf calibre-5.5.0-x86_64.txz -C /home/liberama/data/calibre ``` ### external converters From fe4b7a5a85291309a6fa121856ba8e490e216e2e Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Mon, 23 Nov 2020 23:49:20 +0700 Subject: [PATCH 5/7] =?UTF-8?q?=D0=A3=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD?= =?UTF-8?q?=D0=BE=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD=D0=B0=D0=B2?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D0=B5=20xml-=D1=84=D0=BE=D1=80=D0=BC=D0=B0?= =?UTF-8?q?=D1=82=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/FileDetector/index.js | 2 +- server/core/FileDetector/signatures.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/server/core/FileDetector/index.js b/server/core/FileDetector/index.js index 796f0845..07ccca86 100644 --- a/server/core/FileDetector/index.js +++ b/server/core/FileDetector/index.js @@ -54,7 +54,7 @@ class FileDetector { fromBuffer(buffer, callback) { let result = null; - + //console.log(buffer); const invalidSignaturesList = this.validateSigantures(); if (invalidSignaturesList.length) { return callback(invalidSignaturesList); diff --git a/server/core/FileDetector/signatures.json b/server/core/FileDetector/signatures.json index fbd816a8..6fec5a6d 100644 --- a/server/core/FileDetector/signatures.json +++ b/server/core/FileDetector/signatures.json @@ -676,7 +676,9 @@ { "type": "equal", "end": 19, "bytes": "3c3f786d6c2076657273696f6e3d22312e3022" }, { "type": "equal", "end": 22, "bytes": "efbbbf3c3f786d6c2076657273696f6e3d22312e3022" }, { "type": "equal", "end": 19, "bytes": "3c3f786d6c2076657273696f6e3d27312e3027" }, - { "type": "equal", "end": 22, "bytes": "efbbbf3c3f786d6c2076657273696f6e3d27312e3027" } + { "type": "equal", "end": 22, "bytes": "efbbbf3c3f786d6c2076657273696f6e3d27312e3027" }, + { "type": "equal", "end": 40, "bytes": "fffe3c003f0078006d006c002000760065007200730069006f006e003d00220031002e0030002200" }, + { "type": "equal", "end": 40, "bytes": "fffe3c003f0078006d006c002000760065007200730069006f006e003d00270031002e0030002700" } ] } ] From dbb1bfe58798f13e5b490c4f260b34f10c18f503 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Tue, 24 Nov 2020 02:09:17 +0700 Subject: [PATCH 6/7] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B8=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD=D0=B0=D0=B2?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BA=D0=BE=D0=B4=D0=B8=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=D0=BA=D0=B8=20fb2-=D1=84=D0=B0=D0=B9=D0=BB=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/Reader/BookConverter/ConvertFb2.js | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/server/core/Reader/BookConverter/ConvertFb2.js b/server/core/Reader/BookConverter/ConvertFb2.js index 5c937996..7efa25c4 100644 --- a/server/core/Reader/BookConverter/ConvertFb2.js +++ b/server/core/Reader/BookConverter/ConvertFb2.js @@ -1,5 +1,6 @@ const ConvertBase = require('./ConvertBase'); const iconv = require('iconv-lite'); +const textUtils = require('./textUtils'); class ConvertFb2 extends ConvertBase { check(data, opts) { @@ -9,26 +10,46 @@ class ConvertFb2 extends ConvertBase { } async run(data, opts) { - if (!this.check(data, opts)) + let newData = data; + + //Корректируем кодировку, 16-битные кодировки должны стать utf-8 + const encoding = textUtils.getEncoding(newData); + if (encoding.indexOf('UTF-16') == 0) { + newData = Buffer.from(iconv.decode(newData, encoding)); + } + + if (!this.check(newData, opts)) return false; - return this.checkEncoding(data); + return this.checkEncoding(newData); } checkEncoding(data) { let result = data; - const left = data.indexOf('= 0) { const right = data.indexOf('?>', left); if (right >= 0) { const head = data.slice(left, right + 2).toString(); - const m = head.match(/encoding="(.*?)"/); + const m = head.match(/encoding=['"](.*?)['"]/); if (m) { let encoding = m[1].toLowerCase(); if (encoding != 'utf-8') { - result = iconv.decode(data, encoding); - result = Buffer.from(result.toString().replace(m[0], 'encoding="utf-8"')); + //encoding может не соответсвовать реальной кодировке файла, поэтому: + let calcEncoding = textUtils.getEncoding(data); + if (calcEncoding.indexOf('ISO-8859') >= 0) { + calcEncoding = encoding; + } + + result = iconv.decode(data, calcEncoding); + result = Buffer.from(result.toString().replace(m[0], `encoding=${q}utf-8${q}`)); } } } From 08fd0f15ff780267a1229c451dc06e5aa157aee1 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Tue, 24 Nov 2020 04:49:18 +0700 Subject: [PATCH 7/7] =?UTF-8?q?=D0=A3=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD?= =?UTF-8?q?=D0=BE=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD=D0=B0=D0=B2?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=B0=D1=80=D0=B0=D0=B3=D1=80?= =?UTF-8?q?=D0=B0=D1=84=D0=BE=D0=B2=20=D1=87=D0=B8=D1=81=D1=82=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D1=82=D0=B5=D0=BA=D1=81=D1=82=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/Reader/BookConverter/ConvertHtml.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/core/Reader/BookConverter/ConvertHtml.js b/server/core/Reader/BookConverter/ConvertHtml.js index 37e2e12c..3c913cdc 100644 --- a/server/core/Reader/BookConverter/ConvertHtml.js +++ b/server/core/Reader/BookConverter/ConvertHtml.js @@ -218,7 +218,7 @@ class ConvertHtml extends ConvertBase { let i = 0; //если разброс не слишком большой, выделяем параграфы - if (d < 10 && spaceCounter.length) { + if (d < 20 && spaceCounter.length) { total /= 20; i = spaceCounter.length - 1; while (i > 0 && (!spaceCounter[i] || spaceCounter[i] < total)) i--;