Улучшение парсинга pdf и текстов

This commit is contained in:
Book Pauk
2019-03-04 22:28:11 +07:00
parent 36b14d0b3a
commit 3483d78c2c

View File

@@ -111,6 +111,11 @@ class ConvertHtml extends ConvertBase {
 };
 const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
+if (!cutCounter) {
+if (newPara.has('/' + tag))
+newParagraph();
+}
 if (tag == 'title')
 inTitle = false;
@@ -178,6 +183,7 @@ class ConvertHtml extends ConvertBase {
 newPar();
 i++;
+let j = 0;
 const lines = par._t.split('\n');
 for (let line of lines) {
 line = repCrLfTab(line);
@@ -187,8 +193,11 @@ class ConvertHtml extends ConvertBase {
 l++;
 }
-if (l >= parIndent)
-newPar();
+if (l >= parIndent) {
+if (j > 0)
+newPar();
+j++;
+}
 growPar(line.trim() + ' ');
 }
 }
@@ -199,6 +208,7 @@ class ConvertHtml extends ConvertBase {
 }
 //убираем лишнее
+pars = body.section._a[0];
 for (let i = 0; i < pars.length; i++)
 pars[i]._t = this.repSpaces(pars[i]._t).trim();