Улучшение парсинга pdf и текстов
This commit is contained in:
@@ -111,6 +111,11 @@ class ConvertHtml extends ConvertBase {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||||
|
if (!cutCounter) {
|
||||||
|
if (newPara.has('/' + tag))
|
||||||
|
newParagraph();
|
||||||
|
}
|
||||||
|
|
||||||
if (tag == 'title')
|
if (tag == 'title')
|
||||||
inTitle = false;
|
inTitle = false;
|
||||||
|
|
||||||
@@ -178,6 +183,7 @@ class ConvertHtml extends ConvertBase {
|
|||||||
newPar();
|
newPar();
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
|
let j = 0;
|
||||||
const lines = par._t.split('\n');
|
const lines = par._t.split('\n');
|
||||||
for (let line of lines) {
|
for (let line of lines) {
|
||||||
line = repCrLfTab(line);
|
line = repCrLfTab(line);
|
||||||
@@ -187,8 +193,11 @@ class ConvertHtml extends ConvertBase {
|
|||||||
l++;
|
l++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (l >= parIndent)
|
if (l >= parIndent) {
|
||||||
newPar();
|
if (j > 0)
|
||||||
|
newPar();
|
||||||
|
j++;
|
||||||
|
}
|
||||||
growPar(line.trim() + ' ');
|
growPar(line.trim() + ' ');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -199,6 +208,7 @@ class ConvertHtml extends ConvertBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//убираем лишнее
|
//убираем лишнее
|
||||||
|
pars = body.section._a[0];
|
||||||
for (let i = 0; i < pars.length; i++)
|
for (let i = 0; i < pars.length; i++)
|
||||||
pars[i]._t = this.repSpaces(pars[i]._t).trim();
|
pars[i]._t = this.repSpaces(pars[i]._t).trim();
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user