Улучшение парсинга pdf и html
This commit is contained in:
@@ -76,7 +76,7 @@ class ConvertHtml extends ConvertBase {
|
||||
}
|
||||
};
|
||||
|
||||
const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
|
||||
const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
|
||||
|
||||
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (!cutCounter && !(cutTitle && inTitle)) {
|
||||
|
||||
@@ -38,6 +38,8 @@ class ConvertPdf extends ConvertHtml {
|
||||
let images = [];
|
||||
let loading = [];
|
||||
let inText = false;
|
||||
let bold = false;
|
||||
let italic = false;
|
||||
let title = '';
|
||||
let prevTop = 0;
|
||||
let i = -1;
|
||||
@@ -68,7 +70,12 @@ class ConvertPdf extends ConvertHtml {
|
||||
|
||||
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (!cutCounter && inText) {
|
||||
lines[i].text += text + ' ';
|
||||
let tOpen = (bold ? '<b>' : '');
|
||||
tOpen += (italic ? '<i>' : '');
|
||||
let tClose = (italic ? '</i>' : '');
|
||||
tClose += (bold ? '</b>' : '');
|
||||
|
||||
lines[i].text += `${tOpen}${text}${tClose} `;
|
||||
if (i < 2)
|
||||
title += text + ' ';
|
||||
}
|
||||
@@ -76,6 +83,17 @@ class ConvertPdf extends ConvertHtml {
|
||||
|
||||
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
if (!cutCounter) {
|
||||
if (inText) {
|
||||
switch (tag) {
|
||||
case 'i':
|
||||
italic = true;
|
||||
break;
|
||||
case 'b':
|
||||
bold = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (tag == 'text' && !inText) {
|
||||
let attrs = sax.getAttrsSync(tail);
|
||||
const line = {
|
||||
@@ -121,6 +139,17 @@ class ConvertPdf extends ConvertHtml {
|
||||
};
|
||||
|
||||
/**
 * End-tag callback: keeps the inline-style flags and the "inside a text
 * element" flag in sync with the closing tags that are encountered.
 *
 * While `inText` is set, a closing `i` clears `italic` and a closing `b`
 * clears `bold`. A closing `text` clears `inText`.
 *
 * @param {string} tag - name of the tag being closed
 * @param {*} tail - unused here
 * @param {*} singleTag - unused here
 * @param {*} cutCounter - unused here
 * @param {*} cutTag - unused here
 */
const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
    if (inText) {
        // Style flags are only touched for tags closed inside a text element.
        switch (tag) {
            case 'i':
                italic = false;
                break;
            case 'b':
                bold = false;
                break;
        }
    }

    // Evaluated after the style switch; strict comparison, no type coercion on the tag name.
    if (tag === 'text')
        inText = false;
};
|
||||
|
||||
Reference in New Issue
Block a user