Доработки парсера html

This commit is contained in:
Book Pauk
2019-01-23 01:16:29 +07:00
parent 3e5600c8af
commit b9742e365e

View File

@@ -62,6 +62,9 @@ class BookConverter {
let body = {_n: 'body', section: {_a: [pars]}}; let body = {_n: 'body', section: {_a: [pars]}};
let fb2 = [desc, body]; let fb2 = [desc, body];
let title = '';
let inTitle = false;
const newParagraph = () => { const newParagraph = () => {
pars.push({_n: 'p', _t: ''}); pars.push({_n: 'p', _t: ''});
}; };
@@ -76,19 +79,18 @@ class BookConverter {
}; };
newParagraph(); newParagraph();
const newPara = new Set(['TR', 'BR', 'BR/', 'DD', 'P', 'TITLE', '/TITLE']);
const onText = (text) => {
if (inTitle && !title)
title = text;
};
const onNode = (elem) => { const onNode = (elem) => {
switch (elem) { if (elem == 'TITLE')
case 'TR': inTitle = true;
case 'BR': else if (elem == '/TITLE')
case 'BR/': inTitle = false;
case 'DD':
case 'P':
case 'TITLE':
case '/TITLE':
newParagraph();
break;
}
}; };
const innerCut = new Set(['HEAD', 'SCRIPT', 'STYLE']); const innerCut = new Set(['HEAD', 'SCRIPT', 'STYLE']);
@@ -111,10 +113,14 @@ class BookConverter {
if (firstSpace >= 0) if (firstSpace >= 0)
tag = tag.substr(0, firstSpace); tag = tag.substr(0, firstSpace);
const text = buf.substr(i, left - i);
if (!cutCounter) { if (!cutCounter) {
growParagraph(buf.substr(i, left - i)); growParagraph(text);
onNode(tag); if (newPara.has(tag))
newParagraph();
} }
onText(text);
onNode(tag);
if (innerCut.has(tag) && (!cutCounter || cutTag == tag)) { if (innerCut.has(tag) && (!cutCounter || cutTag == tag)) {
if (!cutCounter) if (!cutCounter)
@@ -134,6 +140,8 @@ class BookConverter {
if (i < len && !cutCounter) if (i < len && !cutCounter)
growParagraph(buf.substr(i, len - i)); growParagraph(buf.substr(i, len - i));
titleInfo['book-title'] = title;
return this.formatFb2(fb2); return this.formatFb2(fb2);
} }