Работа над конвертером html -> fb2

This commit is contained in:
Book Pauk
2019-01-23 00:59:15 +07:00
parent 8ed4c1e219
commit 3e48422530

View File

@@ -62,9 +62,6 @@ class BookConverter {
let body = {_n: 'body', section: {_a: [pars]}}; let body = {_n: 'body', section: {_a: [pars]}};
let fb2 = [desc, body]; let fb2 = [desc, body];
let path = '';
let tag = '';// eslint-disable-line no-unused-vars
const newParagraph = () => { const newParagraph = () => {
pars.push({_n: 'p', _t: ''}); pars.push({_n: 'p', _t: ''});
}; };
@@ -78,66 +75,64 @@ class BookConverter {
} }
}; };
const parser = new EasySAXParser(); newParagraph();
parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars const onNode = (elem) => {
}); switch (elem) {
case 'TR':
parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars case 'BR':
path += '/' + elemName; case 'BR/':
tag = elemName; case 'DD':
case 'P':
if (elemName == 'p' || elemName == 'dd') { case 'TITLE':
newParagraph(); case '/TITLE':
newParagraph();
break;
} }
}); };
parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars const innerCut = new Set(['HEAD', 'SCRIPT', 'STYLE']);
const oldPath = path; let buf = this.decode(data).toString();
let t = '';
do {
let i = path.lastIndexOf('/');
t = path.substr(i + 1);
path = path.substr(0, i);
} while (t != elemName && path);
if (t != elemName) { let i = 0;
path = oldPath; const len = buf.length;
let cutCounter = 0;
let cutTag = '';
while (i < len) {
let left = buf.indexOf('<', i);
if (left < 0)
break;
let right = buf.indexOf('>', left + 1);
if (right < 0)
break;
let tag = buf.substr(left + 1, right - left - 1).trim().toUpperCase();
const firstSpace = tag.indexOf(' ');
if (firstSpace >= 0)
tag = tag.substr(0, firstSpace);
if (!cutCounter) {
growParagraph(buf.substr(i, left - i));
onNode(tag);
} }
let i = path.lastIndexOf('/'); if (innerCut.has(tag) && (!cutCounter || cutTag == tag)) {
tag = path.substr(i + 1); if (!cutCounter)
}); cutTag = tag;
cutCounter++;
parser.on('textNode', (text) => {// eslint-disable-line no-unused-vars
if (text != ' ' && text.trim() == '')
text = text.trim();
if (text == '')
return;
switch (path) {
case '/html/head/title':
titleInfo['book-title'] = text;
return;
} }
growParagraph(text); if (tag != '' && tag.charAt(0) == '/' && cutTag == tag.substr(1)) {
}); cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
if (!cutCounter)
cutTag = '';
}
parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars i = right + 1;
}); }
parser.on('comment', (text) => {// eslint-disable-line no-unused-vars if (i < len && !cutCounter)
}); growParagraph(buf.substr(i, len - i));
/*
parser.on('progress', async(progress) => {
callback(...........);
});
*/
await parser.parse(this.decode(data));
return this.formatFb2(fb2); return this.formatFb2(fb2);
} }