Compare commits

...

7 Commits

Author SHA1 Message Date
Book Pauk
549ef91c81 Merge branch 'release/0.7.8' 2019-11-25 15:39:23 +07:00
Book Pauk
cede65313b Версия 0.7.8 2019-11-25 15:38:52 +07:00
Book Pauk
d897a7400f Улучшение парсера fb2 2019-11-25 15:36:34 +07:00
Book Pauk
47f059213f Добавлен конвертер для flibusta 2019-11-25 15:21:33 +07:00
Book Pauk
8af51bbf08 Улучшение фильтра html 2019-11-25 15:15:06 +07:00
Book Pauk
53d9f5ddc6 Улучшение конвертирования html->fb2 2019-11-24 15:36:11 +07:00
Book Pauk
06fffdccc8 Merge tag '0.7.7d' into develop
0.7.7d
2019-11-18 20:04:03 +07:00
6 changed files with 106 additions and 16 deletions

View File

@@ -240,6 +240,7 @@ export default class BookParser {
newParagraph(' ', 1);
isFirstTitlePara = true;
bold = true;
center = true;
}
if (tag == 'epigraph') {
@@ -282,6 +283,7 @@ export default class BookParser {
if (tag == 'subtitle') {
isFirstTitlePara = false;
bold = false;
center = false;
}
if (tag == 'epigraph') {
@@ -367,11 +369,10 @@ export default class BookParser {
tClose += (bold ? '</strong>' : '');
tClose += (center ? '</center>' : '');
if (path.indexOf('/fictionbook/body/title') == 0) {
growParagraph(`${tOpen}${text}${tClose}`, text.length);
}
if (path.indexOf('/fictionbook/body/section') == 0) {
if (path.indexOf('/fictionbook/body/title') == 0 ||
path.indexOf('/fictionbook/body/section') == 0 ||
path.indexOf('/fictionbook/body/epigraph') == 0
) {
growParagraph(`${tOpen}${text}${tClose}`, text.length);
}

View File

@@ -1,4 +1,15 @@
export const versionHistory = [
{
showUntil: '2019-11-24',
header: '0.7.8 (2019-11-25)',
content:
`
<ul>
<li>улучшение html-фильтров для сайтов</li>
</ul>
`
},
{
showUntil: '2019-11-10',
header: '0.7.7 (2019-11-06)',

View File

@@ -1,6 +1,6 @@
{
"name": "Liberama",
"version": "0.7.7",
"version": "0.7.8",
"engines": {
"node": ">=10.0.0"
},

View File

@@ -39,16 +39,19 @@ class ConvertHtml extends ConvertBase {
let title = '';
let inTitle = false;
let inSubTitle = false;
let inImage = false;
let image = {};
let bold = false;
let italic = false;
let begining = true;
let spaceCounter = [];
const repCrLfTab = (text) => text.replace(/[\n\r]/g, '').replace(/\t/g, ' ');
const newParagraph = () => {
begining = false;
pars.push({_n: 'p', _t: ''});
};
@@ -58,6 +61,8 @@ class ConvertHtml extends ConvertBase {
const l = pars.length;
pars[l - 1]._t += text;
if (inSubTitle)
pars[l - 1]._n = '';
//посчитаем отступы у текста, чтобы выделить потом параграфы
const lines = text.split('\n');
@@ -77,16 +82,21 @@ class ConvertHtml extends ConvertBase {
}
};
const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'ul', '/ul', 'h1', 'h2', 'h3', 'h4', 'h5', '/h1', '/h2', '/h3', '/h4', '/h5']);
const newPara2 = new Set(['h1', 'h2', 'h3', 'h4', 'h5']);
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
text = this.escapeEntities(text);
if (!cutCounter && !(cutTitle && inTitle)) {
let tOpen = (bold ? '<strong>' : '');
let tOpen = '';
tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
let tClose = (italic ? '</emphasis>' : '');
let tClose = ''
tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : '');
growParagraph(`${tOpen}${text}${tClose}`);
}
@@ -106,6 +116,8 @@ class ConvertHtml extends ConvertBase {
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
if (!cutCounter) {
if (newPara2.has(tag) && !begining)
newParagraph();
if (newPara.has(tag))
newParagraph();
@@ -130,6 +142,10 @@ class ConvertHtml extends ConvertBase {
cutTitle = true;
}
if (tag == 'subtitle') {
inSubTitle = true;
}
if (tag == 'fb2-image') {
inImage = true;
const attrs = sax.getAttrsSync(tail);
@@ -141,6 +157,8 @@ class ConvertHtml extends ConvertBase {
if (!cutCounter) {
if (newPara.has('/' + tag))
newParagraph();
if (newPara2.has('/' + tag))
newParagraph();
switch (tag) {
case 'i':
@@ -160,6 +178,9 @@ class ConvertHtml extends ConvertBase {
if (tag == 'title' || tag == 'cut-title')
inTitle = false;
if (tag == 'subtitle')
inSubTitle = false;
if (tag == 'fb2-image')
inImage = false;
};
@@ -197,7 +218,8 @@ class ConvertHtml extends ConvertBase {
while (i > 0 && (!spaceCounter[i] || spaceCounter[i] < total)) i--;
}
const parIndent = (i > 0 ? i : 0);
let parIndent = (i > 0 ? i : 0);
if (parIndent > 2) parIndent--;
let newPars = [];
const newPar = () => {
@@ -233,7 +255,7 @@ class ConvertHtml extends ConvertBase {
l++;
}
if (l >= parIndent) {
if (l >= parIndent || line == '') {
if (j > 0)
newPar();
j++;
@@ -250,6 +272,7 @@ class ConvertHtml extends ConvertBase {
//убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги
bold = false;
italic = false;
inSubTitle = false;
pars = body.section._a[0];
for (let i = 0; i < pars.length; i++) {
if (pars[i]._n != 'p')
@@ -259,16 +282,24 @@ class ConvertHtml extends ConvertBase {
if (pars[i]._t.indexOf('<') >= 0 || bold || italic) {
const t = pars[i]._t;
let first = true;
let a = [];
const onTextNode = (text) => {
let tOpen = (bold ? '<strong>' : '');
let tOpen = '';
tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
let tClose = (italic ? '</emphasis>' : '');
let tClose = ''
tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : '');
if (first)
text = text.replace(/^\s+/, ''); //trimLeft
a.push(`${tOpen}${text}${tClose}`);
first = false;
}
const onStartNode = (tag) => {
@@ -276,6 +307,8 @@ class ConvertHtml extends ConvertBase {
bold = true;
if (tag == 'emphasis')
italic = true;
if (tag == 'subtitle')
inSubTitle = true;
}
const onEndNode = (tag) => {
@@ -283,6 +316,8 @@ class ConvertHtml extends ConvertBase {
bold = false;
if (tag == 'emphasis')
italic = false;
if (tag == 'subtitle')
inSubTitle = false;
}
sax.parseSync(t, { onStartNode, onEndNode, onTextNode });

View File

@@ -102,6 +102,8 @@ class ConvertSamlib extends ConvertBase {
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
if (inPara)
closeTag('p');
openTag('p');
@@ -173,6 +175,8 @@ class ConvertSamlib extends ConvertBase {
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
closeTag('p');
bold = false;
break;

View File

@@ -12,7 +12,10 @@ const sitesFilter = {
converter: 'cutter',
begin: `<!-- BEGIN section where work skin applies -->`,
end: `<!-- END work skin -->`,
}
},
'flibusta.is': {
converter: 'flibusta'
},
};
class ConvertSites extends ConvertHtml {
@@ -54,11 +57,11 @@ class ConvertSites extends ConvertHtml {
if (m)
title = m[1];
return `<title>${title.trim()}</title>`;
return title.trim();
}
cutter(text, opts) {
const title = this.getTitle(text);
const title = `<title>${this.getTitle(text)}</title>`;
const l = text.indexOf(opts.begin) + opts.begin.length;
const r = text.indexOf(opts.end);
if (l < 0 || r < 0 || r <= l)
@@ -66,6 +69,42 @@ class ConvertSites extends ConvertHtml {
return text.substring(l, r) + title;
}
flibusta(text) {
let author = '';
let m = text.match(/- <a href=".+">([\s\S]*?)<\/a><br\/?>/);
if (m)
author = m[1];
let book = this.getTitle(text);
book = book.replace(' (fb2) | Флибуста', '');
const title = `<title>${author}${(author ? ' - ' : '')}${book}</title>`;
let begin = '<h3 class="book">';
if (text.indexOf(begin) <= 0)
begin = '<h3 class=book>';
const end = '<div id="footer">';
const l = text.indexOf(begin);
const r = text.indexOf(end);
if (l < 0 || r < 0 || r <= l)
return false;
return text.substring(l, r)
.replace(/blockquote class="?book"?/g, 'p')
.replace(/<br\/?>\s*<\/h3>/g, '</h3>')
.replace(/<h3 class="?book"?>/g, '<br><br><subtitle>')
.replace(/<h5 class="?book"?>/g, '<br><br><subtitle>')
.replace(/<h3>/g, '<br><br><subtitle>')
.replace(/<h5>/g, '<br><br><subtitle>')
.replace(/<\/h3>/g, '</subtitle><br>')
.replace(/<\/h5>/g, '</subtitle><br>')
.replace(/<div class="?stanza"?>/g, '<br>')
.replace(/<div>/g, '<br>')
+ title;
}
}
module.exports = ConvertSites;