Работа над конвертером pdf

This commit is contained in:
Book Pauk
2020-12-09 20:08:17 +07:00
parent ef0d6eab89
commit 75e01c899e
2 changed files with 59 additions and 17 deletions

View File

@@ -52,6 +52,8 @@ class ConvertHtml extends ConvertBase {
let image = {};
let bold = false;
let italic = false;
let superscript = false;
let subscript = false;
let begining = true;
let spaceCounter = [];
@@ -101,7 +103,11 @@ class ConvertHtml extends ConvertBase {
tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
tOpen += (superscript ? '<sup>' : '');
tOpen += (subscript ? '<sub>' : '');
let tClose = ''
tClose += (subscript ? '</sub>' : '');
tClose += (superscript ? '</sup>' : '');
tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : '');
@@ -152,6 +158,12 @@ class ConvertHtml extends ConvertBase {
bold = true;
break;
}
if (tag == 'sup')
superscript = true;
if (tag == 'sub')
subscript = true;
}
if (tag == 'title' || tag == 'fb2-title') {
@@ -197,6 +209,12 @@ class ConvertHtml extends ConvertBase {
bold = false;
break;
}
if (tag == 'sup')
superscript = false;
if (tag == 'sub')
subscript = false;
}
if (tag == 'title' || tag == 'fb2-title')
@@ -302,6 +320,8 @@ class ConvertHtml extends ConvertBase {
//убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги
bold = false;
italic = false;
superscript = false;
subscript = false;
inSubTitle = false;
pars = body.section._a[0];
for (let i = 0; i < pars.length; i++) {
@@ -321,7 +341,11 @@ class ConvertHtml extends ConvertBase {
tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : '');
tOpen += (italic ? '<emphasis>' : '');
tOpen += (superscript ? '<sup>' : '');
tOpen += (subscript ? '<sub>' : '');
let tClose = ''
tClose += (subscript ? '</sub>' : '');
tClose += (superscript ? '</sup>' : '');
tClose += (italic ? '</emphasis>' : '');
tClose += (bold ? '</strong>' : '');
tClose += (inSubTitle ? '</subtitle>' : '');
@@ -337,6 +361,10 @@ class ConvertHtml extends ConvertBase {
bold = true;
if (tag == 'emphasis')
italic = true;
if (tag == 'sup')
superscript = true;
if (tag == 'sub')
subscript = true;
if (tag == 'subtitle')
inSubTitle = true;
}
@@ -346,6 +374,10 @@ class ConvertHtml extends ConvertBase {
bold = false;
if (tag == 'emphasis')
italic = false;
if (tag == 'sup')
superscript = false;
if (tag == 'sub')
subscript = false;
if (tag == 'subtitle')
inSubTitle = false;
}

View File

@@ -91,15 +91,16 @@ class ConvertPdf extends ConvertHtml {
//объединяем в одну строку равные по высоте
const pl = [];
let pt = -100;
let pt = 0;
let j = -1;
pagelines.forEach(line => {
//добавим закрывающий тег стиля
line.text += line.tClose;
const f = (line.fonts.length ? fonts[line.fonts[0]] : null);
//проверим, возможно это заголовок
if (line.fonts.length == 1 && line.pageWidth) {
const f = fonts[line.fonts[0]];
const centerLeft = (line.pageWidth - line.width)/2;
if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
if (!sectionTitleFound) {
@@ -111,8 +112,14 @@ class ConvertPdf extends ConvertHtml {
}
}
//объедняем
if (Math.abs(pt - line.top) > 3) {
//добавим пустую строку, если надо
if (f && f.fontSize && Math.abs(pt - line.top) > f.fontSize*1.5) {
j++;
pl[j] = {text: '<br>'};
}
//объединяем
if (pt == 0 || Math.abs(pt - line.top) > 3) {
j++;
pl[j] = line;
} else {
@@ -136,12 +143,14 @@ class ConvertPdf extends ConvertHtml {
const attrs = sax.getAttrsSync(tail);
const fontId = (attrs.id && attrs.id.value ? attrs.id.value : '');
const fontStyle = (attrs.fontstyle && attrs.fontstyle.value ? attrs.fontstyle.value : '');
const fontSize = (attrs.fontsize && attrs.fontsize.value ? attrs.fontsize.value : '');
if (fontId && fontStyle) {
const styles = fontStyle.split(' ');
if (fontId) {
const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false};
const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false, fontSize};
if (fontStyle) {
const styles = fontStyle.split(' ');
styles.forEach(style => {
const s = styleTags[style];
if (s) {
@@ -153,6 +162,7 @@ class ConvertPdf extends ConvertHtml {
});
}
}
}
if (tag == 'page') {
const attrs = sax.getAttrsSync(tail);