Работа над конвертером Pdf

This commit is contained in:
Book Pauk
2020-12-09 19:05:09 +07:00
parent 5d54b1b0f4
commit ef0d6eab89
2 changed files with 65 additions and 6 deletions

View File

@@ -45,6 +45,7 @@ class ConvertHtml extends ConvertBase {
let title = '';
let author = '';
let inTitle = false;
let inSectionTitle = false;
let inAuthor = false;
let inSubTitle = false;
let inImage = false;
@@ -63,7 +64,7 @@ class ConvertHtml extends ConvertBase {
};
const growParagraph = (text) => {
if (!pars.length)
if (!pars.length || pars[pars.length - 1]._n != 'p')
newParagraph();
const l = pars.length;
@@ -95,7 +96,7 @@ class ConvertHtml extends ConvertBase {
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
text = this.escapeEntities(text);
if (!(cutCounter || inTitle) || inSubTitle) {
if (!(cutCounter || inTitle || inSectionTitle || inSubTitle)) {
let tOpen = '';
tOpen += (inSubTitle ? '<subtitle>' : '');
tOpen += (bold ? '<strong>' : '');
@@ -114,12 +115,19 @@ class ConvertHtml extends ConvertBase {
if (inAuthor && !author)
author = text;
if (inSectionTitle) {
pars.unshift({_n: 'title', _t: text});
}
if (inSubTitle) {
pars.push({_n: 'subtitle', _t: text});
}
if (inImage) {
image._t = text;
binary.push(image);
pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
newParagraph();
}
};
@@ -154,6 +162,10 @@ class ConvertHtml extends ConvertBase {
inAuthor = true;
}
if (tag == 'fb2-section-title') {
inSectionTitle = true;
}
if (tag == 'fb2-subtitle') {
inSubTitle = true;
}
@@ -194,6 +206,10 @@ class ConvertHtml extends ConvertBase {
inAuthor = false;
}
if (tag == 'fb2-section-title') {
inSectionTitle = false;
}
if (tag == 'fb2-subtitle')
inSubTitle = false;

View File

@@ -50,7 +50,9 @@ class ConvertPdf extends ConvertHtml {
let lines = [];
let pagelines = [];
let line = {text: ''};
let page = {};
let fonts = {};
let sectionTitleFound = false;
let images = [];
let loading = [];
@@ -95,11 +97,26 @@ class ConvertPdf extends ConvertHtml {
//добавим закрывающий тег стиля
line.text += line.tClose;
//проверим, возможно это заголовок
if (line.fonts.length == 1 && line.pageWidth) {
const f = fonts[line.fonts[0]];
const centerLeft = (line.pageWidth - line.width)/2;
if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
if (!sectionTitleFound) {
line.isSectionTitle = true;
sectionTitleFound = true;
} else {
line.isSubtitle = true;
}
}
}
//объединяем
if (Math.abs(pt - line.top) > 3) {
j++;
pl[j] = line;
} else {
pl[j].text += line.text;
pl[j].text += ` ${line.text}`;
}
pt = line.top;
});
@@ -111,6 +128,7 @@ class ConvertPdf extends ConvertHtml {
lines[i] = line;
});
pagelines = [];
prevTop = 0;
};
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
@@ -122,19 +140,26 @@ class ConvertPdf extends ConvertHtml {
if (fontId && fontStyle) {
const styles = fontStyle.split(' ');
const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
const f = fonts[fontId] = {tOpen: '', tClose: ''};
const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false};
styles.forEach(style => {
const s = styleTags[style];
if (s) {
f.tOpen += `<${s}>`;
f.tClose = `</${s}>${f.tClose}`;
if (s == 'b')
f.isBold = true;
}
});
}
}
if (tag == 'page') {
const attrs = sax.getAttrsSync(tail);
page = {
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
};
putPageLines();
putImage(100000);
}
@@ -149,13 +174,17 @@ class ConvertPdf extends ConvertHtml {
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
tOpen: '',
tClose: '',
isSectionTitle: false,
isSubtitle: false,
pageWidth: page.width,
fonts: [],
};
if (line.width != 0 || line.height != 0) {
if (Math.abs(prevTop - line.top) > 3) {
putImage(line.top);
pagelines.push(line);
}
pagelines.push(line);
prevTop = line.top;
}
}
@@ -170,6 +199,8 @@ class ConvertPdf extends ConvertHtml {
if (fontId && fonts[fontId]) {
tOpen = fonts[fontId].tOpen;
tClose = fonts[fontId].tClose;
if (!line.fonts.length || line.fonts[0] != fontId)
line.fonts.push(fontId);
}
if (line.tOpen != tOpen) {
@@ -252,6 +283,7 @@ class ConvertPdf extends ConvertHtml {
if (!title && uploadFileName)
title = uploadFileName;
//console.log(JSON.stringify(lines, null, 2));
//формируем текст
const limitSize = 2*this.config.maxUploadFileSize;
let text = '';
@@ -270,6 +302,16 @@ class ConvertPdf extends ConvertHtml {
continue;
}
if (line.isSectionTitle) {
text += `<fb2-section-title>${line.text.trim()}</fb2-section-title>`;
continue;
}
if (line.isSubtitle) {
text += `<br><fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
continue;
}
if (concat == '') {
const left = line.left || 0;
sp = ' '.repeat(indents[left]);
@@ -287,6 +329,7 @@ class ConvertPdf extends ConvertHtml {
if (concat)
text += sp + concat + "\n";
//console.log(text);
await utils.sleep(100);
return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
}