Работа над конвертером Pdf
This commit is contained in:
@@ -45,6 +45,7 @@ class ConvertHtml extends ConvertBase {
|
||||
let title = '';
|
||||
let author = '';
|
||||
let inTitle = false;
|
||||
let inSectionTitle = false;
|
||||
let inAuthor = false;
|
||||
let inSubTitle = false;
|
||||
let inImage = false;
|
||||
@@ -63,7 +64,7 @@ class ConvertHtml extends ConvertBase {
|
||||
};
|
||||
|
||||
const growParagraph = (text) => {
|
||||
if (!pars.length)
|
||||
if (!pars.length || pars[pars.length - 1]._n != 'p')
|
||||
newParagraph();
|
||||
|
||||
const l = pars.length;
|
||||
@@ -95,7 +96,7 @@ class ConvertHtml extends ConvertBase {
|
||||
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
text = this.escapeEntities(text);
|
||||
|
||||
if (!(cutCounter || inTitle) || inSubTitle) {
|
||||
if (!(cutCounter || inTitle || inSectionTitle || inSubTitle)) {
|
||||
let tOpen = '';
|
||||
tOpen += (inSubTitle ? '<subtitle>' : '');
|
||||
tOpen += (bold ? '<strong>' : '');
|
||||
@@ -114,12 +115,19 @@ class ConvertHtml extends ConvertBase {
|
||||
if (inAuthor && !author)
|
||||
author = text;
|
||||
|
||||
if (inSectionTitle) {
|
||||
pars.unshift({_n: 'title', _t: text});
|
||||
}
|
||||
|
||||
if (inSubTitle) {
|
||||
pars.push({_n: 'subtitle', _t: text});
|
||||
}
|
||||
|
||||
if (inImage) {
|
||||
image._t = text;
|
||||
binary.push(image);
|
||||
|
||||
pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''});
|
||||
newParagraph();
|
||||
}
|
||||
|
||||
};
|
||||
@@ -154,6 +162,10 @@ class ConvertHtml extends ConvertBase {
|
||||
inAuthor = true;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-section-title') {
|
||||
inSectionTitle = true;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-subtitle') {
|
||||
inSubTitle = true;
|
||||
}
|
||||
@@ -194,6 +206,10 @@ class ConvertHtml extends ConvertBase {
|
||||
inAuthor = false;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-section-title') {
|
||||
inSectionTitle = false;
|
||||
}
|
||||
|
||||
if (tag == 'fb2-subtitle')
|
||||
inSubTitle = false;
|
||||
|
||||
|
||||
@@ -50,7 +50,9 @@ class ConvertPdf extends ConvertHtml {
|
||||
let lines = [];
|
||||
let pagelines = [];
|
||||
let line = {text: ''};
|
||||
let page = {};
|
||||
let fonts = {};
|
||||
let sectionTitleFound = false;
|
||||
|
||||
let images = [];
|
||||
let loading = [];
|
||||
@@ -95,11 +97,26 @@ class ConvertPdf extends ConvertHtml {
|
||||
//добавим закрывающий тег стиля
|
||||
line.text += line.tClose;
|
||||
|
||||
//проверим, возможно это заголовок
|
||||
if (line.fonts.length == 1 && line.pageWidth) {
|
||||
const f = fonts[line.fonts[0]];
|
||||
const centerLeft = (line.pageWidth - line.width)/2;
|
||||
if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
|
||||
if (!sectionTitleFound) {
|
||||
line.isSectionTitle = true;
|
||||
sectionTitleFound = true;
|
||||
} else {
|
||||
line.isSubtitle = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//объединяем
|
||||
if (Math.abs(pt - line.top) > 3) {
|
||||
j++;
|
||||
pl[j] = line;
|
||||
} else {
|
||||
pl[j].text += line.text;
|
||||
pl[j].text += ` ${line.text}`;
|
||||
}
|
||||
pt = line.top;
|
||||
});
|
||||
@@ -111,6 +128,7 @@ class ConvertPdf extends ConvertHtml {
|
||||
lines[i] = line;
|
||||
});
|
||||
pagelines = [];
|
||||
prevTop = 0;
|
||||
};
|
||||
|
||||
const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
@@ -122,19 +140,26 @@ class ConvertPdf extends ConvertHtml {
|
||||
if (fontId && fontStyle) {
|
||||
const styles = fontStyle.split(' ');
|
||||
const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
|
||||
const f = fonts[fontId] = {tOpen: '', tClose: ''};
|
||||
const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false};
|
||||
|
||||
styles.forEach(style => {
|
||||
const s = styleTags[style];
|
||||
if (s) {
|
||||
f.tOpen += `<${s}>`;
|
||||
f.tClose = `</${s}>${f.tClose}`;
|
||||
if (s == 'b')
|
||||
f.isBold = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (tag == 'page') {
|
||||
const attrs = sax.getAttrsSync(tail);
|
||||
page = {
|
||||
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
|
||||
};
|
||||
|
||||
putPageLines();
|
||||
putImage(100000);
|
||||
}
|
||||
@@ -149,13 +174,17 @@ class ConvertPdf extends ConvertHtml {
|
||||
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
|
||||
tOpen: '',
|
||||
tClose: '',
|
||||
isSectionTitle: false,
|
||||
isSubtitle: false,
|
||||
pageWidth: page.width,
|
||||
fonts: [],
|
||||
};
|
||||
|
||||
if (line.width != 0 || line.height != 0) {
|
||||
if (Math.abs(prevTop - line.top) > 3) {
|
||||
putImage(line.top);
|
||||
pagelines.push(line);
|
||||
}
|
||||
pagelines.push(line);
|
||||
prevTop = line.top;
|
||||
}
|
||||
}
|
||||
@@ -170,6 +199,8 @@ class ConvertPdf extends ConvertHtml {
|
||||
if (fontId && fonts[fontId]) {
|
||||
tOpen = fonts[fontId].tOpen;
|
||||
tClose = fonts[fontId].tClose;
|
||||
if (!line.fonts.length || line.fonts[0] != fontId)
|
||||
line.fonts.push(fontId);
|
||||
}
|
||||
|
||||
if (line.tOpen != tOpen) {
|
||||
@@ -252,6 +283,7 @@ class ConvertPdf extends ConvertHtml {
|
||||
if (!title && uploadFileName)
|
||||
title = uploadFileName;
|
||||
|
||||
//console.log(JSON.stringify(lines, null, 2));
|
||||
//формируем текст
|
||||
const limitSize = 2*this.config.maxUploadFileSize;
|
||||
let text = '';
|
||||
@@ -270,6 +302,16 @@ class ConvertPdf extends ConvertHtml {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line.isSectionTitle) {
|
||||
text += `<fb2-section-title>${line.text.trim()}</fb2-section-title>`;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line.isSubtitle) {
|
||||
text += `<br><fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (concat == '') {
|
||||
const left = line.left || 0;
|
||||
sp = ' '.repeat(indents[left]);
|
||||
@@ -287,6 +329,7 @@ class ConvertPdf extends ConvertHtml {
|
||||
if (concat)
|
||||
text += sp + concat + "\n";
|
||||
|
||||
//console.log(text);
|
||||
await utils.sleep(100);
|
||||
return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user