diff --git a/server/core/Reader/BookConverter/ConvertHtml.js b/server/core/Reader/BookConverter/ConvertHtml.js index cf7da080..24cc1357 100644 --- a/server/core/Reader/BookConverter/ConvertHtml.js +++ b/server/core/Reader/BookConverter/ConvertHtml.js @@ -45,6 +45,7 @@ class ConvertHtml extends ConvertBase { let title = ''; let author = ''; let inTitle = false; + let inSectionTitle = false; let inAuthor = false; let inSubTitle = false; let inImage = false; @@ -63,7 +64,7 @@ class ConvertHtml extends ConvertBase { }; const growParagraph = (text) => { - if (!pars.length) + if (!pars.length || pars[pars.length - 1]._n != 'p') newParagraph(); const l = pars.length; @@ -95,7 +96,7 @@ class ConvertHtml extends ConvertBase { const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars text = this.escapeEntities(text); - if (!(cutCounter || inTitle) || inSubTitle) { + if (!(cutCounter || inTitle || inSectionTitle || inSubTitle)) { let tOpen = ''; tOpen += (inSubTitle ? '' : ''); tOpen += (bold ? '' : ''); @@ -114,12 +115,19 @@ class ConvertHtml extends ConvertBase { if (inAuthor && !author) author = text; + if (inSectionTitle) { + pars.unshift({_n: 'title', _t: text}); + } + + if (inSubTitle) { + pars.push({_n: 'subtitle', _t: text}); + } + if (inImage) { image._t = text; binary.push(image); pars.push({_n: 'image', _attrs: {'l:href': '#' + image._attrs.id}, _t: ''}); - newParagraph(); } }; @@ -154,6 +162,10 @@ class ConvertHtml extends ConvertBase { inAuthor = true; } + if (tag == 'fb2-section-title') { + inSectionTitle = true; + } + if (tag == 'fb2-subtitle') { inSubTitle = true; } @@ -194,6 +206,10 @@ class ConvertHtml extends ConvertBase { inAuthor = false; } + if (tag == 'fb2-section-title') { + inSectionTitle = false; + } + if (tag == 'fb2-subtitle') inSubTitle = false; diff --git a/server/core/Reader/BookConverter/ConvertPdf.js b/server/core/Reader/BookConverter/ConvertPdf.js index 3ffebb9e..b2e4d330 100644 --- a/server/core/Reader/BookConverter/ConvertPdf.js +++ b/server/core/Reader/BookConverter/ConvertPdf.js @@ -50,7 +50,9 @@ class ConvertPdf extends ConvertHtml { let lines = []; let pagelines = []; let line = {text: ''}; + let page = {}; let fonts = {}; + let sectionTitleFound = false; let images = []; let loading = []; @@ -95,11 +97,26 @@ class ConvertPdf extends ConvertHtml { //добавим закрывающий тег стиля line.text += line.tClose; + //проверим, возможно это заголовок + if (line.fonts.length == 1 && line.pageWidth) { + const f = fonts[line.fonts[0]]; + const centerLeft = (line.pageWidth - line.width)/2; + if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) { + if (!sectionTitleFound) { + line.isSectionTitle = true; + sectionTitleFound = true; + } else { + line.isSubtitle = true; + } + } + } + + //объедняем if (Math.abs(pt - line.top) > 3) { j++; pl[j] = line; } else { - pl[j].text += line.text; + pl[j].text += ` ${line.text}`; } pt = line.top; }); @@ -111,6 +128,7 @@ class ConvertPdf extends ConvertHtml { lines[i] = line; }); pagelines = []; + prevTop = 0; }; const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars @@ -122,19 +140,26 @@ class ConvertPdf extends ConvertHtml { if (fontId && fontStyle) { const styles = fontStyle.split(' '); const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'}; - const f = fonts[fontId] = {tOpen: '', tClose: ''}; + const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false}; styles.forEach(style => { const s = styleTags[style]; if (s) { f.tOpen += `<${s}>`; f.tClose = `${f.tClose}`; + if (s == 'b') + f.isBold = true; } }); } } if (tag == 'page') { + const attrs = sax.getAttrsSync(tail); + page = { + width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10), + }; + putPageLines(); putImage(100000); } @@ -149,13 +174,17 @@ class ConvertPdf extends ConvertHtml { height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10), tOpen: '', tClose: '', + isSectionTitle: false, + isSubtitle: false, + pageWidth: page.width, + fonts: [], }; if (line.width != 0 || line.height != 0) { if (Math.abs(prevTop - line.top) > 3) { putImage(line.top); - pagelines.push(line); } + pagelines.push(line); prevTop = line.top; } } @@ -170,6 +199,8 @@ class ConvertPdf extends ConvertHtml { if (fontId && fonts[fontId]) { tOpen = fonts[fontId].tOpen; tClose = fonts[fontId].tClose; + if (!line.fonts.length || line.fonts[0] != fontId) + line.fonts.push(fontId); } if (line.tOpen != tOpen) { @@ -252,6 +283,7 @@ class ConvertPdf extends ConvertHtml { if (!title && uploadFileName) title = uploadFileName; + //console.log(JSON.stringify(lines, null, 2)); //формируем текст const limitSize = 2*this.config.maxUploadFileSize; let text = ''; @@ -270,6 +302,16 @@ class ConvertPdf extends ConvertHtml { continue; } + if (line.isSectionTitle) { + text += `${line.text.trim()}`; + continue; + } + + if (line.isSubtitle) { + text += `
${line.text.trim()}`; + continue; + } + if (concat == '') { const left = line.left || 0; sp = ' '.repeat(indents[left]); @@ -287,6 +329,7 @@ class ConvertPdf extends ConvertHtml { if (concat) text += sp + concat + "\n"; + //console.log(text); await utils.sleep(100); return await super.run(Buffer.from(text), {skipCheck: true, isText: true}); }