Улучшение парсинга html
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
const fs = require('fs-extra');
|
||||
const iconv = require('iconv-lite');
|
||||
const chardet = require('chardet');
|
||||
const he = require('he');
|
||||
|
||||
const textUtils = require('./textUtils');
|
||||
const utils = require('../utils');
|
||||
@@ -80,6 +81,10 @@ class ConvertBase {
|
||||
return text.replace(/ |[\t\n\r]/g, ' ');
|
||||
}
|
||||
|
||||
escapeEntities(text) {
|
||||
return he.escape(text);
|
||||
}
|
||||
|
||||
formatFb2(fb2) {
|
||||
let out = '<?xml version="1.0" encoding="utf-8"?>';
|
||||
out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">';
|
||||
|
||||
@@ -79,6 +79,8 @@ class ConvertHtml extends ConvertBase {
|
||||
const newPara = new Set(['tr', '/table', 'hr', 'br', 'br/', 'li', 'dt', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
|
||||
|
||||
const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
||||
text = this.escapeEntities(text);
|
||||
|
||||
if (!cutCounter && !(cutTitle && inTitle)) {
|
||||
let tOpen = (bold ? '<strong>' : '');
|
||||
tOpen += (italic ? '<emphasis>' : '');
|
||||
|
||||
@@ -218,6 +218,8 @@ class ConvertSamlib extends ConvertBase {
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
text = this.escapeEntities(text);
|
||||
|
||||
switch (path) {
|
||||
case '/html/body/center/h2':
|
||||
titleInfo['book-title'] = text;
|
||||
|
||||
Reference in New Issue
Block a user