Работа над парсером html -> fb2

This commit is contained in:
Book Pauk
2019-01-22 00:09:04 +07:00
parent f260834a28
commit 3eb701cde8
3 changed files with 866 additions and 1 deletions

View File

@@ -1,5 +1,7 @@
const fs = require('fs-extra');
const FileDetector = require('./FileDetector');
const FileDetector = require('../FileDetector');
const URL = require('url').URL;
const EasySAXParser = require('./easysax');
class BookConverter {
constructor() {
@@ -17,6 +19,14 @@ class BookConverter {
return;
}
const parsedUrl = new URL(url);
if (parsedUrl.hostname == 'samlib.ru' ||
parsedUrl.hostname == 'budclub.ru') {
await fs.writeFile(outputFile, await this.convertSamlib(data));
return;
}
//Stub: write the data through unconverted
await fs.writeFile(outputFile, data);
callback(100);
@@ -27,6 +37,107 @@ class BookConverter {
throw new Error(`unsupported file format: ${url}`);
}
}
async convertSamlib(data) {
let fb2 = [{parentName: 'description'}];
let path = '';
let tag = '';
let inText = false;
const parser = new EasySAXParser();
parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars
});
parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars
if (elemName == 'xxx7')
inText = !inText;
if (!inText) {
path += '/' + elemName;
tag = elemName;
console.log(path);
}
});
parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars
if (!inText) {
const oldPath = path;
let t = '';
do {
let i = path.lastIndexOf('/');
t = path.substr(i + 1);
path = path.substr(0, i);
} while (t != elemName && path);
if (t != elemName) {
path = oldPath;
}
let i = path.lastIndexOf('/');
tag = path.substr(i + 1);
console.log('cl', elemName);
console.log('tag', tag);
console.log(path);
}
});
parser.on('textNode', (text) => {// eslint-disable-line no-unused-vars
});
parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars
});
parser.on('comment', (text) => {// eslint-disable-line no-unused-vars
});
/*
parser.on('progress', async(progress) => {
callback(...........);
});
*/
await parser.parse(data);
return this.formatFb2(fb2);
}
formatFb2(fb2) {
let out = '<?xml version="1.0" encoding="utf-8"?>';
out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">';
out += this.formatFb2Node(fb2);
out += '</FictionBook>';
console.log(out);
return out;
}
formatFb2Node(node, name) {
let out = '';
if (Array.isArray(node)) {
for (const n of node) {
out += this.formatFb2Node(n);
}
} else {
if (node.parentName)
name = node.parentName;
if (!name)
throw new Error(`malformed fb2 object`);
out += `<${name}>`;
for (let nodeName in node) {
if (nodeName == 'parentName')
continue;
const n = node[nodeName];
out += this.formatFb2Node(n, nodeName);
}
out += `</${name}>`;
}
return out;
}
}
module.exports = BookConverter;