Работа над InpxParser

This commit is contained in:
Book Pauk
2022-08-16 20:27:04 +07:00
parent 2948cfdc27
commit eddfde141e

View File

@@ -1,4 +1,4 @@
const path = require(path); const path = require('path');
const ZipReader = require('./ZipReader'); const ZipReader = require('./ZipReader');
const collectionInfo = 'collection.info'; const collectionInfo = 'collection.info';
@@ -9,18 +9,18 @@ const defaultStructure = 'AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EX
class InpxParser { class InpxParser {
constructor() { constructor() {
this.info = {}; this.inpxInfo = {};
} }
async safeExtractToString(zipReader, fileName) { async safeExtractToString(zipReader, fileName) {
let result = ''; let result = '';
try { try {
result = await zipReader.extractToBuf(fileName).toString(); result = (await zipReader.extractToBuf(fileName)).toString().trim();
} catch (e) { } catch (e) {
//quiet //quiet
} }
return result.trim(); return result;
} }
async parse(inpxFile, readFileCallback, parsedCallback) { async parse(inpxFile, readFileCallback, parsedCallback) {
@@ -35,7 +35,7 @@ class InpxParser {
await zipReader.open(inpxFile); await zipReader.open(inpxFile);
try { try {
const info = this.info; const info = this.inpxInfo;
//info //info
await readFileCallback(collectionInfo); await readFileCallback(collectionInfo);
@@ -55,14 +55,14 @@ class InpxParser {
const structure = inpxStructure.split(';'); const structure = inpxStructure.split(';');
//inp-файлы //inp-файлы
let chunk = [];
const entries = Object.values(zipReader.entries); const entries = Object.values(zipReader.entries);
for (const entry of entries) { for (const entry of entries) {
if (!entry.isDirectory && path.extname(entry.name) == '.inp') { if (!entry.isDirectory && path.extname(entry.name) == '.inp') {
await readFileCallback(entry.name); await readFileCallback(entry.name);
const buf = await zipReader.extractToBuf(entry.name); const buf = await zipReader.extractToBuf(entry.name);
chunk.push(this.parseInp(buf, structure));
await this.parseInp(buf, structure, parsedCallback);
} }
} }
} finally { } finally {
@@ -70,13 +70,60 @@ class InpxParser {
} }
} }
parseInp(inpBuf, structure) { async parseInp(inpBuf, structure, parsedCallback) {
const structLen = structure.length;
const rows = inpBuf.toString().split('\n'); const rows = inpBuf.toString().split('\n');
console.log(rows);
let chunk = [];
for (const row of rows) {
let line = row;
if (!line)
continue;
if (line[line.length - 1] == '\x0D')
line = line.substring(0, line.length - 1);
//парсим запись
const parts = line.split('\x04');
const rec = {};
const len = (parts.length > structLen ? structLen : parts.length);
for (let i = 0; i < len; i++) {
if (structure[i])
rec[structure[i]] = parts[i];
}
//специальная обработка некоторых полей
if (rec.author) {
rec.author = rec.author.split(':').map(s => s.replace(/,/g, ' ').trim()).filter(s => s).join(',');
}
if (rec.genre) {
rec.genre = rec.genre.split(':').filter(s => s).join(',');
}
rec.serno = parseInt(rec.serno, 10) || 0;
rec.size = parseInt(rec.size, 10) || 0;
rec.del = parseInt(rec.del, 10) || 0;
rec.insno = parseInt(rec.insno, 10) || 0;
rec.librate = parseInt(rec.librate, 10) || 0;
//пушим
chunk.push(rec);
if (chunk.length >= 10000) {
await parsedCallback(chunk);
chunk = [];
}
}
if (chunk.length) {
await parsedCallback(chunk);
}
} }
get info() { get info() {
return this.info; return this.inpxInfo;
} }
} }