Files
inpx-web/server/core/DbCreator.js
2022-08-17 20:47:34 +07:00

235 lines
7.8 KiB
JavaScript

const InpxParser = require('./InpxParser');
const utils = require('./utils');
class DbCreator {
constructor(config) {
this.config = config;
}
async run(db, callback) {
const config = this.config;
//book
await db.create({
table: 'book'
});
callback({job: 'load inpx', jobMessage: 'Загрузка INPX'});
const readFileCallback = async(readState) => {
callback(readState);
};
//поисковые таблицы, ниже сохраним в БД
let authorMap = new Map();//авторы
let authorArr = [];
let seriesMap = new Map();//серии
let seriesArr = [];
let titleMap = new Map();//названия
let titleArr = [];
let genreMap = new Map();//жанры
let genreArr = [];
let recsLoaded = 0;
let id = 0;
const parsedCallback = async(chunk) => {
for (const rec of chunk) {
rec.id = ++id;
if (!rec.author)
continue;
//авторы
const author = rec.author.split(',');
if (author.length > 1)
author.push(rec.author);
const authorIds = [];
for (const a of author) {
let authorRec;
if (authorMap.has(a)) {
const authorId = authorMap.get(a);
authorRec = authorArr[authorId];
} else {
authorRec = {id: authorArr.length, author: a, value: a.toLowerCase(), bookId: []};
authorArr.push(authorRec);
authorMap.set(a, authorRec.id);
}
authorRec.bookId.push(id);
authorIds.push(authorRec.id);
}
//серии
if (rec.series) {
const series = rec.series;
let seriesRec;
if (seriesMap.has(series)) {
const seriesId = seriesMap.get(series);
seriesRec = seriesArr[seriesId];
} else {
seriesRec = {id: seriesArr.length, value: series.toLowerCase(), authorId: new Set()};
seriesArr.push(seriesRec);
seriesMap.set(series, seriesRec.id);
}
for (const id of authorIds) {
seriesRec.authorId.add(id);
}
}
//названия
if (rec.title) {
const title = rec.title;
let titleRec;
if (titleMap.has(title)) {
const titileId = titleMap.get(title);
titleRec = titleArr[titileId];
} else {
titleRec = {id: titleArr.length, value: title.toLowerCase(), authorId: new Set()};
titleArr.push(titleRec);
titleMap.set(title, titleRec.id);
}
for (const id of authorIds) {
titleRec.authorId.add(id);
}
}
//жанры
if (rec.genre) {
const genre = rec.genre.split(',');
for (const g of genre) {
let genreRec;
if (genreMap.has(g)) {
const genreId = genreMap.get(g);
genreRec = genreArr[genreId];
} else {
genreRec = {id: genreArr.length, value: g, authorId: new Set()};
genreArr.push(genreRec);
genreMap.set(g, genreRec.id);
}
for (const id of authorIds) {
genreRec.authorId.add(id);
}
}
}
}
await db.insert({table: 'book', rows: chunk});
recsLoaded += chunk.length;
callback({recsLoaded});
};
//парсинг
const parser = new InpxParser();
await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
callback({job: 'config save', jobMessage: 'Сохранение конфигурации'});
//чистка памяти, ибо жрет как не в себя
authorMap = null;
seriesMap = null;
titleMap = null;
genreMap = null;
utils.freeMemory();
//конфиг
console.log('author:', authorArr.length);
console.log('series:', seriesArr.length);
console.log('title:', titleArr.length);
console.log('genre:', genreArr.length);
//сохраним поисковые таблицы
const chunkSize = 10000;
//author
callback({job: 'author save', jobMessage: 'Сохранение авторов книг'});
await db.create({
table: 'author',
index: {field: 'value', depth: config.indexDepth},
});
//вставка в БД по кусочкам, экономим память
for (let i = 0; i < authorArr.length; i += chunkSize) {
const chunk = authorArr.slice(i, i + chunkSize);
await db.insert({table: 'author', rows: chunk});
}
authorArr = null;
await db.close({table: 'author'});
utils.freeMemory();
//series
callback({job: 'series save', jobMessage: 'Сохранение серий книг'});
await db.create({
table: 'series',
index: {field: 'value', depth: config.indexDepth},
});
//вставка в БД по кусочкам, экономим память
for (let i = 0; i < seriesArr.length; i += chunkSize) {
const chunk = seriesArr.slice(i, i + chunkSize);
for (const rec of chunk)
rec.authorId = Array.from(rec.authorId);
await db.insert({table: 'series', rows: chunk});
}
seriesArr = null;
await db.close({table: 'series'});
utils.freeMemory();
//title
callback({job: 'title save', jobMessage: 'Сохранение названий книг'});
await db.create({
table: 'title',
index: {field: 'value', depth: config.indexDepth},
});
//вставка в БД по кусочкам, экономим память
for (let i = 0; i < titleArr.length; i += chunkSize) {
const chunk = titleArr.slice(i, i + chunkSize);
for (const rec of chunk)
rec.authorId = Array.from(rec.authorId);
await db.insert({table: 'title', rows: chunk});
}
titleArr = null;
await db.close({table: 'title'});
utils.freeMemory();
//genre
callback({job: 'genre save', jobMessage: 'Сохранение жанров'});
await db.create({
table: 'genre',
index: {field: 'value', depth: config.indexDepth},
});
//вставка в БД по кусочкам, экономим память
for (let i = 0; i < genreArr.length; i += chunkSize) {
const chunk = genreArr.slice(i, i + chunkSize);
for (const rec of chunk)
rec.authorId = Array.from(rec.authorId);
await db.insert({table: 'genre', rows: chunk});
}
genreArr = null;
await db.close({table: 'genre'});
utils.freeMemory();
callback({job: 'done', jobMessage: ''});
}
}
module.exports = DbCreator;