Улучшение поисковой БД
This commit is contained in:
@@ -61,103 +61,23 @@ class DbCreator {
|
||||
if (author.length > 1)
|
||||
author.push(rec.author);
|
||||
|
||||
const authorIds = [];
|
||||
for (let i = 0; i < author.length; i++) {
|
||||
const a = author[i];
|
||||
|
||||
let authorRec;
|
||||
if (authorMap.has(a)) {
|
||||
const authorId = authorMap.get(a);
|
||||
authorRec = authorArr[authorId];
|
||||
const authorTmpId = authorMap.get(a);
|
||||
authorRec = authorArr[authorTmpId];
|
||||
} else {
|
||||
authorRec = {id: authorArr.length, author: a, value: a.toLowerCase(), bookId: []};
|
||||
authorRec = {tmpId: authorArr.length, author: a, value: a.toLowerCase(), bookId: []};
|
||||
authorArr.push(authorRec);
|
||||
authorMap.set(a, authorRec.id);
|
||||
authorMap.set(a, authorRec.tmpId);
|
||||
|
||||
if (author.length == 1 || i < author.length - 1) //без соавторов
|
||||
authorCount++;
|
||||
}
|
||||
|
||||
authorRec.bookId.push(id);
|
||||
authorIds.push(authorRec.id);
|
||||
}
|
||||
|
||||
//серии
|
||||
if (rec.series) {
|
||||
const series = rec.series;
|
||||
|
||||
let seriesRec;
|
||||
if (seriesMap.has(series)) {
|
||||
const seriesId = seriesMap.get(series);
|
||||
seriesRec = seriesArr[seriesId];
|
||||
} else {
|
||||
seriesRec = {id: seriesArr.length, value: series.toLowerCase(), authorId: new Set()};
|
||||
seriesArr.push(seriesRec);
|
||||
seriesMap.set(series, seriesRec.id);
|
||||
}
|
||||
|
||||
for (const id of authorIds) {
|
||||
seriesRec.authorId.add(id);
|
||||
}
|
||||
}
|
||||
|
||||
//названия
|
||||
if (rec.title) {
|
||||
const title = rec.title;
|
||||
|
||||
let titleRec;
|
||||
if (titleMap.has(title)) {
|
||||
const titleId = titleMap.get(title);
|
||||
titleRec = titleArr[titleId];
|
||||
} else {
|
||||
titleRec = {id: titleArr.length, value: title.toLowerCase(), authorId: new Set()};
|
||||
titleArr.push(titleRec);
|
||||
titleMap.set(title, titleRec.id);
|
||||
}
|
||||
|
||||
for (const id of authorIds) {
|
||||
titleRec.authorId.add(id);
|
||||
}
|
||||
}
|
||||
|
||||
//жанры
|
||||
if (rec.genre) {
|
||||
const genre = rec.genre.split(',');
|
||||
|
||||
for (const g of genre) {
|
||||
let genreRec;
|
||||
if (genreMap.has(g)) {
|
||||
const genreId = genreMap.get(g);
|
||||
genreRec = genreArr[genreId];
|
||||
} else {
|
||||
genreRec = {id: genreArr.length, value: g, authorId: new Set()};
|
||||
genreArr.push(genreRec);
|
||||
genreMap.set(g, genreRec.id);
|
||||
}
|
||||
|
||||
for (const id of authorIds) {
|
||||
genreRec.authorId.add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//языки
|
||||
if (rec.lang) {
|
||||
const lang = rec.lang;
|
||||
|
||||
let langRec;
|
||||
if (langMap.has(lang)) {
|
||||
const langId = langMap.get(lang);
|
||||
langRec = langArr[langId];
|
||||
} else {
|
||||
langRec = {id: langArr.length, value: lang, authorId: new Set()};
|
||||
langArr.push(langRec);
|
||||
langMap.set(lang, langRec.id);
|
||||
}
|
||||
|
||||
for (const id of authorIds) {
|
||||
langRec.authorId.add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -170,17 +90,170 @@ class DbCreator {
|
||||
utils.freeMemory();
|
||||
};
|
||||
|
||||
//парсинг
|
||||
//парсинг 1
|
||||
const parser = new InpxParser();
|
||||
await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
|
||||
|
||||
utils.freeMemory();
|
||||
|
||||
//отсортируем авторов и выдадим им правильные id
|
||||
//порядок id соответствует ASC-сортировке по author.toLowerCase
|
||||
callback({job: 'author sort', jobMessage: 'Сортировка'});
|
||||
authorArr.sort((a, b) => a.value.localeCompare(b.value));
|
||||
|
||||
id = 0;
|
||||
authorMap = new Map();
|
||||
for (const authorRec of authorArr) {
|
||||
authorRec.id = ++id;
|
||||
authorMap.set(authorRec.author, id);
|
||||
delete authorRec.tmpId;
|
||||
}
|
||||
|
||||
utils.freeMemory();
|
||||
|
||||
//теперь можно создавать остальные поисковые таблицы
|
||||
//Find or create the record for `key` in one search index and attach author ids to it.
//  indexMap  - Map from the original key to the record's position in indexArr
//  indexArr  - array of records shaped {id, value, authorId: Set}
//  key       - original (case-preserved) series / title / genre / language string
//  authorIds - final author ids to add to the record's authorId set
//  lowerCase - when true, a newly created record stores key.toLowerCase() as its value
const addAuthorsToIndex = (indexMap, indexArr, key, authorIds, lowerCase = false) => {
    let indexRec;
    if (indexMap.has(key)) {
        indexRec = indexArr[indexMap.get(key)];
    } else {
        //a record's id is its position in the array; the value is computed only on creation
        indexRec = {id: indexArr.length, value: (lowerCase ? key.toLowerCase() : key), authorId: new Set()};
        indexArr.push(indexRec);
        indexMap.set(key, indexRec.id);
    }

    for (const id of authorIds) {
        indexRec.authorId.add(id);
    }
};

//Second-pass handler for one book record: resolves the book's authors to their ids
//through authorMap and registers those ids in the series, title, genre and language indexes.
//Side effects: may increment noAuthorBookCount, writes the placeholder author name into
//rec.author when it is empty, and grows the series/title/genre/lang maps and arrays.
const parseBookRec = (rec) => {
    //authors
    if (!rec.author) {
        if (!rec.del)
            noAuthorBookCount++;
        //kept as an in-place write on rec, exactly like the original code
        rec.author = 'Автор не указан';
    }

    const author = rec.author.split(',');
    //for co-authored books the full combined author string is looked up as well
    if (author.length > 1)
        author.push(rec.author);

    const authorIds = [];
    for (const a of author) {
        const authorId = authorMap.get(a);
        if (!authorId) //safety net: skip names that have no id in authorMap
            continue;
        authorIds.push(authorId);
    }

    //series
    if (rec.series)
        addAuthorsToIndex(seriesMap, seriesArr, rec.series, authorIds, true);

    //titles
    if (rec.title)
        addAuthorsToIndex(titleMap, titleArr, rec.title, authorIds, true);

    //genres: comma-separated list, each genre is indexed separately and stored as is
    if (rec.genre) {
        for (const g of rec.genre.split(','))
            addAuthorsToIndex(genreMap, genreArr, g, authorIds);
    }

    //languages: stored as is
    if (rec.lang)
        addAuthorsToIndex(langMap, langArr, rec.lang, authorIds);
}
|
||||
|
||||
callback({job: 'search tables create', jobMessage: 'Создание поисковых таблиц'});
|
||||
|
||||
//парсинг 2
|
||||
while (1) {// eslint-disable-line
|
||||
//пробегаемся по сохраненным книгам
|
||||
const rows = await db.select({
|
||||
table: 'book',
|
||||
where: `
|
||||
let iter = @getItem('book_parsing');
|
||||
if (!iter) {
|
||||
iter = @all();
|
||||
@setItem('book_parsing', iter);
|
||||
}
|
||||
|
||||
const ids = new Set();
|
||||
let id = iter.next();
|
||||
while (!id.done && ids.size < 10000) {
|
||||
ids.add(id.value);
|
||||
id = iter.next();
|
||||
}
|
||||
|
||||
return ids;
|
||||
`
|
||||
});
|
||||
|
||||
if (rows.length) {
|
||||
for (const rec of rows)
|
||||
parseBookRec(rec);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//чистка памяти, ибо жрет как не в себя
|
||||
authorMap = null;
|
||||
seriesMap = null;
|
||||
titleMap = null;
|
||||
genreMap = null;
|
||||
|
||||
utils.freeMemory();
|
||||
for (let i = 0; i < 3; i++) {
|
||||
utils.freeMemory();
|
||||
await utils.sleep(1000);
|
||||
}
|
||||
|
||||
//config
|
||||
callback({job: 'config save', jobMessage: 'Сохранение конфигурации'});
|
||||
@@ -303,7 +376,7 @@ class DbCreator {
|
||||
utils.freeMemory();
|
||||
|
||||
//кэш-таблицы
|
||||
|
||||
|
||||
|
||||
callback({job: 'done', jobMessage: ''});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user