Начало переделки структуры БД и поиска
This commit is contained in:
@@ -138,6 +138,72 @@ class DbCreator {
|
||||
callback({progress: (readState.current || 0)/totalFiles});
|
||||
};
|
||||
|
||||
/**
 * Registers a field value in a search index and links a book to it.
 *
 * String values are lower-cased to form the index key; an empty string is
 * replaced by `emptyFieldValue` first. Non-string values are used as the key as is.
 *
 * @param {*} fieldValue - raw field value taken from the book record
 * @param {Map} fieldMap - index key -> position of the record in `fieldArr`
 * @param {Array} fieldArr - index records of the form `{id, value, bookIds}`
 * @param {*} bookId - id of the book being indexed
 * @param {boolean} [fillBookIds=true] - when false, a book whose value equals
 *   `emptyFieldValue` is not added to the record's `bookIds`
 */
const parseField = (fieldValue, fieldMap, fieldArr, bookId, fillBookIds = true) => {
    // Work on a local copy so the caller-visible parameter is never reassigned.
    let rawValue = fieldValue;
    let value = fieldValue;

    if (typeof rawValue === 'string') {
        if (!rawValue)
            rawValue = emptyFieldValue;

        value = rawValue.toLowerCase();
    }

    let fieldRec;
    if (fieldMap.has(value)) {
        fieldRec = fieldArr[fieldMap.get(value)];
    } else {
        // The id of a new record is its position in fieldArr.
        fieldRec = {id: fieldArr.length, value, bookIds: new Set()};
        fieldArr.push(fieldRec);
        fieldMap.set(value, fieldRec.id);
    }

    // The record for the empty value is always created, but books are linked
    // to it only when fillBookIds is set.
    if (rawValue !== emptyFieldValue || fillBookIds)
        fieldRec.bookIds.add(bookId);
};
|
||||
|
||||
const parseBookRec = (rec) => {
|
||||
//авторы
|
||||
const author = splitAuthor(rec.author);
|
||||
|
||||
for (let i = 0; i < author.length; i++) {
|
||||
const a = author[i];
|
||||
|
||||
//статистика
|
||||
if (!authorMap.has(a.toLowerCase()) && (author.length == 1 || i < author.length - 1)) //без соавторов
|
||||
authorCount++;
|
||||
|
||||
parseField(a, authorMap, authorArr, rec.id);
|
||||
}
|
||||
|
||||
//серии
|
||||
parseField(rec.series, seriesMap, seriesArr, rec.id, false);
|
||||
|
||||
//названия
|
||||
parseField(rec.title, titleMap, titleArr, rec.id);
|
||||
|
||||
//жанры
|
||||
let genre = rec.genre || emptyFieldValue;
|
||||
genre = rec.genre.split(',');
|
||||
|
||||
for (let g of genre) {
|
||||
parseField(g, genreMap, genreArr, rec.id);
|
||||
}
|
||||
|
||||
//языки
|
||||
parseField(rec.lang, langMap, langArr, rec.id);
|
||||
|
||||
//удаленные
|
||||
parseField(rec.del, delMap, delArr, rec.id);
|
||||
|
||||
//дата поступления
|
||||
parseField(rec.date, dateMap, dateArr, rec.id);
|
||||
|
||||
//оценка
|
||||
parseField(rec.librate, librateMap, librateArr, rec.id);
|
||||
};
|
||||
|
||||
//основная процедура парсинга
|
||||
let id = 0;
|
||||
const parsedCallback = async(chunk) => {
|
||||
let filtered = false;
|
||||
@@ -159,40 +225,7 @@ class DbCreator {
|
||||
bookDelCount++;
|
||||
}
|
||||
|
||||
//авторы
|
||||
const author = splitAuthor(rec.author);
|
||||
|
||||
for (let i = 0; i < author.length; i++) {
|
||||
const a = author[i];
|
||||
const value = a.toLowerCase();
|
||||
|
||||
let authorRec;
|
||||
if (authorMap.has(value)) {
|
||||
const authorTmpId = authorMap.get(value);
|
||||
authorRec = authorArr[authorTmpId];
|
||||
} else {
|
||||
authorRec = {tmpId: authorArr.length, author: a, value, bookCount: 0, bookDelCount: 0, bookId: []};
|
||||
authorArr.push(authorRec);
|
||||
authorMap.set(value, authorRec.tmpId);
|
||||
|
||||
if (author.length == 1 || i < author.length - 1) //без соавторов
|
||||
authorCount++;
|
||||
}
|
||||
|
||||
//это нужно для того, чтобы имя автора начиналось с заглавной
|
||||
if (a[0].toUpperCase() === a[0])
|
||||
authorRec.author = a;
|
||||
|
||||
//счетчики
|
||||
if (!rec.del) {
|
||||
authorRec.bookCount++;
|
||||
} else {
|
||||
authorRec.bookDelCount++;
|
||||
}
|
||||
|
||||
//ссылки на книги
|
||||
authorRec.bookId.push(id);
|
||||
}
|
||||
parseBookRec(rec);
|
||||
}
|
||||
|
||||
let saveChunk = [];
|
||||
@@ -211,246 +244,10 @@ class DbCreator {
|
||||
utils.freeMemory();
|
||||
};
|
||||
|
||||
//парсинг 1
|
||||
//парсинг
|
||||
const parser = new InpxParser();
|
||||
await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
|
||||
|
||||
utils.freeMemory();
|
||||
|
||||
//отсортируем авторов и выдадим им правильные id
|
||||
//порядок id соответствует ASC-сортировке по author.toLowerCase
|
||||
callback({job: 'author sort', jobMessage: 'Сортировка авторов', jobStep: 2, progress: 0});
|
||||
await utils.sleep(100);
|
||||
authorArr.sort((a, b) => a.value.localeCompare(b.value));
|
||||
|
||||
id = 0;
|
||||
authorMap = new Map();
|
||||
for (const authorRec of authorArr) {
|
||||
authorRec.id = ++id;
|
||||
authorMap.set(authorRec.author, id);
|
||||
delete authorRec.tmpId;
|
||||
}
|
||||
|
||||
utils.freeMemory();
|
||||
|
||||
//подготовка к сохранению author_book
|
||||
/**
 * Loads the book rows referenced by a chunk of author records and stores one
 * `author_book` row per author, holding that author's books as a JSON string.
 *
 * Side effect: `bookId` is deleted from every author record in the chunk.
 *
 * @param {Array<object>} authorChunk - author records with `id`, `author` and `bookId` (array of book ids)
 * @param {function(number): void} callback - progress reporter, called with 0, 0.1, 0.6, 0.7 and 1
 * @returns {Promise<void>}
 */
const saveBookChunk = async(authorChunk, callback) => {
    callback(0);

    const ids = [];
    for (const a of authorChunk) {
        for (const bookId of a.bookId) {
            ids.push(bookId);
        }
    }

    // Mandatory: per the original note, unsorted ids make the query slow (a JembaDb peculiarity).
    ids.sort((a, b) => a - b);

    callback(0.1);
    const rows = await db.select({table: 'book', where: `@@id(${db.esc(ids)})`});
    callback(0.6);
    await utils.sleep(100);

    const bookArr = new Map();
    for (const row of rows)
        bookArr.set(row.id, row);

    const abRows = [];
    for (const a of authorChunk) {
        const aBooks = [];
        for (const bookId of a.bookId) {
            const rec = bookArr.get(bookId);
            // Skip ids with no matching row: an undefined entry would be
            // serialised as null and break code that reads the parsed books.
            if (rec)
                aBooks.push(rec);
        }

        abRows.push({id: a.id, author: a.author, books: JSON.stringify(aBooks)});

        // Not needed afterwards; authorArr is saved without it.
        delete a.bookId;
    }

    callback(0.7);
    await db.insert({
        table: 'author_book',
        rows: abRows,
    });
    callback(1);
};
|
||||
|
||||
callback({job: 'book sort', jobMessage: 'Сортировка книг', jobStep: 3, progress: 0});
|
||||
|
||||
//сохранение author_book
|
||||
await db.create({
|
||||
table: 'author_book',
|
||||
});
|
||||
|
||||
let idsLen = 0;
|
||||
let aChunk = [];
|
||||
let prevI = 0;
|
||||
for (let i = 0; i < authorArr.length; i++) {// eslint-disable-line
|
||||
const author = authorArr[i];
|
||||
|
||||
aChunk.push(author);
|
||||
idsLen += author.bookId.length;
|
||||
|
||||
if (idsLen > 50000) {//константа выяснена эмпирическим путем "память/скорость"
|
||||
await saveBookChunk(aChunk, (p) => {
|
||||
callback({progress: (prevI + (i - prevI)*p)/authorArr.length});
|
||||
});
|
||||
|
||||
prevI = i;
|
||||
idsLen = 0;
|
||||
aChunk = [];
|
||||
await utils.sleep(100);
|
||||
utils.freeMemory();
|
||||
await db.freeMemory();
|
||||
}
|
||||
}
|
||||
if (aChunk.length) {
|
||||
await saveBookChunk(aChunk, () => {});
|
||||
aChunk = null;
|
||||
}
|
||||
|
||||
callback({progress: 1});
|
||||
|
||||
//чистка памяти, ибо жрет как не в себя
|
||||
await db.close({table: 'book'});
|
||||
await db.freeMemory();
|
||||
utils.freeMemory();
|
||||
|
||||
//парсинг 2, подготовка
|
||||
/**
 * Registers a field value in a search index and links authors (and optionally
 * a book) to it.
 *
 * String values are lower-cased to form the index key; an empty string is
 * replaced by `emptyFieldValue`, and in that case the book is not linked.
 *
 * @param {*} fieldValue - raw field value taken from the book record
 * @param {Map} fieldMap - index key -> position of the record in `fieldArr`
 * @param {Array} fieldArr - index records of the form `{id, value, authorId[, bookId]}`
 * @param {Iterable} authorIds - ids of the book's authors, all added to the record
 * @param {*} [bookId] - id of the book; when falsy, no book link is stored
 */
const parseField = (fieldValue, fieldMap, fieldArr, authorIds, bookId) => {
    // Work on local copies so the parameters are never reassigned.
    let rawValue = fieldValue;
    let value = fieldValue;
    let addBookId = bookId;

    if (typeof rawValue === 'string') {
        if (!rawValue) {
            rawValue = emptyFieldValue;
            // Books with an empty value are deliberately not linked.
            addBookId = 0;
        }

        value = rawValue.toLowerCase();
    }

    let fieldRec;
    if (fieldMap.has(value)) {
        fieldRec = fieldArr[fieldMap.get(value)];
    } else {
        // The id of a new record is its position in fieldArr.
        fieldRec = {id: fieldArr.length, value, authorId: new Set()};
        if (bookId)
            fieldRec.bookId = new Set();
        fieldArr.push(fieldRec);
        fieldMap.set(value, fieldRec.id);
    }

    for (const id of authorIds) {
        fieldRec.authorId.add(id);
    }

    if (addBookId) {
        // The record may have been created by a call without bookId; create
        // the set on demand instead of failing on an undefined property.
        if (!fieldRec.bookId)
            fieldRec.bookId = new Set();
        fieldRec.bookId.add(addBookId);
    }
};
|
||||
|
||||
/**
 * Adds one book record to the search indexes, linking each index entry to the
 * ids of the book's authors (and, for series and titles, to the book id).
 *
 * @param {object} rec - book record; the fields read are `id`, `author`,
 *   `series`, `title`, `genre`, `lang`, `del`, `date` and `librate`
 */
const parseBookRec = (rec) => {
    // authors: resolve names to ids via authorMap
    const author = splitAuthor(rec.author);

    const authorIds = [];
    for (const a of author) {
        const authorId = authorMap.get(a);
        if (!authorId) // safety net: skip names that have no id
            continue;
        authorIds.push(authorId);
    }

    // series
    parseField(rec.series, seriesMap, seriesArr, authorIds, rec.id);

    // titles
    parseField(rec.title, titleMap, titleArr, authorIds, rec.id);

    // genres: split the defaulted value, so a record without a genre is
    // indexed under emptyFieldValue instead of throwing on rec.genre.split
    const genre = (rec.genre || emptyFieldValue).split(',');

    for (let g of genre) {
        if (!g)
            g = emptyFieldValue;

        // Genre keys are used as is (not lower-cased), and only author ids are stored.
        let genreRec;
        if (genreMap.has(g)) {
            genreRec = genreArr[genreMap.get(g)];
        } else {
            genreRec = {id: genreArr.length, value: g, authorId: new Set()};
            genreArr.push(genreRec);
            genreMap.set(g, genreRec.id);
        }

        for (const id of authorIds) {
            genreRec.authorId.add(id);
        }
    }

    // languages
    parseField(rec.lang, langMap, langArr, authorIds);

    // deleted flag
    parseField(rec.del, delMap, delArr, authorIds);

    // arrival date
    parseField(rec.date, dateMap, dateArr, authorIds);

    // rating
    parseField(rec.librate, librateMap, librateArr, authorIds);
};
|
||||
|
||||
callback({job: 'search tables create', jobMessage: 'Создание поисковых таблиц', jobStep: 4, progress: 0});
|
||||
|
||||
//парсинг 2, теперь можно создавать остальные поисковые таблицы
|
||||
let proc = 0;
|
||||
while (1) {// eslint-disable-line
|
||||
const rows = await db.select({
|
||||
table: 'author_book',
|
||||
where: `
|
||||
let iter = @getItem('parse_book');
|
||||
if (!iter) {
|
||||
iter = @all();
|
||||
@setItem('parse_book', iter);
|
||||
}
|
||||
|
||||
const ids = new Set();
|
||||
let id = iter.next();
|
||||
while (!id.done) {
|
||||
ids.add(id.value);
|
||||
if (ids.size >= 10000)
|
||||
break;
|
||||
id = iter.next();
|
||||
}
|
||||
|
||||
return ids;
|
||||
`
|
||||
});
|
||||
|
||||
if (rows.length) {
|
||||
for (const row of rows) {
|
||||
const books = JSON.parse(row.books);
|
||||
for (const rec of books)
|
||||
parseBookRec(rec);
|
||||
}
|
||||
|
||||
proc += rows.length;
|
||||
callback({progress: proc/authorArr.length});
|
||||
} else
|
||||
break;
|
||||
|
||||
if (config.lowMemoryMode) {
|
||||
await utils.sleep(100);
|
||||
utils.freeMemory();
|
||||
await db.freeMemory();
|
||||
}
|
||||
}
|
||||
|
||||
//чистка памяти, ибо жрет как не в себя
|
||||
authorMap = null;
|
||||
seriesMap = null;
|
||||
@@ -461,25 +258,42 @@ class DbCreator {
|
||||
dateMap = null;
|
||||
librateMap = null;
|
||||
|
||||
await db.close({table: 'book'});
|
||||
await db.freeMemory();
|
||||
utils.freeMemory();
|
||||
|
||||
//сортировка серий
|
||||
callback({job: 'sort', jobMessage: 'Сортировка', jobStep: 5, progress: 0});
|
||||
//отсортируем таблицы выдадим им правильные id
|
||||
//порядок id соответствует ASC-сортировке по value
|
||||
callback({job: 'sort', jobMessage: 'Сортировка', jobStep: 2, progress: 0});
|
||||
await utils.sleep(100);
|
||||
seriesArr.sort((a, b) => a.value.localeCompare(b.value));
|
||||
//сортировка авторов
|
||||
authorArr.sort((a, b) => a.value.localeCompare(b.value));
|
||||
callback({progress: 0.2});
|
||||
await utils.sleep(100);
|
||||
|
||||
id = 0;
|
||||
for (const authorRec of authorArr) {
|
||||
authorRec.id = ++id;
|
||||
}
|
||||
callback({progress: 0.3});
|
||||
await utils.sleep(100);
|
||||
|
||||
//сортировка серий
|
||||
seriesArr.sort((a, b) => a.value.localeCompare(b.value));
|
||||
callback({progress: 0.5});
|
||||
await utils.sleep(100);
|
||||
|
||||
id = 0;
|
||||
for (const seriesRec of seriesArr) {
|
||||
seriesRec.id = ++id;
|
||||
}
|
||||
callback({progress: 0.6});
|
||||
await utils.sleep(100);
|
||||
|
||||
await utils.sleep(100);
|
||||
callback({progress: 0.5});
|
||||
//заодно и названия
|
||||
//сортировка названий
|
||||
titleArr.sort((a, b) => a.value.localeCompare(b.value));
|
||||
await utils.sleep(100);
|
||||
callback({progress: 0.7});
|
||||
callback({progress: 0.8});
|
||||
await utils.sleep(100);
|
||||
id = 0;
|
||||
for (const titleRec of titleArr) {
|
||||
titleRec.id = ++id;
|
||||
@@ -507,7 +321,7 @@ class DbCreator {
|
||||
//сохраним поисковые таблицы
|
||||
const chunkSize = 10000;
|
||||
|
||||
const saveTable = async(table, arr, nullArr, authorIdToArray = false, bookIdToArray = false, indexType = 'string') => {
|
||||
const saveTable = async(table, arr, nullArr, indexType = 'string') => {
|
||||
|
||||
if (indexType == 'string')
|
||||
arr.sort((a, b) => a.value.localeCompare(b.value));
|
||||
@@ -523,21 +337,14 @@ class DbCreator {
|
||||
for (let i = 0; i < arr.length; i += chunkSize) {
|
||||
const chunk = arr.slice(i, i + chunkSize);
|
||||
|
||||
if (authorIdToArray) {
|
||||
for (const rec of chunk)
|
||||
rec.authorId = Array.from(rec.authorId);
|
||||
}
|
||||
|
||||
if (bookIdToArray) {
|
||||
for (const rec of chunk)
|
||||
rec.bookId = Array.from(rec.bookId);
|
||||
}
|
||||
for (const rec of chunk)
|
||||
rec.bookIds = Array.from(rec.bookIds);
|
||||
|
||||
await db.insert({table, rows: chunk});
|
||||
|
||||
if (i % 5 == 0) {
|
||||
await db.freeMemory();
|
||||
await utils.sleep(100);
|
||||
await utils.sleep(10);
|
||||
}
|
||||
|
||||
callback({progress: i/arr.length});
|
||||
@@ -555,28 +362,28 @@ class DbCreator {
|
||||
|
||||
//series
|
||||
callback({job: 'series save', jobMessage: 'Сохранение индекса серий', jobStep: 7, progress: 0});
|
||||
await saveTable('series', seriesArr, () => {seriesArr = null}, true, true);
|
||||
await saveTable('series', seriesArr, () => {seriesArr = null});
|
||||
|
||||
//title
|
||||
callback({job: 'title save', jobMessage: 'Сохранение индекса названий', jobStep: 8, progress: 0});
|
||||
await saveTable('title', titleArr, () => {titleArr = null}, true, true);
|
||||
await saveTable('title', titleArr, () => {titleArr = null});
|
||||
|
||||
//genre
|
||||
callback({job: 'genre save', jobMessage: 'Сохранение индекса жанров', jobStep: 9, progress: 0});
|
||||
await saveTable('genre', genreArr, () => {genreArr = null}, true);
|
||||
await saveTable('genre', genreArr, () => {genreArr = null});
|
||||
|
||||
callback({job: 'others save', jobMessage: 'Сохранение остальных индексов', jobStep: 10, progress: 0});
|
||||
//lang
|
||||
await saveTable('lang', langArr, () => {langArr = null}, true);
|
||||
await saveTable('lang', langArr, () => {langArr = null});
|
||||
|
||||
//del
|
||||
await saveTable('del', delArr, () => {delArr = null}, true, false, 'number');
|
||||
await saveTable('del', delArr, () => {delArr = null}, 'number');
|
||||
|
||||
//date
|
||||
await saveTable('date', dateArr, () => {dateArr = null}, true);
|
||||
await saveTable('date', dateArr, () => {dateArr = null});
|
||||
|
||||
//librate
|
||||
await saveTable('librate', librateArr, () => {librateArr = null}, true, false, 'number');
|
||||
await saveTable('librate', librateArr, () => {librateArr = null}, 'number');
|
||||
|
||||
//кэш-таблицы запросов
|
||||
await db.create({table: 'query_cache'});
|
||||
@@ -592,14 +399,19 @@ class DbCreator {
|
||||
});
|
||||
|
||||
callback({job: 'optimization', jobMessage: 'Оптимизация', jobStep: 11, progress: 0});
|
||||
await this.optimizeTable('series', 'series_book', 'series', db, (p) => {
|
||||
await this.optimizeTable('author', db, (p) => {
|
||||
if (p.progress)
|
||||
p.progress = 0.2*p.progress;
|
||||
p.progress = 0.3*p.progress;
|
||||
callback(p);
|
||||
});
|
||||
await this.optimizeTable('title', 'title_book', 'title', db, (p) => {
|
||||
await this.optimizeTable('series', db, (p) => {
|
||||
if (p.progress)
|
||||
p.progress = 0.2 + 0.8*p.progress;
|
||||
p.progress = 0.3 + 0.2*p.progress;
|
||||
callback(p);
|
||||
});
|
||||
await this.optimizeTable('title', db, (p) => {
|
||||
if (p.progress)
|
||||
p.progress = 0.5 + 0.5*p.progress;
|
||||
callback(p);
|
||||
});
|
||||
|
||||
@@ -627,7 +439,11 @@ class DbCreator {
|
||||
callback({job: 'done', jobMessage: ''});
|
||||
}
|
||||
|
||||
async optimizeTable(from, to, restoreProp, db, callback) {
|
||||
async optimizeTable(from, db, callback) {
|
||||
const to = `${from}_book`;
|
||||
const toId = `${from}_id`;
|
||||
const restoreProp = from;
|
||||
|
||||
//оптимизация таблицы from, превращаем массив bookId в books, кладем все в таблицу to
|
||||
await db.open({table: from});
|
||||
|
||||
@@ -636,10 +452,19 @@ class DbCreator {
|
||||
flag: {name: 'toDel', check: 'r => r.toDel'},
|
||||
});
|
||||
|
||||
const bookId2RecId = new Map();
|
||||
|
||||
const saveChunk = async(chunk) => {
|
||||
const ids = [];
|
||||
for (const s of chunk) {
|
||||
for (const id of s.bookId) {
|
||||
for (const rec of chunk) {
|
||||
for (const id of rec.bookIds) {
|
||||
let b2r = bookId2RecId.get(id);
|
||||
if (!b2r) {
|
||||
b2r = [];
|
||||
bookId2RecId.set(id, b2r);
|
||||
}
|
||||
b2r.push(rec.id);
|
||||
|
||||
ids.push(id);
|
||||
}
|
||||
}
|
||||
@@ -652,30 +477,30 @@ class DbCreator {
|
||||
for (const row of rows)
|
||||
bookArr.set(row.id, row);
|
||||
|
||||
for (const s of chunk) {
|
||||
s.books = [];
|
||||
s.bookCount = 0;
|
||||
s.bookDelCount = 0;
|
||||
for (const id of s.bookId) {
|
||||
const rec = bookArr.get(id);
|
||||
for (const rec of chunk) {
|
||||
rec.books = [];
|
||||
rec.bookCount = 0;
|
||||
rec.bookDelCount = 0;
|
||||
|
||||
for (const id of rec.bookIds) {
|
||||
const book = bookArr.get(id);
|
||||
if (rec) {//на всякий случай
|
||||
s.books.push(rec);
|
||||
if (!rec.del)
|
||||
s.bookCount++;
|
||||
rec.books.push(book);
|
||||
if (!book.del)
|
||||
rec.bookCount++;
|
||||
else
|
||||
s.bookDelCount++;
|
||||
rec.bookDelCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (s.books.length) {
|
||||
s[restoreProp] = s.books[0][restoreProp];
|
||||
if (rec.books.length) {
|
||||
rec[restoreProp] = rec.books[0][restoreProp];
|
||||
} else {
|
||||
s.toDel = 1;
|
||||
rec.toDel = 1;
|
||||
}
|
||||
|
||||
delete s.value;
|
||||
delete s.authorId;
|
||||
delete s.bookId;
|
||||
delete rec.value;
|
||||
delete rec.bookIds;
|
||||
}
|
||||
|
||||
await db.insert({
|
||||
@@ -699,11 +524,16 @@ class DbCreator {
|
||||
}
|
||||
|
||||
const ids = new Set();
|
||||
let bookIdsLen = 0;
|
||||
let id = iter.next();
|
||||
while (!id.done) {
|
||||
ids.add(id.value);
|
||||
if (ids.size >= 20000)
|
||||
|
||||
const row = @row(id.value);
|
||||
bookIdsLen += row.bookIds.length;
|
||||
if (bookIdsLen >= 50000)
|
||||
break;
|
||||
|
||||
id = iter.next();
|
||||
}
|
||||
|
||||
@@ -729,6 +559,14 @@ class DbCreator {
|
||||
await db.delete({table: to, where: `@@flag('toDel')`});
|
||||
await db.close({table: to});
|
||||
await db.close({table: from});
|
||||
|
||||
await db.create({table: toId});
|
||||
const idRows = [];
|
||||
for (const [id, value] of bookId2RecId) {
|
||||
idRows.push({id, value});
|
||||
}
|
||||
await db.insert({table: toId, rows: idRows});
|
||||
await db.close({table: toId});
|
||||
}
|
||||
|
||||
async countStats(db, callback, stats) {
|
||||
|
||||
Reference in New Issue
Block a user