From 41d1dc14413ddee8c81006b3293907f86d6892e1 Mon Sep 17 00:00:00 2001 From: Book Pauk Date: Thu, 18 Aug 2022 00:41:00 +0700 Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B0=D0=B1=D0=BE=D1=82=D0=B0=20=D0=BD?= =?UTF-8?q?=D0=B0=D0=B4=20WebWorker=20=D0=B8=20DbCreator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/core/DbCreator.js | 115 ++++++++++++++++++++++++++++++++++----- server/core/WebWorker.js | 4 +- server/core/utils.js | 18 ++++++ server/index.js | 2 +- 4 files changed, 123 insertions(+), 16 deletions(-) diff --git a/server/core/DbCreator.js b/server/core/DbCreator.js index 634fdcd..9290ec8 100644 --- a/server/core/DbCreator.js +++ b/server/core/DbCreator.js @@ -28,15 +28,33 @@ class DbCreator { let titleArr = []; let genreMap = new Map();//жанры let genreArr = []; + let langMap = new Map();//языки + let langArr = []; + //stats + let authorCount = 0; + let bookCount = 0; + let noAuthorBookCount = 0; + let bookDelCount = 0; + + //stuff let recsLoaded = 0; let id = 0; + let chunkNum = 0; const parsedCallback = async(chunk) => { for (const rec of chunk) { rec.id = ++id; - if (!rec.author) - continue; + if (!rec.del) + bookCount++; + else + bookDelCount++; + + if (!rec.author) { + if (!rec.del) + noAuthorBookCount++; + rec.author = 'Автор не указан'; + } //авторы const author = rec.author.split(','); @@ -44,8 +62,10 @@ class DbCreator { author.push(rec.author); const authorIds = []; - for (const a of author) { - let authorRec; + for (let i = 0; i < author.length; i++) { + const a = author[i]; + + let authorRec; if (authorMap.has(a)) { const authorId = authorMap.get(a); authorRec = authorArr[authorId]; @@ -53,6 +73,9 @@ class DbCreator { authorRec = {id: authorArr.length, author: a, value: a.toLowerCase(), bookId: []}; authorArr.push(authorRec); authorMap.set(a, authorRec.id); + + if (author.length == 1 || i < author.length - 1) //без соавторов + authorCount++; } authorRec.bookId.push(id); @@ -84,8 +107,8 @@ class DbCreator { let titleRec; if (titleMap.has(title)) { - const titileId = titleMap.get(title); - titleRec = titleArr[titileId]; + const titleId = titleMap.get(title); + titleRec = titleArr[titleId]; } else { titleRec = {id: titleArr.length, value: title.toLowerCase(), authorId: new Set()}; titleArr.push(titleRec); @@ -117,20 +140,40 @@ class DbCreator { } } } + + //языки + if (rec.lang) { + const lang = rec.lang; + + let langRec; + if (langMap.has(lang)) { + const langId = langMap.get(lang); + langRec = langArr[langId]; + } else { + langRec = {id: langArr.length, value: lang, authorId: new Set()}; + langArr.push(langRec); + langMap.set(lang, langRec.id); + } + + for (const id of authorIds) { + langRec.authorId.add(id); + } + } } await db.insert({table: 'book', rows: chunk}); recsLoaded += chunk.length; callback({recsLoaded}); + + if (chunkNum++ % 10 == 0) + utils.freeMemory(); }; //парсинг const parser = new InpxParser(); await parser.parse(config.inpxFile, readFileCallback, parsedCallback); - callback({job: 'config save', jobMessage: 'Сохранение конфигурации'}); - //чистка памяти, ибо жрет как не в себя authorMap = null; seriesMap = null; @@ -139,13 +182,35 @@ class DbCreator { utils.freeMemory(); - //конфиг + //config + callback({job: 'config save', jobMessage: 'Сохранение конфигурации'}); + await db.create({ + table: 'config' + }); - console.log('author:', authorArr.length); - console.log('series:', seriesArr.length); - console.log('title:', titleArr.length); - console.log('genre:', genreArr.length); + const stats = { + recsLoaded, + authorCount, + authorCountAll: authorArr.length, + bookCount, + bookCountAll: bookCount + bookDelCount, + bookDelCount, + noAuthorBookCount, + titleCount: titleArr.length, + seriesCount: seriesArr.length, + genreCount: genreArr.length, + langCount: langArr.length, + }; + console.log(stats); + + const inpxHash = await utils.getFileHash(config.inpxFile, 'sha256', 'hex'); + + await db.insert({table: 'config', rows: [ + {id: 'inpxInfo', value: parser.info}, + {id: 'stats', value: stats}, + {id: 'inpxHash', value: inpxHash}, + ]}); //сохраним поисковые таблицы const chunkSize = 10000; @@ -196,12 +261,16 @@ class DbCreator { }); //вставка в БД по кусочкам, экономим память + let j = 0; for (let i = 0; i < titleArr.length; i += chunkSize) { const chunk = titleArr.slice(i, i + chunkSize); for (const rec of chunk) rec.authorId = Array.from(rec.authorId); await db.insert({table: 'title', rows: chunk}); + if (j++ % 10 == 0) + utils.freeMemory(); + await utils.sleep(100); } titleArr = null; @@ -228,6 +297,26 @@ class DbCreator { await db.close({table: 'genre'}); utils.freeMemory(); + //genre + callback({job: 'lang save', jobMessage: 'Сохранение языков'}); + await db.create({ + table: 'lang', + index: {field: 'value', depth: config.indexDepth}, + }); + + //вставка в БД по кусочкам, экономим память + for (let i = 0; i < langArr.length; i += chunkSize) { + const chunk = langArr.slice(i, i + chunkSize); + for (const rec of chunk) + rec.authorId = Array.from(rec.authorId); + + await db.insert({table: 'lang', rows: chunk}); + } + + langArr = null; + await db.close({table: 'lang'}); + utils.freeMemory(); + callback({job: 'done', jobMessage: ''}); } } diff --git a/server/core/WebWorker.js b/server/core/WebWorker.js index 6ce6dca..447c1c4 100644 --- a/server/core/WebWorker.js +++ b/server/core/WebWorker.js @@ -106,7 +106,7 @@ class WebWorker { } } - async loadOrCreateDb() { + async loadOrCreateDb(recreate = false) { this.setMyState(ssDbLoading); try { @@ -114,7 +114,7 @@ class WebWorker { const dbPath = `${config.dataDir}/db`; //пересоздаем БД из INPX если нужно - if (config.recreateDb) + if (config.recreateDb || recreate) await fs.remove(dbPath); if (!await fs.pathExists(dbPath)) { diff --git a/server/core/utils.js b/server/core/utils.js index 18c79bd..e100e45 100644 --- a/server/core/utils.js +++ b/server/core/utils.js @@ -1,5 +1,6 @@ const fs = require('fs-extra'); const path = require('path'); +const crypto = require('crypto'); function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); @@ -42,6 +43,21 @@ function freeMemory() { } } +function getFileHash(filename, hashName, enc) { + return new Promise((resolve, reject) => { + const hash = crypto.createHash(hashName); + const rs = fs.createReadStream(filename); + rs.on('error', reject); + rs.on('data', chunk => hash.update(chunk)); + rs.on('end', () => resolve(hash.digest(enc))); + }); +} + +function getBufHash(buf, hashName, enc) { + const hash = crypto.createHash(hashName); + hash.update(buf); + return hash.digest(enc); +} module.exports = { sleep, @@ -50,4 +66,6 @@ module.exports = { touchFile, hasProp, freeMemory, + getFileHash, + getBufHash, }; \ No newline at end of file diff --git a/server/index.js b/server/index.js index 63906c4..2b9dac6 100644 --- a/server/index.js +++ b/server/index.js @@ -96,7 +96,7 @@ async function init() { config.recreateDb = argv.recreate || false; - //TODO as cli param + //TODO as cli param? config.indexDepth = 1000; //app