From 558fed31aa8272b01d2e743eac3c4c601a1cc344 Mon Sep 17 00:00:00 2001
From: Book Pauk
Date: Fri, 18 Oct 2019 17:07:28 +0700
Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?=
 =?UTF-8?q?=D0=BD=20=D1=84=D0=B8=D0=BB=D1=8C=D1=82=D1=80=20=D0=B4=D0=BB?=
 =?UTF-8?q?=D1=8F=20=D0=BD=D0=B5=D0=BA=D0=BE=D1=82=D0=BE=D1=80=D1=8B=D1=85?=
 =?UTF-8?q?=20=D1=81=D0=B0=D0=B9=D1=82=D0=BE=D0=B2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 server/core/BookConverter/ConvertSites.js | 128 +++++++++++++++++++++++
 server/core/BookConverter/index.js        |   1 +
 2 files changed, 129 insertions(+)
 create mode 100644 server/core/BookConverter/ConvertSites.js

diff --git a/server/core/BookConverter/ConvertSites.js b/server/core/BookConverter/ConvertSites.js
new file mode 100644
index 00000000..f829c665
--- /dev/null
+++ b/server/core/BookConverter/ConvertSites.js
@@ -0,0 +1,128 @@
+const URL = require('url').URL;
+
+const ConvertHtml = require('./ConvertHtml');
+
+// Per-hostname filter settings: `converter` names the ConvertSites method to
+// run, `begin`/`end` are the literal markers delimiting the region to keep.
+// NOTE(review): every begin/end literal below is empty or whitespace-only as
+// it appears in this patch, so cutter() rejects all of them; presumably they
+// held HTML markup that got lost - restore the real markers before merging.
+const sitesFilter = {
+    'www.fanfiction.net': {
+        converter: 'cutter',
+        begin: `
+`,
+        end: `
+`,
+    },
+    'archiveofourown.org': {
+        converter: 'cutter',
+        begin: ``,
+        end: ``,
+    }
+};
+
+/**
+ * HTML converter for the sites listed in sitesFilter: cuts the page down to
+ * the region between the site's markers, then hands it to ConvertHtml.
+ */
+class ConvertSites extends ConvertHtml {
+    /**
+     * Tells whether this converter applies to the given download.
+     *
+     * @param {*} data - not used by this check
+     * @param {Object} opts - reads `url` and `dataType.ext`
+     * @returns {{hostname: string}|false} the matched hostname, or false
+     */
+    check(data, opts) {
+        const {url, dataType} = opts;
+
+        if (!dataType || dataType.ext !== 'html')
+            return false;
+
+        let hostname;
+        try {
+            hostname = new URL(url).hostname;
+        } catch (e) {
+            // a missing or malformed url just means "not one of our sites"
+            return false;
+        }
+
+        // own-property test, so that a hostname such as "constructor" does
+        // not match a member inherited from Object.prototype
+        if (!Object.prototype.hasOwnProperty.call(sitesFilter, hostname))
+            return false;
+
+        return {hostname};
+    }
+
+    /**
+     * Converts a page that belongs to one of the known sites.
+     *
+     * @param {*} data - raw page content, passed to this.decode()
+     * @param {Object} opts - same options that check() reads
+     * @returns {Promise<*>} false when check() or the site converter
+     *     rejects the page, otherwise whatever ConvertHtml.run() returns
+     */
+    async run(data, opts) {
+        const checkResult = this.check(data, opts);
+        if (!checkResult)
+            return false;
+
+        const filter = sitesFilter[checkResult.hostname];
+
+        let text = this.decode(data).toString();
+
+        text = this[filter.converter](text, filter);
+
+        if (text === false)
+            return false;
+
+        return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true});
+    }
+
+    /**
+     * Builds a title element from the first <title>...</title> in the page.
+     *
+     * @param {string} text - page HTML
+     * @returns {string} `<title>...</title>` holding the trimmed title text,
+     *     with empty content when the page has no title element
+     */
+    getTitle(text) {
+        const m = text.match(/<title>([\s\S]*?)<\/title>/);
+        const title = (m ? m[1].trim() : '');
+
+        return `<title>${title}</title>`;
+    }
+
+    /**
+     * Keeps only the part of the page between two literal markers.
+     *
+     * @param {string} text - page HTML
+     * @param {Object} opts - reads the `begin` and `end` marker strings
+     * @returns {string|false} the text from `begin` (inclusive) up to `end`
+     *     (exclusive) followed by the title element, or false when a marker
+     *     is blank or the markers are not found in that order
+     */
+    cutter(text, opts) {
+        const {begin, end} = opts;
+
+        // an empty or whitespace-only marker cannot identify a region
+        if (!begin || !end || !begin.trim() || !end.trim())
+            return false;
+
+        const l = text.indexOf(begin);
+        if (l < 0)
+            return false;
+
+        // search for `end` only after `begin`, so that an earlier occurrence
+        // of the end marker does not make the page look unmatched
+        const r = text.indexOf(end, l + begin.length);
+        if (r < 0)
+            return false;
+
+        return text.substring(l, r) + this.getTitle(text);
+    }
+}
+
+module.exports = ConvertSites;
diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js
index 433c94b8..4b719ec1 100644
--- a/server/core/BookConverter/index.js
+++ b/server/core/BookConverter/index.js
@@ -11,6 +11,7 @@ const convertClassFactory = [
     require('./ConvertMobi'),
     require('./ConvertFb2'),
     require('./ConvertSamlib'),
+    require('./ConvertSites'),
     require('./ConvertHtml'),
 ];
