diff --git a/server/core/BookConverter/ConvertSites.js b/server/core/BookConverter/ConvertSites.js
new file mode 100644
index 00000000..f829c665
--- /dev/null
+++ b/server/core/BookConverter/ConvertSites.js
@@ -0,0 +1,120 @@
+const URL = require('url').URL;
+
+const ConvertHtml = require('./ConvertHtml');
+
+// Per-hostname extraction rules: which method of ConvertSites to run
+// (`converter`) and the markers that delimit the story text in the page.
+// NOTE(review): the markers below are empty or whitespace-only and `begin`
+// equals `end`, so `cutter` always returns false for them. They look like
+// HTML fragments that were lost; restore the real markers before relying
+// on this converter.
+const sitesFilter = {
+    'www.fanfiction.net': {
+        converter: 'cutter',
+        begin: `
+`,
+        end: `
+`,
+    },
+    'archiveofourown.org': {
+        converter: 'cutter',
+        begin: ``,
+        end: ``,
+    }
+};
+
+/**
+ * Converter for HTML pages from known sites: cuts the story text out of the
+ * page and hands the result to ConvertHtml.
+ */
+class ConvertSites extends ConvertHtml {
+    /**
+     * Decides whether this converter handles the input.
+     *
+     * @param {*} data - unused here
+     * @param {Object} opts - expects `url` and `dataType` ({ext})
+     * @returns {{hostname: string}|false} the matched hostname, or false
+     */
+    check(data, opts) {
+        const {url, dataType} = opts;
+
+        if (!dataType || dataType.ext !== 'html')
+            return false;
+
+        let hostname;
+        try {
+            hostname = new URL(url).hostname;
+        } catch (e) {
+            // A missing or malformed URL just means "not one of our sites".
+            return false;
+        }
+
+        // Own-property test so hostnames such as "constructor" do not match
+        // members inherited from Object.prototype.
+        if (!Object.prototype.hasOwnProperty.call(sitesFilter, hostname))
+            return false;
+
+        return {hostname};
+    }
+
+    /**
+     * Extracts the story text for a supported site and converts it with the
+     * parent class.
+     *
+     * @param {*} data - raw page; passed to this.decode() (inherited)
+     * @param {Object} opts - same options as check()
+     * @returns {Promise<*|false>} false when the input is not handled or the
+     *     markers are not found, otherwise whatever ConvertHtml.run resolves to
+     */
+    async run(data, opts) {
+        const checkResult = this.check(data, opts);
+        if (!checkResult)
+            return false;
+
+        const filter = sitesFilter[checkResult.hostname];
+        const page = this.decode(data).toString();
+
+        // `converter` names the method of this class that does the extraction.
+        const text = this[filter.converter](page, filter);
+        if (text === false)
+            return false;
+
+        return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true});
+    }
+
+    /**
+     * Returns the page's <title> element rebuilt with trimmed text; the
+     * element is empty when the page has no title.
+     *
+     * @param {string} text - page HTML
+     * @returns {string}
+     */
+    getTitle(text) {
+        // Anchor on the opening tag so only the title text is captured, not
+        // everything from the start of the document.
+        const m = text.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
+        const title = (m ? m[1] : '');
+
+        return `<title>${title.trim()}</title>`;
+    }
+
+    /**
+     * Cuts out the part of the page from the first `opts.begin` (inclusive)
+     * to the first `opts.end` (exclusive) and appends the page title.
+     *
+     * @param {string} text - page HTML
+     * @param {{begin: string, end: string}} opts - delimiting markers
+     * @returns {string|false} false when a marker is missing or the end
+     *     marker does not come after the begin marker
+     */
+    cutter(text, opts) {
+        const l = text.indexOf(opts.begin);
+        const r = text.indexOf(opts.end);
+        if (l < 0 || r < 0 || r <= l)
+            return false;
+
+        return text.substring(l, r) + this.getTitle(text);
+    }
+}
+
+module.exports = ConvertSites;
diff --git a/server/core/BookConverter/index.js b/server/core/BookConverter/index.js
index 433c94b8..4b719ec1 100644
--- a/server/core/BookConverter/index.js
+++ b/server/core/BookConverter/index.js
@@ -11,6 +11,7 @@ const convertClassFactory = [
     require('./ConvertMobi'),
     require('./ConvertFb2'),
     require('./ConvertSamlib'),
+    require('./ConvertSites'),
     require('./ConvertHtml'),
 ];
 