From f3b69caa123d9f12c73a3b3d69c05b41258af970 Mon Sep 17 00:00:00 2001
From: Book Pauk
Date: Tue, 8 Dec 2020 16:17:36 +0700
Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B0=D0=B1=D0=BE=D1=82=D0=B0=20=D0=BD?=
 =?UTF-8?q?=D0=B0=D0=B4=20=D0=BC=D0=BE=D0=B4=D1=83=D0=BB=D0=B5=D0=BC=20xml?=
 =?UTF-8?q?Parser?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (placed after the "---" marker, so they are not part of the
commit message or of the diff payload):

  * This patch text had all of its line breaks collapsed; the line structure
    below is reconstructed and matches every hunk header count and the
    diffstat. Indentation is assumed to be 4 spaces per level - TODO confirm
    the context lines of the sax.js hunks against the real file before
    applying.
  * formatXmlNodeImpl: the closing-tag part of two template literals was
    missing from the text (`tClose += ``;` and the string branch), which
    produced unbalanced XML. `</${name}>` is restored in both places.
  * formatXmlNodeImpl: the array branch now forwards textFilterFunc to the
    recursive formatXmlNode() call. Before, it was called with one argument,
    so every node reached through an array (including all `_a` children built
    by parseXml) was serialized with the default identity filter.
    Because of this change the blob id in the "index" line below no longer
    describes the resulting file.
  * Node shape used by parseXml/formatXmlNode, as visible in the code:
    `_n` tag name, `_p` parent node, `_attrs` map of full attribute name to
    value, `_a` array of child nodes, `_t` text (each text event overwrites
    the previous value). Keys starting with "_" other than `_a` are skipped
    when serializing. parseXml returns the first top-level element, or an
    empty object when no element was parsed.
  * Attribute values are written verbatim between double quotes (no escaping).
  * simplifyXml and desimplifyXml are empty stubs in this commit.

 server/core/Reader/BookConverter/xmlParser.js | 120 ++++++++++++++++++
 server/core/sax.js                            |  24 ++--
 2 files changed, 135 insertions(+), 9 deletions(-)
 create mode 100644 server/core/Reader/BookConverter/xmlParser.js

diff --git a/server/core/Reader/BookConverter/xmlParser.js b/server/core/Reader/BookConverter/xmlParser.js
new file mode 100644
index 00000000..23c384e7
--- /dev/null
+++ b/server/core/Reader/BookConverter/xmlParser.js
@@ -0,0 +1,120 @@
+const sax = require('../../sax');
+
+function formatXml(parsedXml, textFilterFunc) {
+    let out = '';
+    out += formatXmlNode(parsedXml, textFilterFunc);
+    return out;
+}
+
+function formatXmlNode(node, textFilterFunc) {
+    textFilterFunc = (textFilterFunc ? textFilterFunc : text => text);
+
+    const formatXmlNodeImpl = (node, name) => {
+        let out = '';
+
+        if (Array.isArray(node)) {
+            for (const n of node) {
+                out += formatXmlNode(n, textFilterFunc);
+            }
+        } else if (typeof node == 'string') {
+            if (name)
+                out += `<${name}>${textFilterFunc(node)}</${name}>`;
+            else
+                out += textFilterFunc(node);
+        } else {
+            if (node._n)
+                name = node._n;
+
+            let attrs = '';
+            if (node._attrs) {
+                for (let attrName in node._attrs) {
+                    attrs += ` ${attrName}="${node._attrs[attrName]}"`;
+                }
+            }
+
+            let tOpen = '';
+            let tBody = '';
+            let tClose = '';
+            if (name)
+                tOpen += `<${name}${attrs}>`;
+            if (node.hasOwnProperty('_t'))
+                tBody += textFilterFunc(node._t);
+
+            for (let nodeName in node) {
+                if (nodeName && nodeName[0] == '_' && nodeName != '_a')
+                    continue;
+
+                const n = node[nodeName];
+                tBody += formatXmlNodeImpl(n, nodeName);
+            }
+
+            if (name)
+                tClose += `</${name}>`;
+
+            out += `${tOpen}${tBody}${tClose}`;
+        }
+        return out;
+    }
+
+    return formatXmlNodeImpl(node);
+}
+
+function parseXml(xmlString, lowerCase = true) {
+    let result = {};
+    let node = result;
+
+    const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
+        node._t = text;
+    };
+
+    const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
+        const newNode = {_n: tag, _p: node};
+
+        if (tail) {
+            const parsedAttrs = sax.getAttrsSync(tail, lowerCase);
+            const atKeys = Object.keys(parsedAttrs);
+            if (atKeys.length) {
+                const attrs = {};
+                for (let i = 0; i < atKeys.length; i++) {
+                    const attrName = atKeys[i];
+                    attrs[parsedAttrs[attrName].fullname] = parsedAttrs[attrName].value;
+                }
+
+                newNode._attrs = attrs;
+            }
+        }
+
+        if (!node._a)
+            node._a = [];
+        node._a.push(newNode);
+        node = newNode;
+    };
+
+    const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
+        if (node._p && node._n == tag)
+            node = node._p;
+    };
+
+    sax.parseSync(xmlString, {
+        onStartNode, onEndNode, onTextNode, lowerCase
+    });
+
+    if (result._a)
+        result = result._a[0];
+
+    return result;
+}
+
+function simplifyXml(parsedXml) {
+}
+
+function desimplifyXml(parsedXml) {
+}
+
+module.exports = {
+    formatXml,
+    formatXmlNode,
+    parseXml,
+    simplifyXml,
+    desimplifyXml
+}
\ No newline at end of file
diff --git a/server/core/sax.js b/server/core/sax.js
index 8cd80b3a..5f5c1535 100644
--- a/server/core/sax.js
+++ b/server/core/sax.js
@@ -6,7 +6,8 @@ function parseSync(xstr, options) {
         onCdata: _onCdata = dummy,
         onComment: _onComment = dummy,
         onProgress: _onProgress = dummy,
-        innerCut = new Set()
+        innerCut = new Set(),
+        lowerCase = true,
     } = options;
 
     let i = 0;
@@ -91,7 +92,8 @@ function parseSync(xstr, options) {
             } else {
                 tag = tagData;
             }
-            tag = tag.toLowerCase();
+            if (lowerCase)
+                tag = tag.toLowerCase();
 
             if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                 if (!cutCounter)
@@ -146,7 +148,8 @@ async function parse(xstr, options) {
         onCdata: _onCdata = dummy,
         onComment: _onComment = dummy,
         onProgress: _onProgress = dummy,
-        innerCut = new Set()
+        innerCut = new Set(),
+        lowerCase = true,
     } = options;
 
     let i = 0;
@@ -231,7 +234,8 @@ async function parse(xstr, options) {
             } else {
                 tag = tagData;
             }
-            tag = tag.toLowerCase();
+            if (lowerCase)
+                tag = tag.toLowerCase();
 
             if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                 if (!cutCounter)
@@ -276,7 +280,7 @@ async function parse(xstr, options) {
     await _onProgress(100);
 }
 
-function getAttrsSync(tail) {
+function getAttrsSync(tail, lowerCase = true) {
     let result = {};
     let name = '';
     let value = '';
@@ -287,14 +291,16 @@ function getAttrsSync(tail) {
     let waitEq = false;
 
     const pushResult = () => {
-        name = name.toLowerCase();
+        if (lowerCase)
+            name = name.toLowerCase();
         if (name != '') {
+            const fullname = name;
             let ns = '';
-            if (name.indexOf(':') >= 0) {
-                [ns, name] = name.split(':');
+            if (fullname.indexOf(':') >= 0) {
+                [ns, name] = fullname.split(':');
             }
 
-            result[name] = {value, ns};
+            result[name] = {value, ns, fullname};
         }
         name = '';
         value = '';