Skip to content

Commit 655c8d3

Browse files
committed
New XML parser
1 parent 6662985 commit 655c8d3

File tree

3 files changed

+382
-133
lines changed

3 files changed

+382
-133
lines changed

src/display/dom_utils.js

Lines changed: 0 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -135,132 +135,6 @@ class DOMSVGFactory {
135135
}
136136
}
137137

138-
/**
 * Minimal DOM-like node produced by `SimpleXMLParser`.
 *
 * Element nodes get a `childNodes` array assigned by the parser after
 * construction; text nodes (`nodeName === '#text'`) never have one and carry
 * their content in `nodeValue` instead.
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName - Tag name, or '#text' for text/CDATA nodes.
   * @param {string} [nodeValue] - Text content; only used for text nodes.
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;

    // Non-enumerable on purpose: keeps the back-reference to the parent out
    // of `Object.keys`/`JSON.stringify`, which would otherwise hit a cycle.
    Object.defineProperty(this, 'parentNode', { value: null, writable: true, });
  }

  /**
   * @returns {SimpleDOMNode|undefined} The first child, or `undefined` when
   *   the node has no children (including text nodes, which have no
   *   `childNodes` array at all).
   */
  get firstChild() {
    return this.childNodes ? this.childNodes[0] : undefined;
  }

  /**
   * @returns {SimpleDOMNode|undefined} The node following this one in its
   *   parent's child list, or `undefined` for the last child, for a node
   *   without a parent, or for a node that is not listed by its parent.
   */
  get nextSibling() {
    const siblings = this.parentNode && this.parentNode.childNodes;
    if (!siblings) {
      return undefined;
    }
    const index = siblings.indexOf(this);
    // Without this guard a detached node (-1) would yield `siblings[0]`.
    return index === -1 ? undefined : siblings[index + 1];
  }

  /**
   * @returns {string} `nodeValue` (or '') for nodes without `childNodes`,
   *   otherwise the concatenated `textContent` of all children.
   */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || '';
    }
    return this.childNodes.map((child) => child.textContent).join('');
  }

  /**
   * @returns {boolean} `true` when the node has at least one child.
   */
  hasChildNodes() {
    return !!this.childNodes && this.childNodes.length > 0;
  }
}
168-
169-
/**
 * Very small, regular-expression based XML parser that builds a tree of
 * `SimpleDOMNode` objects. It is not a validating parser: attributes are
 * matched but discarded, and comments, processing instructions and DOCTYPE
 * declarations are stripped before parsing.
 *
 * NOTE: text nodes are extracted before CDATA sections, so a CDATA section
 * whose content contains '>' is mis-tokenized by the text-node pass.
 */
class SimpleXMLParser {
  /**
   * Parses an XML string.
   *
   * The algorithm repeatedly replaces the innermost constructs (text, CDATA,
   * then elements whose content is already reduced) by "<index>," placeholders
   * that refer to entries of a node table, until nothing more can be reduced.
   *
   * @param {string} data - The XML source.
   * @returns {{documentElement: (SimpleDOMNode|undefined)}} Object whose
   *   `documentElement` is the last node created (the root for well-formed
   *   input), or `undefined` if no node could be created.
   */
  parseFromString(data) {
    const nodes = [];

    // Remove all comments and processing instructions.
    let xml = data.replace(/<\?[\s\S]*?\?>|<!--[\s\S]*?-->/g, '').trim();
    xml = xml.replace(/<!DOCTYPE[^>\[]+(\[[^\]]+)?[^>]+>/g, '').trim();

    // Extract all text nodes and replace them with a numeric index in
    // the nodes.
    xml = xml.replace(/>([^<][\s\S]*?)</g, (all, text) => {
      const decoded = this._decodeXML(text);
      if (decoded.trim().length === 0) {
        return '><'; // Ignore whitespace; no node is stored for it.
      }
      const index = nodes.length;
      nodes.push(new SimpleDOMNode('#text', decoded));
      return '>' + index + ',<';
    });

    // Extract all CDATA nodes; their content is taken verbatim (no entity
    // decoding).
    xml = xml.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, (all, text) => {
      const index = nodes.length;
      nodes.push(new SimpleDOMNode('#text', text));
      return index + ',';
    });

    // Until nodes without '<' and '>' content are present, replace them
    // with a numeric index in the nodes.
    const regex =
      /<([\w\:]+)((?:[\s\w:=]|'[^']*'|"[^"]*")*)(?:\/>|>([\d,]*)<\/[^>]+>)/g;
    let lastLength;
    do {
      lastLength = nodes.length;
      xml = xml.replace(regex, (all, name, attrs, childList) => {
        const index = nodes.length;
        const node = new SimpleDOMNode(name);
        const children = [];
        if (childList) {
          // `childList` looks like "3,7,"; drop the empty trailing entry.
          const indices = childList.split(',');
          indices.pop();
          indices.forEach((child) => {
            const childNode = nodes[+child];
            childNode.parentNode = node;
            children.push(childNode);
          });
        }

        node.childNodes = children;
        nodes.push(node);
        return index + ',';
      });
      // Stop as soon as a full pass creates no new element node.
    } while (lastLength < nodes.length);

    // We should only have one root index left, which will be last in the nodes.
    return {
      documentElement: nodes.pop(),
    };
  }

  /**
   * Decodes numeric character references and the five predefined XML
   * entities. Unknown named entities, and numeric references beyond
   * U+10FFFF, are left untouched.
   *
   * @param {string} text - Raw text content.
   * @returns {string} The decoded text.
   */
  _decodeXML(text) {
    if (!text.includes('&')) {
      return text;
    }

    return text.replace(/&(#(x[0-9a-f]+|\d+)|\w+);/gi,
      (all, entityName, number) => {
        if (number) {
          // The regex is case-insensitive, so the hex marker may be 'X'.
          const isHex = number[0] === 'x' || number[0] === 'X';
          const codePoint = isHex ? parseInt(number.substring(1), 16)
                                  : parseInt(number, 10);
          // `String.fromCodePoint` throws outside the Unicode range.
          if (!(codePoint <= 0x10FFFF)) {
            return all;
          }
          // Unlike `fromCharCode`, this keeps astral code points intact.
          return String.fromCodePoint(codePoint);
        }

        switch (entityName) {
          case 'amp':
            return '&';
          case 'lt':
            return '<';
          case 'gt':
            return '>';
          case 'quot':
            return '\"';
          case 'apos':
            return '\'';
        }
        return '&' + entityName + ';';
      });
  }
}
263-
264138
var RenderingCancelledException = (function RenderingCancelledException() {
265139
function RenderingCancelledException(msg, type) {
266140
this.message = msg;
@@ -411,7 +285,6 @@ export {
411285
DOMCanvasFactory,
412286
DOMCMapReaderFactory,
413287
DOMSVGFactory,
414-
SimpleXMLParser,
415288
StatTimer,
416289
DummyStatTimer,
417290
};

src/display/metadata.js

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*/
1515

1616
import { assert } from '../shared/util';
17-
import { SimpleXMLParser } from './dom_utils';
17+
import { SimpleXMLParser } from './xml_parser';
1818

1919
class Metadata {
2020
constructor(data) {
@@ -23,13 +23,15 @@ class Metadata {
2323
// Ghostscript may produce invalid metadata, so try to repair that first.
2424
data = this._repair(data);
2525

26-
// Convert the string to a DOM `Document`.
26+
// Convert the string to an XML document.
2727
let parser = new SimpleXMLParser();
28-
data = parser.parseFromString(data);
28+
const xmlDocument = parser.parseFromString(data);
2929

3030
this._metadata = Object.create(null);
3131

32-
this._parse(data);
32+
if (xmlDocument) {
33+
this._parse(xmlDocument);
34+
}
3335
}
3436

3537
_repair(data) {
@@ -68,8 +70,8 @@ class Metadata {
6870
});
6971
}
7072

71-
_parse(domDocument) {
72-
let rdf = domDocument.documentElement;
73+
_parse(xmlDocument) {
74+
let rdf = xmlDocument.documentElement;
7375

7476
if (rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in <xmpmeta>
7577
rdf = rdf.firstChild;

0 commit comments

Comments
 (0)