forked from dawidd6/action-download-artifact
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathXml2JsParser.js
More file actions
237 lines (209 loc) · 7.88 KB
/
Copy pathXml2JsParser.js
File metadata and controls
237 lines (209 loc) · 7.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
const StringSource = require("./inputSource/StringSource");
const BufferSource = require("./inputSource/BufferSource");
const {readTagExp,readClosingTagName} = require("./XmlPartReader");
const {readComment, readCdata,readDocType,readPiTag} = require("./XmlSpecialTagsReader");
const TagPath = require("./TagPath");
const TagPathMatcher = require("./TagPathMatcher");
const EntitiesParser = require('./EntitiesParser');
/**
 * Small record describing the tag that is currently in scope.
 * It is typically what a jpath-style expression is compared against.
 */
class TagDetail {
  /** Tag name as handed to the constructor. */
  name = undefined;

  /** Position counter; every new record starts at 0. */
  position = 0;

  /**
   * @param {string} name Name of the tag this record stands for.
   */
  constructor(name) {
    this.name = name;
  }
}
/**
 * Character-driven XML parser.
 *
 * It reads characters from an input source, tracks the open tags in
 * `tagsStack`, and reports tags and text values to the output builder obtained
 * from `options.OutputBuilder`. The parsed result is whatever that builder's
 * `getOutput()` returns.
 */
class Xml2JsParser {
  /**
   * @param {object} options Parser options. This class reads `htmlEntities`,
   *   `stopNodes`, `tags.unpaired` and `OutputBuilder` from it.
   */
  constructor(options) {
    this.options = options;
    this.currentTagDetail = null;
    this.tagTextData = "";
    this.tagsStack = [];
    this.entityParser = new EntitiesParser(options.htmlEntities);

    // Every configured stop-node expression is wrapped in a TagPath once, up front.
    this.stopNodes = [];
    for (let i = 0; i < this.options.stopNodes.length; i++) {
      this.stopNodes.push(new TagPath(this.options.stopNodes[i]));
    }
  }

  /**
   * Parse XML held in a string.
   * @param {string} strData XML text.
   * @returns {*} Whatever the output builder's `getOutput()` returns.
   */
  parse(strData) {
    this.source = new StringSource(strData);
    this.parseXml();
    return this.outputBuilder.getOutput();
  }

  /**
   * Parse XML held in a byte container.
   * @param {*} data Input handed to `BufferSource` as-is.
   * @returns {*} Whatever the output builder's `getOutput()` returns.
   */
  parseBytesArr(data) {
    this.source = new BufferSource(data);
    this.parseXml();
    return this.outputBuilder.getOutput();
  }

  /**
   * Main loop: consume `this.source` character by character, dispatching on
   * what follows every "<" and accumulating everything else as text.
   *
   * @throws {Error} If the input ends right after "<", or if open tags or
   *   non-whitespace text are left over once the input is exhausted.
   */
  parseXml() {
    //TODO: Separate TagValueParser as separate class. So no scope issue in node builder class

    // A fresh output builder is requested for every parse run.
    this.outputBuilder = this.options.OutputBuilder.getInstance(this.options);
    this.root = { root: true };
    this.currentTagDetail = this.root;

    while (this.source.canRead()) {
      const ch = this.source.readCh();
      if (ch === "") break;

      if (ch !== "<") {
        // Plain character data; flushed later by addTextNode().
        this.tagTextData += ch;
        continue;
      }

      const nextChar = this.source.readChAt(0);
      if (nextChar === "") throw new Error("Unexpected end of source");

      if (nextChar === "!" || nextChar === "?") {
        // presumably steps over the peeked marker character — confirm against the input source
        this.source.updateBufferBoundary();
        // Previously collected text belongs to the current node.
        this.addTextNode();
        this.readSpecialTag(nextChar); // DOCTYPE, comment, CDATA, PI tag
      } else if (nextChar === "/") {
        this.source.updateBufferBoundary();
        this.readClosingTag();
      } else {
        this.readOpeningTag();
      }
    }

    // Compare against the real type: the previous check used the string
    // "undefined", which let trailing text that is literally `undefined` slip through.
    const hasTrailingText =
      typeof this.tagTextData === "string" && this.tagTextData.trimEnd().length > 0;
    if (this.tagsStack.length > 0 || hasTrailingText) {
      throw new Error("Unexpected data in the end of document");
    }
  }

  /**
   * Read the closing half of a paired tag, flush its text, close it in the
   * output builder and bring the parent tag back into scope.
   *
   * @throws {Error} If the closing tag is not valid here (see validateClosingTag).
   */
  readClosingTag() {
    const tagName = this.processTagName(readClosingTagName(this.source));
    this.validateClosingTag(tagName);

    // All the text collected so far belongs to the tag being closed.
    if (!this.currentTagDetail.root) this.addTextNode();
    this.outputBuilder.closeTag();

    // The tag is closed now, so its parent comes back into scope.
    this.currentTagDetail = this.tagsStack.pop();
  }

  /**
   * Ensure a closing tag is allowed at this point.
   *
   * @param {string} tagName Processed name of the closing tag.
   * @throws {Error} If the name is configured as unpaired, is reported as a
   *   stop node, or differs from the tag currently in scope.
   */
  validateClosingTag(tagName) {
    // An unpaired tag or a stop tag can never be closed explicitly.
    if (this.isUnpaired(tagName) || this.isStopNode(tagName)) {
      throw new Error(`Unexpected closing tag '${tagName}'`);
    }
    // It must match the most recently opened tag.
    if (tagName !== this.currentTagDetail.name) {
      throw new Error(`Unexpected closing tag '${tagName}' expecting '${this.currentTagDetail.name}'`);
    }
  }

  /**
   * Read an opening tag and register it with the output builder.
   * Unpaired and self-closing tags are opened and closed immediately, a stop
   * node additionally receives its raw content as a value, and a paired tag
   * becomes the tag in scope while its parent is pushed onto `tagsStack`.
   */
  readOpeningTag() {
    // Text collected so far belongs to the tag currently in scope.
    this.addTextNode();

    const tagExp = readTagExp(this, ">");
    const tagDetail = new TagDetail(tagExp.tagName);

    if (this.isUnpaired(tagExp.tagName) || tagExp.selfClosing) {
      //TODO: this will lead 2 extra stack operation
      this.outputBuilder.addTag(tagDetail);
      this.outputBuilder.closeTag();
      return;
    }

    // NOTE(review): the check receives the enclosing tag, not the tag just read;
    // possibly `tagDetail` was intended — confirm against TagPath.match.
    if (this.isStopNode(this.currentTagDetail)) {
      // TODO: let's user set a stop node boundary detector for complex contents like script tag
      //TODO: pass tag name only to avoid string operations
      // Must go through `this.source`: a bare `source` is not defined in this scope.
      const content = this.source.readUptoCloseTag(`</${tagExp.tagName}`);
      this.outputBuilder.addTag(tagDetail);
      this.outputBuilder.addValue(content);
      this.outputBuilder.closeTag();
      return;
    }

    // Paired tag: the new nested tag comes into scope.
    this.tagsStack.push(this.currentTagDetail);
    this.outputBuilder.addTag(tagDetail);
    this.currentTagDetail = tagDetail;
  }

  /**
   * Dispatch a "<!" or "<?" construct to the matching reader.
   *
   * @param {string} startCh Character that followed "<": "!" or "?".
   * @throws {Error} If the input ends right after "<!", or if `startCh` is
   *   neither "!" nor "?".
   */
  readSpecialTag(startCh) {
    if (startCh === "!") {
      const nextChar = this.source.readCh();
      // parseXml treats "" as end of input, so reject it here as well.
      if (nextChar == null || nextChar === "") {
        throw new Error("Unexpected ending of the source");
      }
      if (nextChar === "-") { // comment
        readComment(this);
      } else if (nextChar === "[") { // CDATA
        readCdata(this);
      } else if (nextChar === "D") { // DOCTYPE
        readDocType(this);
      }
      // Any other character after "<!" is left unhandled, as before.
    } else if (startCh === "?") {
      readPiTag(this);
    } else {
      throw new Error(`Invalid tag '<${startCh}' at ${this.source.line}:${this.source.col}`);
    }
  }

  /**
   * Flush the collected text to the output builder as a value and reset the
   * buffer. Whitespace-only text is discarded without being reported.
   */
  addTextNode() {
    if (this.tagTextData === undefined || this.tagTextData === "") return;

    if (this.tagTextData.trim().length > 0) {
      //TODO: shift parsing to output builder
      this.outputBuilder.addValue(this.replaceEntities(this.tagTextData));
    }
    this.tagTextData = "";
  }

  /**
   * Normalise an attribute name.
   * @param {string} name Raw attribute name.
   * @returns {string} "#__proto__" instead of "__proto__", then passed through
   *   `resolveNameSpace`.
   */
  processAttrName(name) {
    // Avoid producing a "__proto__" key.
    if (name === "__proto__") name = "#__proto__";
    // NOTE(review): `this.removeNSPrefix` is never assigned in this class;
    // presumably `this.options.removeNSPrefix` was intended — confirm.
    return resolveNameSpace(name, this.removeNSPrefix);
  }

  /**
   * Normalise a tag name.
   * @param {string} name Raw tag name.
   * @returns {string} "#__proto__" instead of "__proto__", then passed through
   *   `resolveNameSpace`.
   */
  processTagName(name) {
    // Avoid producing a "__proto__" key.
    if (name === "__proto__") name = "#__proto__";
    // NOTE(review): `this.removeNSPrefix` is never assigned in this class;
    // presumably `this.options.removeNSPrefix` was intended — confirm.
    return resolveNameSpace(name, this.removeNSPrefix);
  }

  /**
   * Generate tags path from tagsStack. Not implemented yet.
   * @param {string} tagName Currently unused.
   * @returns {string} Always the empty string.
   */
  tagsPath(tagName) {
    //TODO: return TagPath Object. User can call match method with path
    return "";
  }

  /**
   * @param {string} tagName Tag name to look up.
   * @returns {boolean} True when the name is listed in `options.tags.unpaired`.
   */
  isUnpaired(tagName) {
    return this.options.tags.unpaired.includes(tagName);
  }

  /**
   * Check a node against every configured stop-node path.
   *
   * valid expressions are
   * tag nested
   * * nested
   * tag nested[attribute]
   * tag nested[attribute=""]
   * tag nested[attribute!=""]
   * tag nested:0 //for future
   *
   * @param {*} node Value handed to each path's `match` along with the
   *   current `tagsStack`; callers in this class pass either a tag name or a
   *   tag detail object.
   * @returns {boolean} True as soon as one configured path matches.
   */
  isStopNode(node) {
    return this.stopNodes.some((givenPath) => Boolean(givenPath.match(this.tagsStack, node)));
  }

  /**
   * @param {string} text Raw text.
   * @returns {*} Result of running the text through the entity parser.
   */
  replaceEntities(text) {
    //TODO: if option is set then replace entities
    return this.entityParser.parse(text);
  }
}
/**
 * Optionally strip the namespace prefix from a tag or attribute name.
 *
 * @param {string} name Tag or attribute name, possibly of the form `prefix:local`.
 * @param {boolean} removeNSPrefix When falsy the name is returned untouched.
 * @returns {string} With removal enabled: the local part of `prefix:local`,
 *   the empty string for `xmlns:*` declarations, or the name itself when it
 *   carries no prefix.
 * @throws {Error} If removal is enabled and the name contains more than one ":".
 */
function resolveNameSpace(name, removeNSPrefix) {
  if (!removeNSPrefix) return name;

  const parts = name.split(':');

  // No prefix at all: nothing to strip. Previously this fell into the error
  // branch and crashed on an undefined `reportError`.
  if (parts.length === 1) return name;

  if (parts.length > 2) {
    throw new Error(`Multiple namespaces ${name}`);
  }

  const [prefix, localName] = parts;
  return prefix === 'xmlns' ? '' : localName;
}
// The parser class is the only export of this module.
module.exports = Xml2JsParser;