Skip to content

Commit 2e06c15

Browse files
committed
improved whitespace handling, reordered code
fixes fb55#10, cheeriojs/cheerio#683
1 parent e411927 commit 2e06c15

File tree

1 file changed

+50
-42
lines changed

1 file changed

+50
-42
lines changed

index.js

Lines changed: 50 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,7 @@
22

33
module.exports = parse;
44

5-
var re_ws = /^\s/,
6-
re_name = /^(?:\\.|[\w\-\u00c0-\uFFFF])+/,
5+
var re_name = /^(?:\\.|[\w\-\u00c0-\uFFFF])+/,
76
re_escape = /\\([\da-f]{1,6}\s?|(\s)|.)/ig,
87
//modified version of https://github.com/jquery/sizzle/blob/master/src/sizzle.js#L87
98
re_attr = /^\s*((?:\\.|[\w\u00c0-\uFFFF\-])+)\s*(?:(\S?)=\s*(?:(['"])(.*?)\3|(#?(?:\\.|[\w\u00c0-\uFFFF\-])*)|)|)\s*(i)?\]/;
@@ -72,6 +71,10 @@ function unescapeCSS(str){
7271
return str.replace(re_escape, funescape);
7372
}
7473

74+
function isWhitespace(c){
75+
return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
76+
}
77+
7578
function parse(selector, options){
7679
var subselects = [];
7780

@@ -89,58 +92,51 @@ function parseSelector(subselects, selector, options){
8992
sawWS = false,
9093
data, firstChar, name, quot;
9194

92-
selector = selector.trimLeft();
93-
9495
function getName(){
9596
var sub = selector.match(re_name)[0];
9697
selector = selector.substr(sub.length);
9798
return unescapeCSS(sub);
9899
}
99100

100-
while(selector !== ""){
101-
if(re_name.test(selector)){
102-
if(sawWS){
103-
tokens.push({type: "descendant"});
104-
sawWS = false;
105-
}
101+
function stripWhitespace(start){
102+
while(isWhitespace(selector.charAt(start))) start++;
103+
selector = selector.substr(start);
104+
}
106105

107-
name = getName();
106+
stripWhitespace(0);
108107

109-
if(!options || ("lowerCaseTags" in options ? options.lowerCaseTags : !options.xmlMode)){
110-
name = name.toLowerCase();
111-
}
108+
while(selector !== ""){
109+
firstChar = selector.charAt(0);
112110

113-
tokens.push({type: "tag", name: name});
114-
} else if(re_ws.test(selector)){
111+
if(isWhitespace(firstChar)){
115112
sawWS = true;
116-
selector = selector.trimLeft();
117-
} else {
118-
firstChar = selector.charAt(0);
119-
selector = selector.substr(1);
120-
121-
if(firstChar in simpleSelectors){
122-
tokens.push({type: simpleSelectors[firstChar]});
123-
selector = selector.trimLeft();
124-
sawWS = false;
125-
continue;
126-
} else if(firstChar === ","){
127-
if(tokens.length === 0){
128-
throw new SyntaxError("empty sub-selector");
129-
}
130-
subselects.push(tokens);
131-
tokens = [];
132-
133-
selector = selector.trimLeft();
134-
sawWS = false;
135-
continue;
136-
} else if(sawWS){
137-
tokens.push({type: "descendant"});
113+
stripWhitespace(1);
114+
} else if(firstChar in simpleSelectors){
115+
tokens.push({type: simpleSelectors[firstChar]});
116+
sawWS = false;
117+
118+
stripWhitespace(1);
119+
} else if(firstChar === ","){
120+
if(tokens.length === 0){
121+
throw new SyntaxError("empty sub-selector");
122+
}
123+
subselects.push(tokens);
124+
tokens = [];
125+
sawWS = false;
126+
stripWhitespace(1);
127+
} else {
128+
if(sawWS){
129+
if(tokens.length > 0){
130+
tokens.push({type: "descendant"});
131+
}
138132
sawWS = false;
139133
}
140134

141-
if(firstChar === "*"){
135+
if(firstChar === "*"){
136+
selector = selector.substr(1);
142137
tokens.push({type: "universal"});
143138
} else if(firstChar in attribSelectors){
139+
selector = selector.substr(1);
144140
tokens.push({
145141
type: "attribute",
146142
name: attribSelectors[firstChar][0],
@@ -149,6 +145,7 @@ function parseSelector(subselects, selector, options){
149145
ignoreCase: false
150146
});
151147
} else if(firstChar === "["){
148+
selector = selector.substr(1);
152149
data = selector.match(re_attr);
153150
if(!data){
154151
throw new SyntaxError("Malformed attribute selector: " + selector);
@@ -175,7 +172,10 @@ function parseSelector(subselects, selector, options){
175172
});
176173

177174
} else if(firstChar === ":"){
178-
//if(selector.charAt(0) === ":"){} //TODO pseudo-element
175+
//if(selector.charAt(1) === ":"){} //TODO pseudo-element
176+
177+
selector = selector.substr(1);
178+
179179
name = getName().toLowerCase();
180180
data = null;
181181

@@ -232,12 +232,20 @@ function parseSelector(subselects, selector, options){
232232
}
233233

234234
tokens.push({type: "pseudo", name: name, data: data});
235-
} else {
235+
} else if(re_name.test(selector)){
236+
name = getName();
237+
238+
if(!options || ("lowerCaseTags" in options ? options.lowerCaseTags : !options.xmlMode)){
239+
name = name.toLowerCase();
240+
}
241+
242+
tokens.push({type: "tag", name: name});
243+
} else {
236244
if(tokens.length && tokens[tokens.length - 1].type === "descendant"){
237245
tokens.pop();
238246
}
239247
addToken(subselects, tokens);
240-
return firstChar + selector;
248+
return selector;
241249
}
242250
}
243251
}

0 commit comments

Comments
 (0)