Skip to content

Commit 5d55541

Browse files
committed
Flatten the tokenizer.
1 parent 4f982f0 commit 5d55541

File tree

1 file changed

+81
-61
lines changed

1 file changed

+81
-61
lines changed

tokenizer.rs

Lines changed: 81 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -46,13 +46,31 @@ pub fn next_component_value(parser: &mut Parser) -> Option<ComponentValue> {
4646
if parser.is_eof() { return None }
4747
let c = parser.current_char();
4848
Some(match c {
49-
'-' => {
50-
if parser.starts_with("-->") {
51-
parser.position += 3;
52-
CDC
53-
} else if next_is_namestart_or_escape(parser) {
54-
consume_ident(parser)
55-
} else if (
49+
'\t' | '\n' | ' ' => {
50+
parser.position += 1;
51+
while !parser.is_eof() {
52+
match parser.current_char() {
53+
'\t' | '\n' | ' ' => parser.position += 1,
54+
_ => break,
55+
}
56+
}
57+
WhiteSpace
58+
},
59+
'"' => consume_quoted_string(parser, false),
60+
'#' => { parser.position += 1; consume_hash(parser) },
61+
'$' => {
62+
if parser.starts_with("$=") { parser.position += 2; SuffixMatch }
63+
else { parser.position += 1; Delim(c) }
64+
},
65+
'\'' => consume_quoted_string(parser, true),
66+
'(' => ParenthesisBlock(consume_block(parser, CloseParenthesis)),
67+
')' => { parser.position += 1; CloseParenthesis },
68+
'*' => {
69+
if parser.starts_with("*=") { parser.position += 2; SubstringMatch }
70+
else { parser.position += 1; Delim(c) }
71+
},
72+
'+' => {
73+
if (
5674
parser.position + 1 < parser.length
5775
&& is_match!(parser.char_at(1), '0'..'9')
5876
) || (
@@ -63,78 +81,77 @@ pub fn next_component_value(parser: &mut Parser) -> Option<ComponentValue> {
6381
consume_numeric(parser)
6482
} else {
6583
parser.position += 1;
66-
Delim('-')
84+
Delim(c)
6785
}
6886
},
69-
'<' => {
70-
if parser.starts_with("<!--") {
71-
parser.position += 4;
72-
CDO
87+
'-' => {
88+
if (
89+
parser.position + 1 < parser.length
90+
&& is_match!(parser.char_at(1), '0'..'9')
91+
) || (
92+
parser.position + 2 < parser.length
93+
&& parser.char_at(1) == '.'
94+
&& is_match!(parser.char_at(2), '0'..'9')
95+
) {
96+
consume_numeric(parser)
97+
} else if next_is_namestart_or_escape(parser) {
98+
consume_ident(parser)
99+
} else if parser.starts_with("-->") {
100+
parser.position += 3;
101+
CDC
73102
} else {
74103
parser.position += 1;
75-
Delim('<')
104+
Delim(c)
76105
}
77106
},
78-
'0'..'9' => consume_numeric(parser),
79107
'.' => {
80108
if (parser.position + 1 < parser.length && is_match!(parser.char_at(1), '0'..'9')) {
81109
consume_numeric(parser)
82110
} else {
83111
parser.position += 1;
84-
Delim('.')
112+
Delim(c)
85113
}
86114
}
87-
'+' => {
88-
if (
89-
parser.position + 1 < parser.length
90-
&& is_match!(parser.char_at(1), '0'..'9')
91-
) || (
92-
parser.position + 2 < parser.length
93-
&& parser.char_at(1) == '.'
94-
&& is_match!(parser.char_at(2), '0'..'9')
95-
) {
96-
consume_numeric(parser)
115+
'0'..'9' => consume_numeric(parser),
116+
':' => { parser.position += 1; Colon },
117+
';' => { parser.position += 1; Semicolon },
118+
'<' => {
119+
if parser.starts_with("<!--") {
120+
parser.position += 4;
121+
CDO
97122
} else {
98123
parser.position += 1;
99-
Delim('+')
124+
Delim(c)
100125
}
101126
},
127+
'@' => { parser.position += 1; consume_at_keyword(parser) },
102128
'u' | 'U' => consume_unicode_range(parser),
103129
'a'..'z' | 'A'..'Z' | '_' | '\\' => consume_ident(parser),
104-
'~' if parser.starts_with("~=") => { parser.position += 2; IncludeMath }
105-
'|' if parser.starts_with("|=") => { parser.position += 2; DashMatch }
106-
'^' if parser.starts_with("^=") => { parser.position += 2; PrefixMatch }
107-
'$' if parser.starts_with("$=") => { parser.position += 2; SuffixMatch }
108-
'*' if parser.starts_with("*=") => { parser.position += 2; SubstringMatch }
109-
'|' if parser.starts_with("||") => { parser.position += 2; Column }
110-
_ if c >= '\x80' => consume_ident(parser), // Non-ASCII
130+
'[' => SquareBraketBlock(consume_block(parser, CloseSquareBraket)),
131+
']' => { parser.position += 1; CloseSquareBraket },
132+
'^' => {
133+
if parser.starts_with("^=") { parser.position += 2; PrefixMatch }
134+
else { parser.position += 1; Delim(c) }
135+
},
136+
'{' => CurlyBraketBlock(consume_block(parser, CloseCurlyBraket)),
137+
'|' => {
138+
if parser.starts_with("|=") { parser.position += 2; DashMatch }
139+
else if parser.starts_with("||") { parser.position += 2; Column }
140+
else { parser.position += 1; Delim(c) }
141+
},
142+
'}' => { parser.position += 1; CloseCurlyBraket },
143+
'~' => {
144+
if parser.starts_with("~=") { parser.position += 2; IncludeMath }
145+
else { parser.position += 1; Delim(c) }
146+
},
111147
_ => {
112-
match parser.consume_char() {
113-
'\t' | '\n' | ' ' => {
114-
while !parser.is_eof() {
115-
match parser.current_char() {
116-
'\t' | '\n' | ' '
117-
=> parser.position += 1,
118-
_ => break,
119-
}
120-
}
121-
WhiteSpace
122-
},
123-
'"' => consume_quoted_string(parser, false),
124-
'#' => consume_hash(parser),
125-
'\'' => consume_quoted_string(parser, true),
126-
'(' => ParenthesisBlock(consume_block(parser, CloseParenthesis)),
127-
')' => CloseParenthesis,
128-
':' => Colon,
129-
';' => Semicolon,
130-
'@' => consume_at_keyword(parser),
131-
'[' => SquareBraketBlock(consume_block(parser, CloseSquareBraket)),
132-
']' => CloseSquareBraket,
133-
'{' => CurlyBraketBlock(consume_block(parser, CloseCurlyBraket)),
134-
'}' => CloseCurlyBraket,
135-
_ => Delim(c)
148+
if c > '\x7F' { // Non-ASCII
149+
consume_ident(parser)
150+
} else {
151+
parser.position += 1;
152+
Delim(c)
136153
}
137-
}
154+
},
138155
})
139156
}
140157

@@ -212,6 +229,7 @@ fn consume_comments(parser: &mut Parser) {
212229

213230

214231
fn consume_block(parser: &mut Parser, ending_token: ComponentValue) -> ~[ComponentValue] {
232+
parser.position += 1; // Skip the initial {[(
215233
let mut content = ~[];
216234
loop {
217235
match next_component_value(parser) {
@@ -249,6 +267,7 @@ fn next_is_namestart_or_escape(parser: &mut Parser) -> bool {
249267

250268

251269
fn consume_quoted_string(parser: &mut Parser, single_quote: bool) -> ComponentValue {
270+
parser.position += 1; // Skip the initial quote
252271
let mut string: ~str = ~"";
253272
while !parser.is_eof() {
254273
match parser.consume_char() {
@@ -297,8 +316,10 @@ fn consume_ident(parser: &mut Parser) -> ComponentValue {
297316
if parser.is_eof() { return Ident(string) }
298317
match parser.current_char() {
299318
'(' => {
300-
parser.position += 1;
301-
if eq_ascii_lower(string, "url") { consume_url(parser) }
319+
if eq_ascii_lower(string, "url") {
320+
parser.position += 1;
321+
consume_url(parser)
322+
}
302323
else { Function(string, consume_block(parser, CloseParenthesis)) }
303324
},
304325
_ => Ident(string)
@@ -440,7 +461,6 @@ fn consume_url(parser: &mut Parser) -> ComponentValue {
440461

441462
fn consume_quoted_url(parser: &mut Parser, single_quote: bool)
442463
-> ComponentValue {
443-
parser.position += 1; // The initial quote
444464
match consume_quoted_string(parser, single_quote) {
445465
String(string) => consume_url_end(parser, string),
446466
BadString => consume_bad_url(parser),

0 commit comments

Comments
 (0)