@@ -14,9 +14,8 @@ struct Parser {
14
14
input : ~str ,
15
15
length : uint , // All counted in bytes, not characters
16
16
position : uint , // All counted in bytes, not characters
17
- // TODO: add these in tokens
18
- // priv line: uint,
19
- // priv column: uint, // All counted in bytes, not characters
17
+ line : uint ,
18
+ last_line_start : uint , // All counted in bytes, not characters
20
19
}
21
20
22
21
@@ -27,8 +26,8 @@ impl Parser {
27
26
length : input. len ( ) ,
28
27
input : input,
29
28
position : 0 ,
30
- // line: 1,
31
- // column: 1 ,
29
+ line : 1 ,
30
+ last_line_start : 0 ,
32
31
}
33
32
}
34
33
}
@@ -43,15 +42,28 @@ macro_rules! is_match(
43
42
44
43
pub fn next_component_value ( parser : & mut Parser ) -> Option < ( ComponentValue , SourceLocation ) > {
45
44
consume_comments ( parser) ;
46
- if parser. is_eof ( ) { return None }
47
- let start_location = SourceLocation { position : parser. position } ;
45
+ if parser. is_eof ( ) {
46
+ if CFG_TEST {
47
+ assert ! ( parser. line == parser. input. split_iter( '\n' ) . len_( ) ,
48
+ "The tokenizer is missing a parser.new_line() call somewhere." )
49
+ }
50
+ return None
51
+ }
52
+ let start_location = SourceLocation {
53
+ line : parser. line ,
54
+ // The start of the line is column 1:
55
+ column : parser. position - parser. last_line_start + 1 ,
56
+ } ;
48
57
let c = parser. current_char ( ) ;
49
58
let component_value = match c {
50
59
'\t' | '\n' | ' ' => {
51
- parser. position += 1 ;
52
60
while !parser. is_eof ( ) {
53
61
match parser. current_char ( ) {
54
- '\t' | '\n' | ' ' => parser. position += 1 ,
62
+ ' ' | '\t' => parser. position += 1 ,
63
+ '\n' => {
64
+ parser. position += 1 ;
65
+ parser. new_line ( ) ;
66
+ } ,
55
67
_ => break ,
56
68
}
57
69
}
@@ -184,6 +196,13 @@ pub fn next_component_value(parser: &mut Parser) -> Option<(ComponentValue, Sour
184
196
// *********** End of public API ***********
185
197
186
198
199
+ #[ cfg( not( test) ) ]
200
+ static CFG_TEST : bool = false ;
201
+
202
+ #[ cfg( test) ]
203
+ static CFG_TEST : bool = true ;
204
+
205
+
187
206
#[ inline]
188
207
fn preprocess ( input : & str ) -> ~str {
189
208
// TODO: Is this faster if done in one pass?
@@ -223,17 +242,33 @@ impl Parser {
223
242
fn starts_with ( & self , needle : & str ) -> bool {
224
243
self . input . slice_from ( self . position ) . starts_with ( needle)
225
244
}
245
+
246
+ #[ inline]
247
+ fn new_line ( & mut self ) {
248
+ if CFG_TEST {
249
+ assert ! ( self . input. char_at( self . position - 1 ) == '\n' )
250
+ }
251
+ self . line += 1 ;
252
+ self . last_line_start = self . position ;
253
+ }
226
254
}
227
255
228
256
229
257
#[ inline]
230
258
fn consume_comments ( parser : & mut Parser ) {
231
259
while parser. starts_with ( "/*" ) {
232
260
parser. position += 2 ; // +2 to consume "/*"
233
- match parser. input . slice_from ( parser. position ) . find_str ( "*/" ) {
234
- // +2 to consume "*/"
235
- Some ( offset) => parser. position += offset + 2 ,
236
- None => parser. position = parser. length // EOF
261
+ while !parser. is_eof ( ) {
262
+ match parser. consume_char ( ) {
263
+ '*' => {
264
+ if !parser. is_eof ( ) && parser. current_char ( ) == '/' {
265
+ parser. position += 1 ;
266
+ break
267
+ }
268
+ } ,
269
+ '\n' => parser. new_line ( ) ,
270
+ _ => ( )
271
+ }
237
272
}
238
273
}
239
274
}
@@ -264,6 +299,7 @@ fn consume_string(parser: &mut Parser, single_quote: bool) -> ComponentValue {
264
299
}
265
300
266
301
302
+ // Return None on syntax error (ie. unescaped newline)
267
303
fn consume_quoted_string ( parser : & mut Parser , single_quote : bool ) -> Option < ~str > {
268
304
parser. position += 1 ; // Skip the initial quote
269
305
let mut string: ~str = ~"";
@@ -277,7 +313,10 @@ fn consume_quoted_string(parser: &mut Parser, single_quote: bool) -> Option<~str
277
313
} ,
278
314
'\\' => {
279
315
if !parser. is_eof ( ) {
280
- if parser. current_char ( ) == '\n' { parser. position += 1 } // Escaped newline
316
+ if parser. current_char ( ) == '\n' { // Escaped newline
317
+ parser. position += 1 ;
318
+ parser. new_line ( ) ;
319
+ }
281
320
else { string. push_char ( consume_escape ( parser) ) }
282
321
}
283
322
// else: escaped EOF, do nothing.
@@ -427,8 +466,11 @@ fn consume_url(parser: &mut Parser) -> ComponentValue {
427
466
let mut string = ~"";
428
467
while !parser. is_eof ( ) {
429
468
let next_char = match parser. consume_char ( ) {
430
- '\t' | '\n' | ' '
431
- => return consume_url_end ( parser, string) ,
469
+ ' ' | '\t' => return consume_url_end ( parser, string) ,
470
+ '\n' => {
471
+ parser. new_line ( ) ;
472
+ return consume_url_end ( parser, string)
473
+ } ,
432
474
')' => break ,
433
475
'\x00' ..'\x08' | '\x0B' | '\x0E' ..'\x1F' | '\x7F' // non-printable
434
476
| '"' | '\'' | '(' => return consume_bad_url ( parser) ,
@@ -448,7 +490,8 @@ fn consume_url(parser: &mut Parser) -> ComponentValue {
448
490
fn consume_url_end ( parser : & mut Parser , string : ~str ) -> ComponentValue {
449
491
while !parser. is_eof ( ) {
450
492
match parser. consume_char ( ) {
451
- '\t' | '\n' | ' ' => ( ) ,
493
+ ' ' | '\t' => ( ) ,
494
+ '\n' => parser. new_line ( ) ,
452
495
')' => break ,
453
496
_ => return consume_bad_url ( parser)
454
497
}
@@ -462,6 +505,7 @@ fn consume_url(parser: &mut Parser) -> ComponentValue {
462
505
match parser. consume_char ( ) {
463
506
')' => break ,
464
507
'\\' => parser. position += 1 , // Skip an escaped ')' or '\'
508
+ '\n' => parser. new_line ( ) ,
465
509
_ => ( )
466
510
}
467
511
}
@@ -538,7 +582,8 @@ fn consume_escape(parser: &mut Parser) -> char {
538
582
}
539
583
if !parser. is_eof ( ) {
540
584
match parser. current_char ( ) {
541
- '\t' | '\n' | ' ' => parser. position += 1 ,
585
+ ' ' | '\t' => parser. position += 1 ,
586
+ '\n' => { parser. position += 1 ; parser. new_line ( ) } ,
542
587
_ => ( )
543
588
}
544
589
}
0 commit comments