@@ -361,9 +361,15 @@ impl<'a> Tokenizer<'a> {
361
361
self . input [ self . position ..] . chars ( ) . next ( ) . unwrap ( )
362
362
}
363
363
364
- fn seen_newline ( & mut self , is_cr : bool ) {
365
- if is_cr && self . next_byte ( ) == Some ( /* LF */ b'\n' ) {
366
- return
364
+ // Given that a newline has been seen, advance over the newline
365
+ // and update the state.
366
+ #[ inline]
367
+ fn consume_newline ( & mut self ) {
368
+ let byte = self . next_byte_unchecked ( ) ;
369
+ debug_assert ! ( byte == b'\r' || byte == b'\n' || byte == b'\x0C' ) ;
370
+ self . position += 1 ;
371
+ if byte == b'\r' && self . next_byte ( ) == Some ( b'\n' ) {
372
+ self . position += 1 ;
367
373
}
368
374
self . current_line_start_position = self . position ;
369
375
self . current_line_number += 1 ;
@@ -393,13 +399,8 @@ impl<'a> Tokenizer<'a> {
393
399
b' ' | b'\t' => {
394
400
self . advance( 1 )
395
401
} ,
396
- b'\n' | b'\x0C' => {
397
- self . advance( 1 ) ;
398
- self . seen_newline( false ) ;
399
- } ,
400
- b'\r' => {
401
- self . advance( 1 ) ;
402
- self . seen_newline( true ) ;
402
+ b'\n' | b'\x0C' | b'\r' => {
403
+ self . consume_newline( ) ;
403
404
} ,
404
405
b'/' => {
405
406
if self . starts_with( b"/*" ) {
@@ -421,13 +422,8 @@ impl<'a> Tokenizer<'a> {
421
422
b' ' | b'\t' => {
422
423
self . advance( 1 )
423
424
} ,
424
- b'\n' | b'\x0C' => {
425
- self . advance( 1 ) ;
426
- self . seen_newline( false ) ;
427
- } ,
428
- b'\r' => {
429
- self . advance( 1 ) ;
430
- self . seen_newline( true ) ;
425
+ b'\n' | b'\x0C' | b'\r' => {
426
+ self . consume_newline( ) ;
431
427
} ,
432
428
b'/' => {
433
429
if self . starts_with( b"/*" ) {
@@ -481,13 +477,10 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
481
477
let b = tokenizer. next_byte_unchecked ( ) ;
482
478
let token = match_byte ! { b,
483
479
b' ' | b'\t' => {
484
- consume_whitespace( tokenizer, false , false )
480
+ consume_whitespace( tokenizer, false )
485
481
} ,
486
- b'\n' | b'\x0C' => {
487
- consume_whitespace( tokenizer, true , false )
488
- } ,
489
- b'\r' => {
490
- consume_whitespace( tokenizer, true , true )
482
+ b'\n' | b'\x0C' | b'\r' => {
483
+ consume_whitespace( tokenizer, true )
491
484
} ,
492
485
b'"' => { consume_string( tokenizer, false ) } ,
493
486
b'#' => {
@@ -617,25 +610,21 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
617
610
}
618
611
619
612
620
- fn consume_whitespace < ' a > ( tokenizer : & mut Tokenizer < ' a > , newline : bool , is_cr : bool ) -> Token < ' a > {
613
+ fn consume_whitespace < ' a > ( tokenizer : & mut Tokenizer < ' a > , newline : bool ) -> Token < ' a > {
621
614
let start_position = tokenizer. position ( ) ;
622
- tokenizer. advance ( 1 ) ;
623
615
if newline {
624
- tokenizer. seen_newline ( is_cr)
616
+ tokenizer. consume_newline ( ) ;
617
+ } else {
618
+ tokenizer. advance ( 1 ) ;
625
619
}
626
620
while !tokenizer. is_eof ( ) {
627
621
let b = tokenizer. next_byte_unchecked ( ) ;
628
622
match_byte ! { b,
629
623
b' ' | b'\t' => {
630
624
tokenizer. advance( 1 ) ;
631
625
}
632
- b'\n' | b'\x0C' => {
633
- tokenizer. advance( 1 ) ;
634
- tokenizer. seen_newline( false ) ;
635
- }
636
- b'\r' => {
637
- tokenizer. advance( 1 ) ;
638
- tokenizer. seen_newline( true ) ;
626
+ b'\n' | b'\x0C' | b'\r' => {
627
+ tokenizer. consume_newline( ) ;
639
628
}
640
629
_ => {
641
630
break
@@ -675,13 +664,8 @@ fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
675
664
return contents
676
665
}
677
666
}
678
- b'\n' | b'\x0C' => {
679
- tokenizer. advance( 1 ) ;
680
- tokenizer. seen_newline( false ) ;
681
- }
682
- b'\r' => {
683
- tokenizer. advance( 1 ) ;
684
- tokenizer. seen_newline( true ) ;
667
+ b'\n' | b'\x0C' | b'\r' => {
668
+ tokenizer. consume_newline( ) ;
685
669
}
686
670
_ => {
687
671
tokenizer. advance( 1 ) ;
@@ -769,19 +753,8 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
769
753
if !tokenizer. is_eof( ) {
770
754
match tokenizer. next_byte_unchecked( ) {
771
755
// Escaped newline
772
- b'\n' | b'\x0C' => {
773
- tokenizer. advance( 1 ) ;
774
- tokenizer. seen_newline( false ) ;
775
- }
776
- b'\r' => {
777
- tokenizer. advance( 1 ) ;
778
- if tokenizer. next_byte( ) == Some ( b'\n' ) {
779
- tokenizer. advance( 1 ) ;
780
- }
781
- // `is_cr = true` is useful to skip \r when the next iteration
782
- // of a loop will call `seen_newline` again for the following \n.
783
- // In this case we’re consuming both in this iteration, so passing `false`.
784
- tokenizer. seen_newline( false ) ;
756
+ b'\n' | b'\x0C' | b'\r' => {
757
+ tokenizer. consume_newline( ) ;
785
758
}
786
759
// This pushes one well-formed code point
787
760
_ => consume_escape_and_write( tokenizer, & mut string_bytes)
@@ -1178,18 +1151,17 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
1178
1151
string : CowRcStr < ' a > )
1179
1152
-> Token < ' a > {
1180
1153
while !tokenizer. is_eof ( ) {
1181
- match_byte ! { tokenizer. consume_byte ( ) ,
1154
+ match_byte ! { tokenizer. next_byte_unchecked ( ) ,
1182
1155
b')' => {
1156
+ tokenizer. advance( 1 ) ;
1183
1157
break
1184
1158
}
1185
- b' ' | b'\t' => { }
1186
- b'\n' | b'\x0C' => {
1187
- tokenizer. seen_newline( false ) ;
1188
- }
1189
- b'\r' => {
1190
- tokenizer. seen_newline( true ) ;
1159
+ b' ' | b'\t' => { tokenizer. advance( 1 ) ; }
1160
+ b'\n' | b'\x0C' | b'\r' => {
1161
+ tokenizer. consume_newline( ) ;
1191
1162
}
1192
1163
_ => {
1164
+ tokenizer. advance( 1 ) ;
1193
1165
return consume_bad_url( tokenizer, start_pos) ;
1194
1166
}
1195
1167
}
@@ -1200,22 +1172,23 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
1200
1172
fn consume_bad_url < ' a > ( tokenizer : & mut Tokenizer < ' a > , start_pos : SourcePosition ) -> Token < ' a > {
1201
1173
// Consume up to the closing )
1202
1174
while !tokenizer. is_eof ( ) {
1203
- match_byte ! { tokenizer. consume_byte ( ) ,
1175
+ match_byte ! { tokenizer. next_byte_unchecked ( ) ,
1204
1176
b')' => {
1177
+ tokenizer. advance( 1 ) ;
1205
1178
break
1206
1179
}
1207
1180
b'\\' => {
1181
+ tokenizer. advance( 1 ) ;
1208
1182
if matches!( tokenizer. next_byte( ) , Some ( b')' ) | Some ( b'\\' ) ) {
1209
1183
tokenizer. advance( 1 ) ; // Skip an escaped ')' or '\'
1210
1184
}
1211
1185
}
1212
- b'\n' | b'\x0C' => {
1213
- tokenizer. seen_newline ( false ) ;
1186
+ b'\n' | b'\x0C' | b'\r' => {
1187
+ tokenizer. consume_newline ( ) ;
1214
1188
}
1215
- b'\r' => {
1216
- tokenizer. seen_newline ( true ) ;
1189
+ _ => {
1190
+ tokenizer. advance ( 1 ) ;
1217
1191
}
1218
- _ => { } ,
1219
1192
}
1220
1193
}
1221
1194
BadUrl ( tokenizer. slice_from ( start_pos) . into ( ) )
@@ -1259,16 +1232,8 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char {
1259
1232
b' ' | b'\t' => {
1260
1233
tokenizer. advance( 1 )
1261
1234
}
1262
- b'\n' | b'\x0C' => {
1263
- tokenizer. advance( 1 ) ;
1264
- tokenizer. seen_newline( false )
1265
- }
1266
- b'\r' => {
1267
- tokenizer. advance( 1 ) ;
1268
- if !tokenizer. is_eof( ) && tokenizer. next_byte_unchecked( ) == b'\n' {
1269
- tokenizer. advance( 1 ) ;
1270
- }
1271
- tokenizer. seen_newline( false )
1235
+ b'\n' | b'\x0C' | b'\r' => {
1236
+ tokenizer. consume_newline( ) ;
1272
1237
}
1273
1238
_ => { }
1274
1239
}
0 commit comments