@@ -380,8 +380,8 @@ impl<'a> Tokenizer<'a> {
     }
 
     #[inline]
-    fn starts_with(&self, needle: &str) -> bool {
-        self.input[self.position..].starts_with(needle)
+    fn starts_with(&self, needle: &[u8]) -> bool {
+        self.input.as_bytes()[self.position..].starts_with(needle)
     }
 }
 
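The hunk above swaps the `&str` needle for `&[u8]`, so the prefix test runs on raw bytes. A minimal standalone sketch of why this is safe (my illustration, not part of the patch; `starts_with_at` is a hypothetical free-function stand-in for the method): `&str` guarantees UTF-8, and every byte of a multi-byte UTF-8 sequence has its high bit set, so an all-ASCII needle can never match in the middle of a multi-byte character. Byte-wise `starts_with` therefore agrees with the char-wise version while skipping the char-boundary checks that `&str` slicing performs.

```rust
/// Sketch only: a free-function version of the patched method, self-contained.
fn starts_with_at(input: &str, position: usize, needle: &[u8]) -> bool {
    // Slicing the byte view is a plain index operation, and
    // `<[u8]>::starts_with` is a memcmp-style prefix test.
    input.as_bytes()[position..].starts_with(needle)
}

fn main() {
    // Chars: 'a'(0), ' '(1), '~'(2), '='(3).
    assert!(starts_with_at("a ~= b", 2, b"~="));
    // An ASCII needle cannot falsely match inside a multi-byte char:
    // byte 1 of "héllo" is 0xC3, the first byte of 'é', never b'e' (0x65).
    assert!(!starts_with_at("héllo", 1, b"e"));
}
```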
@@ -405,88 +405,88 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option<Token<'a>> {
     if tokenizer.is_eof() {
         return None
     }
-    let c = tokenizer.next_char();
+    let c = tokenizer.next_byte_unchecked();
     let token = match c {
-        '\t' | '\n' | ' ' | '\r' | '\x0C' => {
+        b'\t' | b'\n' | b' ' | b'\r' | b'\x0C' => {
             let start_position = tokenizer.position();
             tokenizer.advance(1);
             while !tokenizer.is_eof() {
-                match tokenizer.next_char() {
-                    ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1),
+                match tokenizer.next_byte_unchecked() {
+                    b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => tokenizer.advance(1),
                     _ => break,
                 }
             }
             WhiteSpace(tokenizer.slice_from(start_position))
         },
-        '"' => consume_string(tokenizer, false),
-        '#' => {
+        b'"' => consume_string(tokenizer, false),
+        b'#' => {
             tokenizer.advance(1);
             if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
-            else if !tokenizer.is_eof() && match tokenizer.next_char() {
-                'a'...'z' | 'A'...'Z' | '0'...'9' | '-' | '_' => true,
-                '\\' => !tokenizer.has_newline_at(1),
-                _ => c > '\x7F',  // Non-ASCII
+            else if !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
+                b'a'...b'z' | b'A'...b'Z' | b'0'...b'9' | b'-' | b'_' => true,
+                b'\\' => !tokenizer.has_newline_at(1),
+                _ => !c.is_ascii(),
            } { Hash(consume_name(tokenizer)) }
-            else { Delim(c) }
+            else { Delim('#') }
         },
-        '$' => {
-            if tokenizer.starts_with("$=") { tokenizer.advance(2); SuffixMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'$' => {
+            if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
+            else { tokenizer.advance(1); Delim('$') }
         },
-        '\'' => consume_string(tokenizer, true),
-        '(' => { tokenizer.advance(1); ParenthesisBlock },
-        ')' => { tokenizer.advance(1); CloseParenthesis },
-        '*' => {
-            if tokenizer.starts_with("*=") { tokenizer.advance(2); SubstringMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'\'' => consume_string(tokenizer, true),
+        b'(' => { tokenizer.advance(1); ParenthesisBlock },
+        b')' => { tokenizer.advance(1); CloseParenthesis },
+        b'*' => {
+            if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
+            else { tokenizer.advance(1); Delim('*') }
         },
-        '+' => {
+        b'+' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9')
+                && matches!(tokenizer.byte_at(1), b'0'...b'9')
             ) || (
                 tokenizer.has_at_least(2)
-                && tokenizer.char_at(1) == '.'
-                && matches!(tokenizer.char_at(2), '0'...'9')
+                && tokenizer.byte_at(1) == b'.'
+                && matches!(tokenizer.byte_at(2), b'0'...b'9')
             ) {
                 consume_numeric(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('+')
             }
         },
-        ',' => { tokenizer.advance(1); Comma },
-        '-' => {
+        b',' => { tokenizer.advance(1); Comma },
+        b'-' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9')
+                && matches!(tokenizer.byte_at(1), b'0'...b'9')
             ) || (
                 tokenizer.has_at_least(2)
-                && tokenizer.char_at(1) == '.'
-                && matches!(tokenizer.char_at(2), '0'...'9')
+                && tokenizer.byte_at(1) == b'.'
+                && matches!(tokenizer.byte_at(2), b'0'...b'9')
             ) {
                 consume_numeric(tokenizer)
-            } else if tokenizer.starts_with("-->") {
+            } else if tokenizer.starts_with(b"-->") {
                 tokenizer.advance(3);
                 CDC
             } else if is_ident_start(tokenizer) {
                 consume_ident_like(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('-')
             }
         },
-        '.' => {
+        b'.' => {
             if tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9'
+                && matches!(tokenizer.byte_at(1), b'0'...b'9'
             ) {
                 consume_numeric(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('.')
             }
         }
-        '/' if tokenizer.starts_with("/*") => {
+        b'/' if tokenizer.starts_with(b"/*") => {
             tokenizer.advance(2);  // consume "/*"
             let start_position = tokenizer.position();
             let content;
@@ -503,58 +503,59 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option<Token<'a>> {
             }
             Comment(content)
         }
-        '0'...'9' => consume_numeric(tokenizer),
-        ':' => { tokenizer.advance(1); Colon },
-        ';' => { tokenizer.advance(1); Semicolon },
-        '<' => {
-            if tokenizer.starts_with("<!--") {
+        b'0'...b'9' => consume_numeric(tokenizer),
+        b':' => { tokenizer.advance(1); Colon },
+        b';' => { tokenizer.advance(1); Semicolon },
+        b'<' => {
+            if tokenizer.starts_with(b"<!--") {
                 tokenizer.advance(4);
                 CDO
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('<')
             }
         },
-        '@' => {
+        b'@' => {
             tokenizer.advance(1);
             if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
-            else { Delim(c) }
+            else { Delim('@') }
         },
-        'u' | 'U' => {
+        b'u' | b'U' => {
             if tokenizer.has_at_least(2)
-               && tokenizer.char_at(1) == '+'
-               && matches!(tokenizer.char_at(2), '0'...'9' | 'a'...'f' | 'A'...'F' | '?')
+               && tokenizer.byte_at(1) == b'+'
+               && matches!(tokenizer.byte_at(2), b'0'...b'9' | b'a'...b'f' | b'A'...b'F' | b'?')
             { consume_unicode_range(tokenizer) }
             else { consume_ident_like(tokenizer) }
         },
-        'a'...'z' | 'A'...'Z' | '_' | '\0' => consume_ident_like(tokenizer),
-        '[' => { tokenizer.advance(1); SquareBracketBlock },
-        '\\' => {
+        b'a'...b'z' | b'A'...b'Z' | b'_' | b'\0' => consume_ident_like(tokenizer),
+        b'[' => { tokenizer.advance(1); SquareBracketBlock },
+        b'\\' => {
             if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
-            else { tokenizer.advance(1); Delim(c) }
+            else { tokenizer.advance(1); Delim('\\') }
         },
-        ']' => { tokenizer.advance(1); CloseSquareBracket },
-        '^' => {
-            if tokenizer.starts_with("^=") { tokenizer.advance(2); PrefixMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b']' => { tokenizer.advance(1); CloseSquareBracket },
+        b'^' => {
+            if tokenizer.starts_with(b"^=") { tokenizer.advance(2); PrefixMatch }
+            else { tokenizer.advance(1); Delim('^') }
         },
-        '{' => { tokenizer.advance(1); CurlyBracketBlock },
-        '|' => {
-            if tokenizer.starts_with("|=") { tokenizer.advance(2); DashMatch }
-            else if tokenizer.starts_with("||") { tokenizer.advance(2); Column }
-            else { tokenizer.advance(1); Delim(c) }
+        b'{' => { tokenizer.advance(1); CurlyBracketBlock },
+        b'|' => {
+            if tokenizer.starts_with(b"|=") { tokenizer.advance(2); DashMatch }
+            else if tokenizer.starts_with(b"||") { tokenizer.advance(2); Column }
+            else { tokenizer.advance(1); Delim('|') }
        },
-        '}' => { tokenizer.advance(1); CloseCurlyBracket },
-        '~' => {
-            if tokenizer.starts_with("~=") { tokenizer.advance(2); IncludeMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'}' => { tokenizer.advance(1); CloseCurlyBracket },
+        b'~' => {
+            if tokenizer.starts_with(b"~=") { tokenizer.advance(2); IncludeMatch }
+            else { tokenizer.advance(1); Delim('~') }
         },
         _ => {
-            if c > '\x7F' {  // Non-ASCII
+            if !c.is_ascii() {  // Non-ASCII
                 consume_ident_like(tokenizer)
             } else {
+                let ret = Delim(tokenizer.next_char());
                 tokenizer.advance(1);
-                Delim(c)
+                ret
            }
        },
    };
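The overall shape of the rewrite: `next_token` now dispatches on the first raw byte and only decodes a full `char` where one is actually needed. That is why the known-ASCII arms can write `Delim('#')`, `Delim('$')`, and so on directly (the matched byte is a known ASCII literal, and `Delim` still carries a `char`), while the fallback arm calls `next_char()` to decode. A condensed sketch of that pattern, with illustrative names rather than the crate's real API:

```rust
// Sketch of byte-first dispatch: classify on the raw first byte, and pay for
// UTF-8 decoding only on the non-ASCII path. Names here are hypothetical.
fn first_token_kind(input: &str) -> &'static str {
    let Some(&b) = input.as_bytes().first() else { return "eof" };
    match b {
        b'\t' | b'\n' | b' ' | b'\r' | b'\x0C' => "whitespace",
        b'0'..=b'9' => "number",
        b'{' => "curly-bracket-block",
        _ if !b.is_ascii() => {
            // Only here do we decode; `chars()` yields the full char whose
            // leading byte we just inspected.
            let _c = input.chars().next().unwrap();
            "ident-like"
        }
        _ => "delim",
    }
}

fn main() {
    assert_eq!(first_token_kind("  div"), "whitespace");
    assert_eq!(first_token_kind("42px"), "number");
    assert_eq!(first_token_kind("über"), "ident-like");
}
```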
@@ -641,15 +642,15 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
 
 #[inline]
 fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
-    !tokenizer.is_eof() && match tokenizer.next_char() {
-        'a'...'z' | 'A'...'Z' | '_' | '\0' => true,
-        '-' => tokenizer.has_at_least(1) && match tokenizer.char_at(1) {
-            'a'...'z' | 'A'...'Z' | '-' | '_' | '\0' => true,
-            '\\' => !tokenizer.has_newline_at(1),
-            c => c > '\x7F',  // Non-ASCII
+    !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
+        b'a'...b'z' | b'A'...b'Z' | b'_' | b'\0' => true,
+        b'-' => tokenizer.has_at_least(1) && match tokenizer.byte_at(1) {
+            b'a'...b'z' | b'A'...b'Z' | b'-' | b'_' | b'\0' => true,
+            b'\\' => !tokenizer.has_newline_at(1),
+            c => !c.is_ascii(),
         },
-        '\\' => !tokenizer.has_newline_at(1),
-        c => c > '\x7F',  // Non-ASCII
+        b'\\' => !tokenizer.has_newline_at(1),
+        c => !c.is_ascii(),
     }
 }
 
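The recurring `c => !c.is_ascii()` arms replace the old char-based `c > '\x7F'` test. The two are equivalent when applied to the first byte of a UTF-8 scalar: a char is non-ASCII exactly when the leading byte of its encoding has the high bit set. A small sketch checking that equivalence (my illustration, assuming valid UTF-8 input, which `&str` guarantees):

```rust
fn main() {
    for c in ['a', '\x7F', 'é', '中', '🦀'] {
        let mut buf = [0u8; 4];
        // First byte of the char's UTF-8 encoding.
        let first = c.encode_utf8(&mut buf).as_bytes()[0];
        // The old char test and the new byte test agree.
        assert_eq!(c > '\x7F', !first.is_ascii());
    }
}
```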