@@ -380,8 +380,8 @@ impl<'a> Tokenizer<'a> {
     }
 
     #[inline]
-    fn starts_with(&self, needle: &str) -> bool {
-        self.input[self.position..].starts_with(needle)
+    fn starts_with(&self, needle: &[u8]) -> bool {
+        self.input.as_bytes()[self.position..].starts_with(needle)
     }
 }
 
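The first hunk switches `starts_with` from `&str` to `&[u8]`: prefix checks against byte-string literals like `b"$="` then work directly on the raw input, and byte-slice indexing never has to validate char boundaries. A minimal standalone sketch of the difference (the `input`/`position` names mirror the tokenizer's fields; this snippet is illustrative, not part of the patch):

    fn main() {
        let input = "é~=x";   // 'é' is two bytes in UTF-8: 0xC3 0xA9
        let position = 2;     // a byte offset, as in the tokenizer

        // Byte slicing is valid at any in-bounds index, and the prefix
        // check is a plain byte comparison:
        assert!(input.as_bytes()[position..].starts_with(b"~="));

        // &str slicing would panic if the offset ever landed inside a
        // multi-byte character: &input[1..] is not a char boundary.
    }

Since the tokenizer appears to advance only past whole ASCII bytes or whole characters, its positions should stay on char boundaries anyway, but the byte-slice form makes that a non-issue rather than a panic path.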
@@ -405,88 +405,88 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option<Token<'a>> {
     if tokenizer.is_eof() {
         return None
     }
-    let c = tokenizer.next_char();
+    let c = tokenizer.next_byte_unchecked();
     let token = match c {
-        '\t' | '\n' | ' ' | '\r' | '\x0C' => {
+        b'\t' | b'\n' | b' ' | b'\r' | b'\x0C' => {
             let start_position = tokenizer.position();
             tokenizer.advance(1);
             while !tokenizer.is_eof() {
-                match tokenizer.next_char() {
-                    ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1),
+                match tokenizer.next_byte_unchecked() {
+                    b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => tokenizer.advance(1),
                     _ => break,
                 }
             }
             WhiteSpace(tokenizer.slice_from(start_position))
         },
-        '"' => consume_string(tokenizer, false),
-        '#' => {
+        b'"' => consume_string(tokenizer, false),
+        b'#' => {
             tokenizer.advance(1);
             if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
-            else if !tokenizer.is_eof() && match tokenizer.next_char() {
-                'a'...'z' | 'A'...'Z' | '0'...'9' | '-' | '_' => true,
-                '\\' => !tokenizer.has_newline_at(1),
-                _ => c > '\x7F',  // Non-ASCII
+            else if !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
+                b'a'...b'z' | b'A'...b'Z' | b'0'...b'9' | b'-' | b'_' => true,
+                b'\\' => !tokenizer.has_newline_at(1),
+                _ => !c.is_ascii(),
             } { Hash(consume_name(tokenizer)) }
-            else { Delim(c) }
+            else { Delim('#') }
         },
-        '$' => {
-            if tokenizer.starts_with("$=") { tokenizer.advance(2); SuffixMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'$' => {
+            if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
+            else { tokenizer.advance(1); Delim('$') }
         },
-        '\'' => consume_string(tokenizer, true),
-        '(' => { tokenizer.advance(1); ParenthesisBlock },
-        ')' => { tokenizer.advance(1); CloseParenthesis },
-        '*' => {
-            if tokenizer.starts_with("*=") { tokenizer.advance(2); SubstringMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'\'' => consume_string(tokenizer, true),
+        b'(' => { tokenizer.advance(1); ParenthesisBlock },
+        b')' => { tokenizer.advance(1); CloseParenthesis },
+        b'*' => {
+            if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
+            else { tokenizer.advance(1); Delim('*') }
         },
-        '+' => {
+        b'+' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9')
+                && matches!(tokenizer.byte_at(1), b'0'...b'9')
             ) || (
                 tokenizer.has_at_least(2)
-                && tokenizer.char_at(1) == '.'
-                && matches!(tokenizer.char_at(2), '0'...'9')
+                && tokenizer.byte_at(1) == b'.'
+                && matches!(tokenizer.byte_at(2), b'0'...b'9')
             ) {
                 consume_numeric(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('+')
             }
         },
-        ',' => { tokenizer.advance(1); Comma },
-        '-' => {
+        b',' => { tokenizer.advance(1); Comma },
+        b'-' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9')
+                && matches!(tokenizer.byte_at(1), b'0'...b'9')
             ) || (
                 tokenizer.has_at_least(2)
-                && tokenizer.char_at(1) == '.'
-                && matches!(tokenizer.char_at(2), '0'...'9')
+                && tokenizer.byte_at(1) == b'.'
+                && matches!(tokenizer.byte_at(2), b'0'...b'9')
             ) {
                 consume_numeric(tokenizer)
-            } else if tokenizer.starts_with("-->") {
+            } else if tokenizer.starts_with(b"-->") {
                 tokenizer.advance(3);
                 CDC
             } else if is_ident_start(tokenizer) {
                 consume_ident_like(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('-')
             }
         },
-        '.' => {
+        b'.' => {
             if tokenizer.has_at_least(1)
-               && matches!(tokenizer.char_at(1), '0'...'9'
+               && matches!(tokenizer.byte_at(1), b'0'...b'9'
             ) {
                 consume_numeric(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('.')
             }
         }
-        '/' if tokenizer.starts_with("/*") => {
+        b'/' if tokenizer.starts_with(b"/*") => {
             tokenizer.advance(2);  // consume "/*"
             let start_position = tokenizer.position();
             let content;
@@ -503,58 +503,59 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option<Token<'a>> {
             }
             Comment(content)
         }
-        '0'...'9' => consume_numeric(tokenizer),
-        ':' => { tokenizer.advance(1); Colon },
-        ';' => { tokenizer.advance(1); Semicolon },
-        '<' => {
-            if tokenizer.starts_with("<!--") {
+        b'0'...b'9' => consume_numeric(tokenizer),
+        b':' => { tokenizer.advance(1); Colon },
+        b';' => { tokenizer.advance(1); Semicolon },
+        b'<' => {
+            if tokenizer.starts_with(b"<!--") {
                 tokenizer.advance(4);
                 CDO
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('<')
             }
         },
-        '@' => {
+        b'@' => {
             tokenizer.advance(1);
             if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
-            else { Delim(c) }
+            else { Delim('@') }
         },
-        'u' | 'U' => {
+        b'u' | b'U' => {
             if tokenizer.has_at_least(2)
-               && tokenizer.char_at(1) == '+'
-               && matches!(tokenizer.char_at(2), '0'...'9' | 'a'...'f' | 'A'...'F' | '?')
+               && tokenizer.byte_at(1) == b'+'
+               && matches!(tokenizer.byte_at(2), b'0'...b'9' | b'a'...b'f' | b'A'...b'F' | b'?')
             { consume_unicode_range(tokenizer) }
             else { consume_ident_like(tokenizer) }
         },
-        'a'...'z' | 'A'...'Z' | '_' | '\0' => consume_ident_like(tokenizer),
-        '[' => { tokenizer.advance(1); SquareBracketBlock },
-        '\\' => {
+        b'a'...b'z' | b'A'...b'Z' | b'_' | b'\0' => consume_ident_like(tokenizer),
+        b'[' => { tokenizer.advance(1); SquareBracketBlock },
+        b'\\' => {
             if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
-            else { tokenizer.advance(1); Delim(c) }
+            else { tokenizer.advance(1); Delim('\\') }
         },
-        ']' => { tokenizer.advance(1); CloseSquareBracket },
-        '^' => {
-            if tokenizer.starts_with("^=") { tokenizer.advance(2); PrefixMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b']' => { tokenizer.advance(1); CloseSquareBracket },
+        b'^' => {
+            if tokenizer.starts_with(b"^=") { tokenizer.advance(2); PrefixMatch }
+            else { tokenizer.advance(1); Delim('^') }
         },
-        '{' => { tokenizer.advance(1); CurlyBracketBlock },
-        '|' => {
-            if tokenizer.starts_with("|=") { tokenizer.advance(2); DashMatch }
-            else if tokenizer.starts_with("||") { tokenizer.advance(2); Column }
-            else { tokenizer.advance(1); Delim(c) }
+        b'{' => { tokenizer.advance(1); CurlyBracketBlock },
+        b'|' => {
+            if tokenizer.starts_with(b"|=") { tokenizer.advance(2); DashMatch }
+            else if tokenizer.starts_with(b"||") { tokenizer.advance(2); Column }
+            else { tokenizer.advance(1); Delim('|') }
         },
-        '}' => { tokenizer.advance(1); CloseCurlyBracket },
-        '~' => {
-            if tokenizer.starts_with("~=") { tokenizer.advance(2); IncludeMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'}' => { tokenizer.advance(1); CloseCurlyBracket },
+        b'~' => {
+            if tokenizer.starts_with(b"~=") { tokenizer.advance(2); IncludeMatch }
+            else { tokenizer.advance(1); Delim('~') }
         },
         _ => {
-            if c > '\x7F' {  // Non-ASCII
+            if !c.is_ascii() {  // Non-ASCII
                 consume_ident_like(tokenizer)
             } else {
+                let ret = Delim(tokenizer.next_char());
                 tokenizer.advance(1);
-                Delim(c)
+                ret
            }
         },
     };
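One subtlety in the catch-all arm above: `Delim` still carries a `char`, so the fallback reads the full character with `next_char()` before advancing, and the other arms now spell their delimiters out (`Delim('#')`, `Delim('$')`, and so on) because `c` is a `u8` here. For the ASCII fallback the two spellings are equivalent, since an ASCII byte casts losslessly to the same scalar value; an illustrative assertion (not from the patch):

    fn main() {
        let c: u8 = b'%';
        assert_eq!(c as char, '%');   // ASCII bytes and chars coincide
    }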
@@ -641,15 +642,15 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
 
 #[inline]
 fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
-    !tokenizer.is_eof() && match tokenizer.next_char() {
-        'a'...'z' | 'A'...'Z' | '_' | '\0' => true,
-        '-' => tokenizer.has_at_least(1) && match tokenizer.char_at(1) {
-            'a'...'z' | 'A'...'Z' | '-' | '_' | '\0' => true,
-            '\\' => !tokenizer.has_newline_at(1),
-            c => c > '\x7F',  // Non-ASCII
+    !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
+        b'a'...b'z' | b'A'...b'Z' | b'_' | b'\0' => true,
+        b'-' => tokenizer.has_at_least(1) && match tokenizer.byte_at(1) {
+            b'a'...b'z' | b'A'...b'Z' | b'-' | b'_' | b'\0' => true,
+            b'\\' => !tokenizer.has_newline_at(1),
+            c => !c.is_ascii(),
         },
-        '\\' => !tokenizer.has_newline_at(1),
-        c => c > '\x7F',  // Non-ASCII
+        b'\\' => !tokenizer.has_newline_at(1),
+        c => !c.is_ascii(),
     }
 }
 
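More generally, dispatching on a single byte is sound for UTF-8 input because every byte of a non-ASCII character's encoding is at least 0x80: an ASCII byte in the stream is always a complete character, so the match arms can never mistake the middle of a multi-byte character for an ASCII token, and the `!c.is_ascii()` arms route every non-ASCII lead byte to the char-aware paths such as `consume_ident_like`. An illustrative check of that invariant (not part of the patch):

    fn main() {
        for ch in ['é', '中', '🦀'] {
            let mut buf = [0u8; 4];
            for &b in ch.encode_utf8(&mut buf).as_bytes() {
                assert!(!b.is_ascii());   // every byte is >= 0x80
            }
        }
    }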