diff --git a/.travis.yml b/.travis.yml index 94f98322..ae971ab0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,9 @@ script: - cargo test --verbose - cargo doc --verbose - cargo test --features heapsize + - cargo test --features dummy_match_byte + - if [ "$TRAVIS_RUST_VERSION" == "nightly" ]; then cargo bench --features bench; fi + - if [ "$TRAVIS_RUST_VERSION" == "nightly" ]; then cargo bench --features "bench dummy_match_byte"; fi notifications: webhooks: http://build.servo.org:54856/travis diff --git a/Cargo.toml b/Cargo.toml index 414003dc..f46859b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ repository = "https://github.com/servo/rust-cssparser" readme = "README.md" keywords = ["css", "syntax", "parser"] license = "MPL-2.0" +build = "build.rs" [dev-dependencies] @@ -22,7 +23,12 @@ heapsize = {version = ">=0.1.1, <0.4.0", optional = true} matches = "0.1" serde = {version = ">=0.6.6, <0.9", optional = true} +[build-dependencies] +syn = { version = "0.10.6", features = ["full", "visit"]} +quote = "0.3" + [features] serde-serialization = [ "serde" ] heap_size = [ "heapsize" ] bench = [] +dummy_match_byte = [] diff --git a/build.rs b/build.rs new file mode 100644 index 00000000..2c7dcbfc --- /dev/null +++ b/build.rs @@ -0,0 +1,37 @@ + +#[macro_use] extern crate quote; +extern crate syn; + +use std::env; +use std::path::Path; + + +#[cfg(feature = "dummy_match_byte")] +mod codegen { + use std::path::Path; + pub fn main(_: &Path) {} +} + +#[cfg(not(feature = "dummy_match_byte"))] +#[path = "src/macros/mod.rs"] +mod macros; + +#[cfg(not(feature = "dummy_match_byte"))] +mod codegen { + use macros; + use std::env; + use std::path::Path; + + pub fn main(tokenizer_rs: &Path) { + macros::match_byte::expand(tokenizer_rs, + &Path::new(&env::var("OUT_DIR").unwrap()).join("tokenizer.rs")); + + } +} + +fn main() { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let tokenizer_rs = Path::new(&manifest_dir).join("src/tokenizer.rs"); + codegen::main(&tokenizer_rs); + println!("cargo:rerun-if-changed={}", tokenizer_rs.display()); +} diff --git a/src/lib.rs b/src/lib.rs index 0681bcde..58859e88 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -137,7 +137,25 @@ macro_rules! match_ignore_ascii_case { } mod rules_and_declarations; + +#[cfg(feature = "dummy_match_byte")] +macro_rules! 
match_byte {
+    ($value:expr, $($rest:tt)* ) => {
+        match $value {
+            $(
+                $rest
+            )+
+        }
+    };
+}
+
+#[cfg(feature = "dummy_match_byte")]
 mod tokenizer;
+
+#[cfg(not(feature = "dummy_match_byte"))]
+mod tokenizer {
+    include!(concat!(env!("OUT_DIR"), "/tokenizer.rs"));
+}
 mod parser;
 mod from_bytes;
 mod color;
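
Note: with --features dummy_match_byte, the match_byte! macro above is plain sugar over `match`; that is the variant the new Travis jobs benchmark against the table-driven expansion generated by build.rs. A hand-written, self-contained sketch of that behaviour (not part of the diff; it keeps the 2016-era `...` range-pattern syntax used throughout this patch):

    // The dummy macro, exactly as defined in lib.rs above.
    macro_rules! match_byte {
        ($value:expr, $($rest:tt)* ) => {
            match $value {
                $( $rest )+
            }
        };
    }

    fn classify(b: u8) -> &'static str {
        // Expands to an ordinary `match b { ... }`.
        match_byte! { b,
            b'0'...b'9' => { "digit" },
            b'a'...b'z' | b'A'...b'Z' => { "letter" },
            _ => { "other" },
        }
    }

    fn main() {
        assert_eq!(classify(b'7'), "digit");
        assert_eq!(classify(b'Q'), "letter");
        assert_eq!(classify(b'#'), "other");
    }
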
diff --git a/src/macros/match_byte.rs b/src/macros/match_byte.rs
new file mode 100644
index 00000000..da69dd90
--- /dev/null
+++ b/src/macros/match_byte.rs
@@ -0,0 +1,229 @@
+
+use quote::{ToTokens, Tokens};
+use super::visit::{Visitor, RecursiveVisitor};
+use std::fs::File;
+use std::io::{Read, Write};
+use std::mem;
+use std::path::Path;
+use std::vec;
+use std::iter;
+use syn;
+
+pub fn expand(from: &Path, to: &Path) {
+    let mut source = String::new();
+    File::open(from).unwrap().read_to_string(&mut source).unwrap();
+    let mut crate_ = syn::parse_crate(&source).expect("Parsing the tokenizer module");
+    let mut visitor = ExpanderVisitor;
+
+    RecursiveVisitor { node_visitor: &mut visitor }.visit_crate(&mut crate_);
+
+    let mut tokens = Tokens::new();
+    crate_.to_tokens(&mut tokens);
+    let code = tokens.to_string().replace("{ ", "{\n").replace(" }", "\n}");
+    File::create(to).unwrap().write_all(code.as_bytes()).unwrap();
+}
+
+struct ExpanderVisitor;
+
+impl Visitor for ExpanderVisitor {
+    fn visit_expression(&mut self, expr: &mut syn::Expr) {
+        let tokens = match expr.node {
+            syn::ExprKind::Mac(ref mut macro_) if macro_.path == syn::Path::from("match_byte") => {
+                mem::replace(&mut macro_.tts, vec![])
+            }
+            _ => return,
+        };
+        let (to_be_matched, table, cases, wildcard_binding) = parse_match_bytes_macro(tokens);
+        *expr = expand_match_bytes_macro(to_be_matched, table, cases, wildcard_binding);
+    }
+
+    fn visit_statement(&mut self, stmt: &mut syn::Stmt) {
+        let tokens = match *stmt {
+            syn::Stmt::Mac(ref mut macro_) if macro_.0.path == syn::Path::from("match_byte") => {
+                mem::replace(&mut macro_.0.tts, vec![])
+            }
+            _ => return,
+        };
+        let (to_be_matched, table, cases, wildcard_binding) = parse_match_bytes_macro(tokens);
+        let expr = expand_match_bytes_macro(to_be_matched, table, cases, wildcard_binding);
+        *stmt = syn::Stmt::Expr(Box::new(expr));
+    }
+}
+
+fn parse_match_bytes_macro(tts: Vec<syn::TokenTree>) -> (Vec<syn::TokenTree>, Vec<u8>, Vec<Case>, Option<syn::Ident>) {
+    use syn::TokenTree::Delimited;
+    use syn::DelimToken::Brace;
+
+    let mut tts = tts.into_iter();
+    let inner_tts = match tts.next() {
+        Some(Delimited(syn::Delimited { delim: Brace, tts })) => tts,
+        other => panic!("expected one top-level {{}} block, got: {:?}", other),
+    };
+
+    assert_eq!(tts.next(), None);
+
+    let mut tts = inner_tts.into_iter();
+
+    // Grab the thing we're matching, until we find a comma.
+    let mut left_hand_side = vec![];
+    loop {
+        match tts.next() {
+            Some(syn::TokenTree::Token(syn::Token::Comma)) => break,
+            Some(other) => left_hand_side.push(other),
+            None => panic!("Expected not to run out of tokens looking for a comma"),
+        }
+    }
+
+    let mut cases = vec![];
+    let mut table = vec![0; 256];
+
+    let mut tts = tts.peekable();
+    let mut case_id: u8 = 1;
+    let mut binding = None;
+    while tts.len() > 0 {
+        cases.push(parse_case(&mut tts, &mut *table, &mut binding, case_id));
+
+        // Allow an optional comma between cases.
+        match tts.peek() {
+            Some(&syn::TokenTree::Token(syn::Token::Comma)) => {
+                tts.next();
+            },
+            _ => {},
+        }
+
+        case_id += 1;
+    }
+
+    (left_hand_side, table, cases, binding)
+}
+
+#[derive(Debug)]
+struct Case(Vec<syn::TokenTree>);
+
+/// Parses a single pattern => expression, and returns the case, filling in the
+/// table with the case id for every byte that matched.
+///
+/// The `binding` parameter is the identifier that is used by the wildcard
+/// pattern.
+fn parse_case(tts: &mut iter::Peekable<vec::IntoIter<syn::TokenTree>>,
+              table: &mut [u8],
+              binding: &mut Option<syn::Ident>,
+              case_id: u8)
+              -> Case {
+    // The last byte checked, as part of this pattern, to properly detect
+    // ranges.
+    let mut last_byte: Option<u8> = None;
+
+    // Loop through the pattern, filling the table with this case's id for
+    // every byte that it matches.
+    loop {
+        match tts.next() {
+            Some(syn::TokenTree::Token(syn::Token::Literal(syn::Lit::Byte(byte)))) => {
+                table[byte as usize] = case_id;
+                last_byte = Some(byte);
+            }
+            Some(syn::TokenTree::Token(syn::Token::BinOp(syn::BinOpToken::Or))) => {
+                last_byte = None; // This pattern is over.
+            },
+            Some(syn::TokenTree::Token(syn::Token::DotDotDot)) => {
+                assert!(last_byte.is_some(), "Expected closed range!");
+                match tts.next() {
+                    Some(syn::TokenTree::Token(syn::Token::Literal(syn::Lit::Byte(byte)))) => {
+                        for b in last_byte.take().unwrap()..byte {
+                            if table[b as usize] == 0 {
+                                table[b as usize] = case_id;
+                            }
+                        }
+                        if table[byte as usize] == 0 {
+                            table[byte as usize] = case_id;
+                        }
+                    }
+                    other => panic!("Expected closed range, got: {:?}", other),
+                }
+            },
+            Some(syn::TokenTree::Token(syn::Token::FatArrow)) => break,
+            Some(syn::TokenTree::Token(syn::Token::Ident(ident))) => {
+                assert_eq!(last_byte, None, "I don't support ranges with identifiers!");
+                assert_eq!(*binding, None);
+                for byte in table.iter_mut() {
+                    if *byte == 0 {
+                        *byte = case_id;
+                    }
+                }
+                *binding = Some(ident)
+            }
+            Some(syn::TokenTree::Token(syn::Token::Underscore)) => {
+                assert_eq!(last_byte, None);
+                for byte in table.iter_mut() {
+                    if *byte == 0 {
+                        *byte = case_id;
+                    }
+                }
+            },
+            other => panic!("Expected literal byte, got: {:?}", other),
+        }
+    }
+
+    match tts.next() {
+        Some(syn::TokenTree::Delimited(syn::Delimited { delim: syn::DelimToken::Brace, tts })) => {
+            Case(tts)
+        }
+        other => panic!("Expected case with braces after fat arrow, got: {:?}", other),
+    }
+}
+
+fn expand_match_bytes_macro(to_be_matched: Vec<syn::TokenTree>,
+                            table: Vec<u8>,
+                            cases: Vec<Case>,
+                            binding: Option<syn::Ident>)
+                            -> syn::Expr {
+    use std::fmt::Write;
+
+    assert!(!to_be_matched.is_empty());
+    assert_eq!(table.len(), 256);
+    assert!(table.iter().all(|b| *b != 0), "Incomplete pattern? Bogus code!");
+
+    // We build the expression with text since it's easier.
+    let mut expr = "{\n".to_owned();
+    expr.push_str("enum Case {\n");
+    for (i, _) in cases.iter().enumerate() {
+        write!(&mut expr, "Case{} = {},", i + 1, i + 1).unwrap();
+    }
+    expr.push_str("}\n"); // enum Case
+
+    expr.push_str("static __CASES: [Case; 256] = [");
+    for byte in &table {
+        write!(&mut expr, "Case::Case{}, ", *byte).unwrap();
+    }
+    expr.push_str("];\n");
+
+    let mut tokens = Tokens::new();
+    let to_be_matched = syn::Delimited {
+        delim: if binding.is_some() { syn::DelimToken::Brace } else { syn::DelimToken::Paren },
+        tts: to_be_matched
+    };
+    to_be_matched.to_tokens(&mut tokens);
+
+    if let Some(ref binding) = binding {
+        write!(&mut expr, "let {} = {};\n", binding.to_string(), tokens.as_str()).unwrap();
+    }
+
+    write!(&mut expr, "match __CASES[{} as usize] {{", match binding {
+        Some(binding) => binding.to_string(),
+        None => tokens.to_string(),
+    }).unwrap();
+
+    for (i, case) in cases.into_iter().enumerate() {
+        let mut case_tokens = Tokens::new();
+        let case = syn::Delimited {
+            delim: syn::DelimToken::Brace,
+            tts: case.0
+        };
+        case.to_tokens(&mut case_tokens);
+        write!(&mut expr, "Case::Case{} => {},\n", i + 1, case_tokens.as_str()).unwrap();
+    }
+    expr.push_str("}\n"); // match
+
+    expr.push_str("}\n"); // top
+
+    syn::parse_expr(&expr).expect("couldn't parse expression?")
+}
diff --git a/src/macros/mod.rs b/src/macros/mod.rs
new file mode 100644
index 00000000..6c6c9401
--- /dev/null
+++ b/src/macros/mod.rs
@@ -0,0 +1,5 @@
+
+
+
+pub mod match_byte;
+pub mod visit;
diff --git a/src/macros/visit.rs b/src/macros/visit.rs
new file mode 100644
index 00000000..120a1d30
--- /dev/null
+++ b/src/macros/visit.rs
@@ -0,0 +1,265 @@
+// Copyright 2016 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/// Just enough of an AST visitor to reach every expression.
+ +use syn; + +pub trait Visitor { + fn visit_crate(&mut self, _crate: &mut syn::Crate) {} + fn visit_item(&mut self, _item: &mut syn::Item) {} + fn visit_trait_item(&mut self, _item: &mut syn::TraitItem) {} + fn visit_impl_item(&mut self, _item: &mut syn::ImplItem) {} + fn visit_block(&mut self, _expr: &mut syn::Block) {} + fn visit_statement(&mut self, _expr: &mut syn::Stmt) {} + fn visit_expression(&mut self, _expr: &mut syn::Expr) {} +} + +pub struct RecursiveVisitor<'a, V: 'a> { + pub node_visitor: &'a mut V, +} + +impl<'a, V: Visitor + 'a> Visitor for RecursiveVisitor<'a, V> { + fn visit_crate(&mut self, crate_: &mut syn::Crate) { + self.node_visitor.visit_crate(crate_); + for item in &mut crate_.items { + self.visit_item(item) + } + } + + fn visit_item(&mut self, item: &mut syn::Item) { + use syn::ItemKind::*; + self.node_visitor.visit_item(item); + match item.node { + ExternCrate(_) => {} + Use(_) => {} + Static(_, _, ref mut expr) => self.visit_expression(expr), + Const(_, ref mut expr) => self.visit_expression(expr), + Fn(_, _, _, _, _, ref mut block) => self.visit_block(block), + Mod(Some(ref mut items)) => { + for item in items { + self.visit_item(item) + } + } + Mod(None) => {} + ForeignMod(_) => {} + Ty(_, _) => {} + Enum(_, _) => {} + Struct(_, _) => {} + Union(_, _) => {} + Trait(_, _, _, ref mut trait_items) => { + for trait_item in trait_items { + self.visit_trait_item(trait_item) + } + } + DefaultImpl(_, _) => {} + Impl(_, _, _, _, _, ref mut impl_items) => { + for impl_item in impl_items { + self.visit_impl_item(impl_item) + } + } + Mac(_) => {} + } + } + + fn visit_trait_item(&mut self, trait_item: &mut syn::TraitItem) { + use syn::TraitItemKind::*; + self.node_visitor.visit_trait_item(trait_item); + match trait_item.node { + Const(_, Some(ref mut expr)) => self.visit_expression(expr), + Const(_, None) => {} + Method(_, Some(ref mut block)) => self.visit_block(block), + Method(_, None) => {} + Type(_, _) => {} + Macro(_) => {} + } + } + + fn visit_impl_item(&mut self, impl_item: &mut syn::ImplItem) { + use syn::ImplItemKind::*; + self.node_visitor.visit_impl_item(impl_item); + match impl_item.node { + Const(_, ref mut expr) => self.visit_expression(expr), + Method(_, ref mut block) => self.visit_block(block), + Type(_) => {} + Macro(_) => {} + } + } + + fn visit_block(&mut self, block: &mut syn::Block) { + self.node_visitor.visit_block(block); + for statement in &mut block.stmts { + self.visit_statement(statement) + } + } + + fn visit_statement(&mut self, statement: &mut syn::Stmt) { + use syn::Stmt::*; + self.node_visitor.visit_statement(statement); + match *statement { + Local(ref mut local) => { + if let Some(ref mut expr) = local.init { + self.visit_expression(expr) + } + } + Item(ref mut item) => self.visit_item(item), + Expr(ref mut expr) => self.visit_expression(expr), + Semi(ref mut expr) => self.visit_expression(expr), + Mac(_) => {} + } + } + + fn visit_expression(&mut self, expr: &mut syn::Expr) { + use syn::ExprKind::*; + self.node_visitor.visit_expression(expr); + match expr.node { + Box(ref mut boxed) => { + self.visit_expression(boxed) + } + Vec(ref mut elements) => { + for element in elements { + self.visit_expression(element) + } + } + Call(ref mut called, ref mut args) => { + self.visit_expression(called); + for arg in args { + self.visit_expression(arg) + } + } + MethodCall(_, _, ref mut args) => { + for arg in args { + self.visit_expression(arg) + } + } + Tup(ref mut elements) => { + for element in elements { + self.visit_expression(element) + } + } 
+            Binary(_, ref mut left, ref mut right) => {
+                self.visit_expression(left);
+                self.visit_expression(right);
+            }
+            Unary(_, ref mut operand) => {
+                self.visit_expression(operand)
+            }
+            Lit(_) => {}
+            Cast(ref mut expr, _) => {
+                self.visit_expression(expr)
+            }
+            Type(ref mut expr, _) => {
+                self.visit_expression(expr)
+            }
+            If(ref mut test, ref mut then, ref mut else_) => {
+                self.visit_expression(test);
+                self.visit_block(then);
+                if let Some(ref mut else_) = *else_ {
+                    self.visit_expression(else_);
+                }
+            }
+            IfLet(_, ref mut test, ref mut then, ref mut else_) => {
+                self.visit_expression(test);
+                self.visit_block(then);
+                if let Some(ref mut else_) = *else_ {
+                    self.visit_expression(else_);
+                }
+            }
+            While(ref mut test, ref mut block, _) => {
+                self.visit_expression(test);
+                self.visit_block(block);
+            }
+            WhileLet(_, ref mut test, ref mut block, _) => {
+                self.visit_expression(test);
+                self.visit_block(block);
+            }
+            ForLoop(_, ref mut iterable, ref mut block, _) => {
+                self.visit_expression(iterable);
+                self.visit_block(block);
+            }
+            Loop(ref mut block, _) => {
+                self.visit_block(block);
+            }
+            Match(ref mut matched, ref mut arms) => {
+                self.visit_expression(matched);
+                for arm in arms {
+                    if let Some(ref mut guard) = arm.guard {
+                        self.visit_expression(guard)
+                    }
+                    self.visit_expression(&mut arm.body)
+                }
+            }
+            Closure(_, _, ref mut block) => {
+                self.visit_block(block)
+            }
+            Block(_, ref mut block) => {
+                self.visit_block(block)
+            }
+            Assign(ref mut left, ref mut right) => {
+                self.visit_expression(left);
+                self.visit_expression(right);
+            }
+            AssignOp(_, ref mut left, ref mut right) => {
+                self.visit_expression(left);
+                self.visit_expression(right);
+            }
+            Field(ref mut base, _) => {
+                self.visit_expression(base)
+            }
+            TupField(ref mut base, _) => {
+                self.visit_expression(base)
+            }
+            Index(ref mut base, ref mut index) => {
+                self.visit_expression(base);
+                self.visit_expression(index);
+            }
+            Range(ref mut start, ref mut end, _) => {
+                if let Some(ref mut start) = *start {
+                    self.visit_expression(start)
+                }
+                if let Some(ref mut end) = *end {
+                    self.visit_expression(end)
+                }
+            }
+            Path(_, _) => {}
+            AddrOf(_, ref mut base) => {
+                self.visit_expression(base)
+            }
+            Break(_) => {}
+            Continue(_) => {}
+            Ret(Some(ref mut expr)) => {
+                self.visit_expression(expr)
+            }
+            Ret(None) => {}
+            Mac(_) => {}
+            Struct(_, ref mut fields, ref mut base) => {
+                for field in fields {
+                    self.visit_expression(&mut field.expr)
+                }
+                if let Some(ref mut base) = *base {
+                    self.visit_expression(base)
+                }
+            }
+            Repeat(ref mut element, ref mut number) => {
+                self.visit_expression(element);
+                self.visit_expression(number);
+            }
+            Paren(ref mut expr) => {
+                self.visit_expression(expr)
+            }
+            Try(ref mut expr) => {
+                self.visit_expression(expr)
+            }
+            InPlace(ref mut expr, ref mut other) => {
+                self.visit_expression(expr);
+                self.visit_expression(other);
+            }
+        }
+    }
+}
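
Note: a minimal consumer of this visitor API would look like the sketch below, hand-written against the syn 0.10 API used in this PR and mirroring how expand() in match_byte.rs drives ExpanderVisitor (ExprCounter and count_expressions are illustrative names, not part of the patch):

    extern crate syn;

    use macros::visit::{Visitor, RecursiveVisitor};

    // Counts every expression in a source file; all other Visitor
    // methods keep their default no-op implementations.
    struct ExprCounter {
        count: usize,
    }

    impl Visitor for ExprCounter {
        fn visit_expression(&mut self, _expr: &mut syn::Expr) {
            self.count += 1;
        }
    }

    fn count_expressions(source: &str) -> usize {
        // Same entry point that expand() uses above.
        let mut crate_ = syn::parse_crate(source).expect("Parsing source");
        let mut counter = ExprCounter { count: 0 };
        RecursiveVisitor { node_visitor: &mut counter }.visit_crate(&mut crate_);
        counter.count
    }
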
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 3fac405e..0bc7f857 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -262,7 +262,7 @@ impl<'a> Tokenizer<'a> {
 
     #[inline]
     pub fn next(&mut self) -> Result<Token<'a>, ()> {
-        next_token(self).ok_or(())
+        next_token(self)
     }
 
     #[inline]
@@ -359,17 +359,14 @@ impl<'a> Tokenizer<'a> {
     }
 
     #[inline]
-    fn next_char(&self) -> char { self.char_at(0) }
-
-    #[inline]
-    fn char_at(&self, offset: usize) -> char {
-        self.input[self.position + offset..].chars().next().unwrap()
+    fn next_char(&self) -> char {
+        self.input[self.position..].chars().next().unwrap()
     }
 
     #[inline]
     fn has_newline_at(&self, offset: usize) -> bool {
         self.position + offset < self.input.len() &&
-        matches!(self.char_at(offset), '\n' | '\r' | '\x0C')
+        matches!(self.byte_at(offset), b'\n' | b'\r' | b'\x0C')
     }
 
     #[inline]
@@ -380,8 +377,8 @@ impl<'a> Tokenizer<'a> {
     }
 
     #[inline]
-    fn starts_with(&self, needle: &str) -> bool {
-        self.input[self.position..].starts_with(needle)
+    fn starts_with(&self, needle: &[u8]) -> bool {
+        self.input.as_bytes()[self.position..].starts_with(needle)
     }
 }
 
@@ -401,164 +398,170 @@ pub struct SourceLocation {
 }
 
 
-fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Option<Token<'a>> {
+fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
     if tokenizer.is_eof() {
-        return None
+        return Err(())
     }
-    let c = tokenizer.next_char();
-    let token = match c {
-        '\t' | '\n' | ' ' | '\r' | '\x0C' => {
+    let c = tokenizer.next_byte_unchecked();
+    let token = match_byte! { c,
+        b'\t' | b'\n' | b' ' | b'\r' | b'\x0C' => {
             let start_position = tokenizer.position();
             tokenizer.advance(1);
             while !tokenizer.is_eof() {
-                match tokenizer.next_char() {
-                    ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1),
+                match tokenizer.next_byte_unchecked() {
+                    b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => tokenizer.advance(1),
                     _ => break,
                 }
            }
             WhiteSpace(tokenizer.slice_from(start_position))
         },
-        '"' => consume_string(tokenizer, false),
-        '#' => {
+        b'"' => { consume_string(tokenizer, false) },
+        b'#' => {
             tokenizer.advance(1);
             if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
-            else if !tokenizer.is_eof() && match tokenizer.next_char() {
-                'a'...'z' | 'A'...'Z' | '0'...'9' | '-' | '_' => true,
-                '\\' => !tokenizer.has_newline_at(1),
-                _ => c > '\x7F',  // Non-ASCII
+            else if !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
+                b'a'...b'z' | b'A'...b'Z' | b'0'...b'9' | b'-' | b'_' => true,
+                b'\\' => !tokenizer.has_newline_at(1),
+                _ => !c.is_ascii(),
            } { Hash(consume_name(tokenizer)) }
-            else { Delim(c) }
+            else { Delim('#') }
         },
-        '$' => {
-            if tokenizer.starts_with("$=") { tokenizer.advance(2); SuffixMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'$' => {
+            if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
+            else { tokenizer.advance(1); Delim('$') }
         },
-        '\'' => consume_string(tokenizer, true),
-        '(' => { tokenizer.advance(1); ParenthesisBlock },
-        ')' => { tokenizer.advance(1); CloseParenthesis },
-        '*' => {
-            if tokenizer.starts_with("*=") { tokenizer.advance(2); SubstringMatch }
-            else { tokenizer.advance(1); Delim(c) }
+        b'\'' => { consume_string(tokenizer, true) },
+        b'(' => { tokenizer.advance(1); ParenthesisBlock },
+        b')' => { tokenizer.advance(1); CloseParenthesis },
+        b'*' => {
+            if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
+            else { tokenizer.advance(1); Delim('*') }
         },
-        '+' => {
+        b'+' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9')
+                && matches!(tokenizer.byte_at(1), b'0'...b'9')
             ) || (
                 tokenizer.has_at_least(2)
-                && tokenizer.char_at(1) == '.'
-                && matches!(tokenizer.char_at(2), '0'...'9')
+                && tokenizer.byte_at(1) == b'.'
+                && matches!(tokenizer.byte_at(2), b'0'...b'9')
             ) {
                 consume_numeric(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('+')
             }
         },
-        ',' => { tokenizer.advance(1); Comma },
-        '-' => {
+        b',' => { tokenizer.advance(1); Comma },
+        b'-' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9')
+                && matches!(tokenizer.byte_at(1), b'0'...b'9')
             ) || (
                 tokenizer.has_at_least(2)
-                && tokenizer.char_at(1) == '.'
-                && matches!(tokenizer.char_at(2), '0'...'9')
+                && tokenizer.byte_at(1) == b'.'
+                && matches!(tokenizer.byte_at(2), b'0'...b'9')
             ) {
                 consume_numeric(tokenizer)
-            } else if tokenizer.starts_with("-->") {
+            } else if tokenizer.starts_with(b"-->") {
                 tokenizer.advance(3);
                 CDC
             } else if is_ident_start(tokenizer) {
                 consume_ident_like(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('-')
             }
         },
-        '.' => {
+        b'.' => {
             if tokenizer.has_at_least(1)
-                && matches!(tokenizer.char_at(1), '0'...'9'
+                && matches!(tokenizer.byte_at(1), b'0'...b'9'
             ) {
                 consume_numeric(tokenizer)
             } else {
                 tokenizer.advance(1);
-                Delim(c)
+                Delim('.')
             }
         }
-        '/' if tokenizer.starts_with("/*") => {
-            tokenizer.advance(2);  // consume "/*"
-            let start_position = tokenizer.position();
-            let content;
-            match tokenizer.input[tokenizer.position..].find("*/") {
-                Some(offset) => {
-                    tokenizer.advance(offset);
-                    content = tokenizer.slice_from(start_position);
-                    tokenizer.advance(2);
-                }
-                None => {
-                    tokenizer.position = tokenizer.input.len();
-                    content = tokenizer.slice_from(start_position);
+        b'/' => {
+            if tokenizer.starts_with(b"/*") {
+                tokenizer.advance(2); // consume "/*"
+                let start_position = tokenizer.position();
+                let content;
+                match tokenizer.input[tokenizer.position..].find("*/") {
+                    Some(offset) => {
+                        tokenizer.advance(offset);
+                        content = tokenizer.slice_from(start_position);
+                        tokenizer.advance(2);
+                    }
+                    None => {
+                        tokenizer.position = tokenizer.input.len();
+                        content = tokenizer.slice_from(start_position);
+                    }
                 }
+                Comment(content)
+            } else {
+                tokenizer.advance(1);
+                Delim('/')
             }
-            Comment(content)
         }
-        '0'...'9' => consume_numeric(tokenizer),
-        ':' => { tokenizer.advance(1); Colon },
-        ';' => { tokenizer.advance(1); Semicolon },
-        '<' => {
-            if tokenizer.starts_with("<!--") { tokenizer.advance(4); CDO }
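
Note: the string that expand_match_bytes_macro assembles parses back into code of roughly the shape below, shown here as a hand-written, runnable approximation for a two-case match_byte! (the real expansion bakes the 256-entry __CASES table directly into a `static` at build time instead of computing it at runtime):

    #[derive(Clone, Copy)]
    enum Case {
        Case1 = 1, // bytes matched by the first arm, e.g. b'0'...b'9'
        Case2 = 2, // bytes that fall through to the wildcard arm
    }

    // Stand-in for the `static __CASES: [Case; 256]` array that the
    // build script emits as text.
    fn cases() -> [Case; 256] {
        let mut table = [Case::Case2; 256];
        for b in b'0'..(b'9' + 1) {
            table[b as usize] = Case::Case1;
        }
        table
    }

    fn classify(c: u8) -> &'static str {
        // A single array load replaces the chain of range comparisons.
        match cases()[c as usize] {
            Case::Case1 => { "digit" },
            Case::Case2 => { "other" },
        }
    }

    fn main() {
        assert_eq!(classify(b'5'), "digit");
        assert_eq!(classify(b'!'), "other");
    }
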