diff --git a/Cargo.lock b/Cargo.lock
index f87a6e889b0e..87fdc0539def 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -50,6 +50,15 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "classification-macros"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "convert_case"
 version = "0.6.0"
@@ -531,6 +540,7 @@ version = "0.1.0"
 dependencies = [
  "bexpand",
  "bstr",
+ "classification-macros",
  "crossbeam",
  "dunce",
  "fast-glob",
diff --git a/crates/classification-macros/Cargo.toml b/crates/classification-macros/Cargo.toml
new file mode 100644
index 000000000000..7053c893ab69
--- /dev/null
+++ b/crates/classification-macros/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "classification-macros"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+proc-macro = true
+
+[dependencies]
+syn = "2"
+quote = "1"
+proc-macro2 = "1"
diff --git a/crates/classification-macros/src/lib.rs b/crates/classification-macros/src/lib.rs
new file mode 100644
index 000000000000..4e10fc5ad7b1
--- /dev/null
+++ b/crates/classification-macros/src/lib.rs
@@ -0,0 +1,235 @@
+use proc_macro::TokenStream;
+use quote::quote;
+use syn::{
+    parse_macro_input, punctuated::Punctuated, token::Comma, Attribute, Data, DataEnum,
+    DeriveInput, Expr, ExprLit, ExprRange, Ident, Lit, RangeLimits, Result, Variant,
+};
+
+/// A custom derive that supports:
+///
+/// - `#[bytes(…)]` for single byte literals
+/// - `#[bytes_range(…)]` for byte ranges: inclusive (`b'a'..=b'z'`) or half-open
+///   (`b'a'..b'z'`), with byte or integer literal bounds (`0x80..=0xff`)
+/// - `#[fallback]` for the one mandatory variant that covers everything else
+///
+/// If a byte is assigned to several variants, the variant declared last wins.
+///
+/// Example usage:
+///
+/// ```rust
+/// use classification_macros::ClassifyBytes;
+///
+/// #[derive(Clone, Copy, ClassifyBytes)]
+/// enum Class {
+///     #[bytes(b'a', b'b', b'c')]
+///     Letters,
+///
+///     #[bytes_range(b'0'..=b'9')]
+///     Digits,
+///
+///     #[fallback]
+///     Other,
+/// }
+/// ```
+///
+/// Then call `b'a'.into()` to get `Class::Letters`.
+#[proc_macro_derive(ClassifyBytes, attributes(bytes, bytes_range, fallback))]
+pub fn classify_bytes_derive(input: TokenStream) -> TokenStream {
+    let ast = parse_macro_input!(input as DeriveInput);
+
+    // Surface every failure as a spanned compile error rather than a macro panic
+    expand(&ast)
+        .unwrap_or_else(|err| err.to_compile_error())
+        .into()
+}
+
+/// Builds the `TABLE` constant and the `From<u8>` impl for the annotated enum.
+fn expand(ast: &DeriveInput) -> Result<proc_macro2::TokenStream> {
+    // This derive only works on an enum
+    let Data::Enum(DataEnum { variants, .. }) = &ast.data else {
+        return Err(syn::Error::new_spanned(
+            &ast.ident,
+            "ClassifyBytes can only be derived on an enum.",
+        ));
+    };
+
+    let enum_name = &ast.ident;
+
+    let mut byte_map: [Option<Ident>; 256] = [const { None }; 256];
+    let mut fallback_variant: Option<Ident> = None;
+
+    // Start parsing the variants
+    for variant in variants {
+        let variant_ident = &variant.ident;
+
+        // If this variant has #[fallback], record it
+        if has_fallback_attr(variant) {
+            if fallback_variant.is_some() {
+                return Err(syn::Error::new_spanned(
+                    variant_ident,
+                    "Multiple variants have #[fallback]. Only one allowed.",
+                ));
+            }
+            fallback_variant = Some(variant_ident.clone());
+        }
+
+        // Get #[bytes(…)]
+        let single_bytes = get_bytes_attrs(&variant.attrs)?;
+
+        // Get #[bytes_range(…)]
+        let range_bytes = get_bytes_range_attrs(&variant.attrs)?;
+
+        // Mark them in the table
+        for b in single_bytes.into_iter().chain(range_bytes) {
+            byte_map[usize::from(b)] = Some(variant_ident.clone());
+        }
+    }
+
+    // Unassigned bytes must map somewhere, so a fallback variant is required
+    let Some(fallback_ident) = fallback_variant else {
+        return Err(syn::Error::new_spanned(
+            enum_name,
+            "A variant marked with #[fallback] is missing",
+        ));
+    };
+
+    // For each of the 256 byte values, fill the table
+    let fill = byte_map.iter().map(|variant_opt| {
+        let ident = variant_opt.as_ref().unwrap_or(&fallback_ident);
+        quote!(#enum_name::#ident)
+    });
+
+    // Generate the final expanded code
+    Ok(quote! {
+        impl #enum_name {
+            pub const TABLE: [#enum_name; 256] = [
+                #(#fill),*
+            ];
+        }
+
+        impl From<u8> for #enum_name {
+            fn from(byte: u8) -> Self {
+                #enum_name::TABLE[byte as usize]
+            }
+        }
+    })
+}
+
+/// Checks if a variant has `#[fallback]`
+fn has_fallback_attr(variant: &Variant) -> bool {
+    variant
+        .attrs
+        .iter()
+        .any(|attr| attr.path().is_ident("fallback"))
+}
+
+/// Get all single byte literals from `#[bytes(…)]`
+fn get_bytes_attrs(attrs: &[Attribute]) -> Result<Vec<u8>> {
+    let mut assigned = Vec::new();
+    for attr in attrs {
+        if attr.path().is_ident("bytes") {
+            assigned.extend(parse_bytes_attr(attr)?);
+        }
+    }
+    Ok(assigned)
+}
+
+/// Parse `#[bytes(...)]` as a comma-separated list of **byte literals**, e.g. `b'a'`, `b'\n'`.
+fn parse_bytes_attr(attr: &Attribute) -> Result<Vec<u8>> {
+    // We'll parse it as a list of syn::Lit separated by commas: e.g. (b'a', b'b')
+    let items: Punctuated<Lit, Comma> = attr.parse_args_with(Punctuated::parse_terminated)?;
+    items
+        .into_iter()
+        .map(|lit| match lit {
+            Lit::Byte(lb) => Ok(lb.value()),
+            other => Err(syn::Error::new_spanned(
+                other,
+                "Expected a byte literal like b'a'",
+            )),
+        })
+        .collect()
+}
+
+/// Get all byte ranges from `#[bytes_range(...)]`
+fn get_bytes_range_attrs(attrs: &[Attribute]) -> Result<Vec<u8>> {
+    let mut assigned = Vec::new();
+    for attr in attrs {
+        if attr.path().is_ident("bytes_range") {
+            assigned.extend(parse_bytes_range_attr(attr)?);
+        }
+    }
+    Ok(assigned)
+}
+
+/// Parse `#[bytes_range(...)]` as a comma-separated list of range expressions, e.g.:
+/// `b'a'..=b'z', b'0'..=b'9'`
+fn parse_bytes_range_attr(attr: &Attribute) -> Result<Vec<u8>> {
+    // We'll parse each element as a syn::Expr, then see if it's an Expr::Range
+    let exprs: Punctuated<Expr, Comma> =
+        attr.parse_args_with(Punctuated::parse_terminated)?;
+    let mut out = Vec::new();
+
+    for expr in exprs {
+        match expr {
+            Expr::Range(ExprRange {
+                start: Some(start),
+                end: Some(end),
+                limits,
+                ..
+            }) => {
+                let from = extract_byte_literal(&start)?;
+                let to = extract_byte_literal(&end)?;
+
+                // An empty or reversed range iterates zero times, so it adds no bytes
+                match limits {
+                    // b'a'..=b'z'
+                    RangeLimits::Closed(_) => out.extend(from..=to),
+                    // b'a'..b'z' => from..(to-1)
+                    RangeLimits::HalfOpen(_) => out.extend(from..to),
+                }
+            }
+            other => {
+                return Err(syn::Error::new_spanned(
+                    other,
+                    "Expected a byte range like b'a'..=b'z'",
+                ))
+            }
+        }
+    }
+
+    Ok(out)
+}
+
+/// Extract a u8 from an expression that can be:
+///
+/// - `Expr::Lit(Lit::Byte(...))`, e.g. b'a'
+/// - `Expr::Lit(Lit::Int(...))`, e.g. 0x80 or 255
+fn extract_byte_literal(expr: &Expr) -> Result<u8> {
+    let Expr::Lit(ExprLit { lit, .. }) = expr else {
+        return Err(syn::Error::new_spanned(
+            expr,
+            "Expected a literal expression like b'a' or 0x80",
+        ));
+    };
+
+    match lit {
+        // b'a'
+        Lit::Byte(lb) => Ok(lb.value()),
+
+        // 0x80, 255, etc.
+        Lit::Int(li) => {
+            let value = li.base10_parse::<u64>()?;
+            u8::try_from(value).map_err(|_| {
+                syn::Error::new_spanned(
+                    li,
+                    format!("Integer literal {value} out of range for a byte (0..=255)"),
+                )
+            })
+        }
+
+        _ => Err(syn::Error::new_spanned(
+            lit,
+            "Expected b'...' or an integer literal in range 0..=255",
+        )),
+    }
+}
diff --git a/crates/oxide/Cargo.toml b/crates/oxide/Cargo.toml
index 3964b93887bf..1b2d42d52543 100644
--- a/crates/oxide/Cargo.toml
+++ b/crates/oxide/Cargo.toml
@@ -17,6 +17,7 @@ ignore = "0.4.23"
 dunce = "1.0.5"
 bexpand = "1.2.0"
 fast-glob = "0.4.3"
+classification-macros = { path = "../classification-macros" }
 
 [dev-dependencies]
 tempfile = "3.13.0"
diff --git a/crates/oxide/src/extractor/arbitrary_property_machine.rs b/crates/oxide/src/extractor/arbitrary_property_machine.rs
index 4c8e7ef1eaca..975c3df9809f 100644
--- a/crates/oxide/src/extractor/arbitrary_property_machine.rs
+++ b/crates/oxide/src/extractor/arbitrary_property_machine.rs
@@ -3,6 +3,7 @@ use crate::extractor::bracket_stack::BracketStack;
 use crate::extractor::machine::{Machine, MachineState};
 use crate::extractor::string_machine::StringMachine;
 use crate::extractor::CssVariableMachine;
+use classification_macros::ClassifyBytes;
 
 /// Extracts arbitrary properties from the input, including the brackets.
/// @@ -68,7 +69,7 @@ impl Machine for ArbitraryPropertyMachine { let len = cursor.input.len(); match self.state { - State::Idle => match CLASS_TABLE[cursor.curr as usize] { + State::Idle => match cursor.curr.into() { // Start of an arbitrary property Class::OpenBracket => { self.start_pos = cursor.pos; @@ -83,8 +84,8 @@ impl Machine for ArbitraryPropertyMachine { State::ParsingProperty => { while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { - Class::Dash => match CLASS_TABLE[cursor.next as usize] { + match cursor.curr.into() { + Class::Dash => match cursor.next.into() { // Start of a CSS variable // // E.g.: `[--my-color:red]` @@ -102,7 +103,7 @@ impl Machine for ArbitraryPropertyMachine { // // E.g.: `[color:red]` // ^^^^^ - Class::Alpha => cursor.advance(), + Class::AlphaLower => cursor.advance(), // End of the property name, but there must be at least a single character Class::Colon if cursor.pos > self.start_pos + 1 => { @@ -121,8 +122,8 @@ impl Machine for ArbitraryPropertyMachine { State::ParsingValue => { while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { - Class::Escape => match CLASS_TABLE[cursor.next as usize] { + match cursor.curr.into() { + Class::Escape => match cursor.next.into() { // An escaped whitespace character is not allowed // // E.g.: `[color:var(--my-\ color)]` @@ -195,7 +196,7 @@ impl ArbitraryPropertyMachine { fn parse_property_variable(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { match self.css_variable_machine.next(cursor) { MachineState::Idle => self.restart(), - MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + MachineState::Done(_) => match cursor.next.into() { // End of the CSS variable, must be followed by a `:` // // E.g.: `[--my-color:red]` @@ -223,94 +224,51 @@ impl ArbitraryPropertyMachine { } } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, ClassifyBytes)] enum Class { - /// `(` + #[bytes(b'(')] OpenParen, - /// `[` + #[bytes(b'[')] OpenBracket, - /// `{` + 
#[bytes(b'{')] OpenCurly, - /// `)` + #[bytes(b')')] CloseParen, - /// `]` + #[bytes(b']')] CloseBracket, - /// `}` + #[bytes(b'}')] CloseCurly, - /// `\` + #[bytes(b'\\')] Escape, - /// ', ", or ` + #[bytes(b'"', b'\'', b'`')] Quote, - /// `-` + #[bytes(b'-')] Dash, - /// `a`..`z` or `A`..`Z` - Alpha, + #[bytes_range(b'a'..=b'z')] + AlphaLower, - /// `:` + #[bytes(b':')] Colon, - /// Whitespace characters + #[bytes(b' ', b'\t', b'\n', b'\r', b'\x0C')] Whitespace, - /// End of the input + #[bytes(b'\0')] End, + #[fallback] Other, } -const CLASS_TABLE: [Class; 256] = { - let mut table = [Class::Other; 256]; - - macro_rules! set { - ($class:expr, $($byte:expr),+ $(,)?) => { - $(table[$byte as usize] = $class;)+ - }; - } - - macro_rules! set_range { - ($class:expr, $start:literal ..= $end:literal) => { - let mut i = $start; - while i <= $end { - table[i as usize] = $class; - i += 1; - } - }; - } - - set!(Class::OpenParen, b'('); - set!(Class::OpenBracket, b'['); - set!(Class::OpenCurly, b'{'); - - set!(Class::CloseParen, b')'); - set!(Class::CloseBracket, b']'); - set!(Class::CloseCurly, b'}'); - - set!(Class::Escape, b'\\'); - - set!(Class::Quote, b'"', b'\'', b'`'); - - set!(Class::Dash, b'-'); - - set_range!(Class::Alpha, b'a'..=b'z'); - set_range!(Class::Alpha, b'A'..=b'Z'); - - set!(Class::Colon, b':'); - set!(Class::End, b'\0'); - - set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); - - table -}; - #[cfg(test)] mod tests { use super::ArbitraryPropertyMachine; diff --git a/crates/oxide/src/extractor/arbitrary_value_machine.rs b/crates/oxide/src/extractor/arbitrary_value_machine.rs index eab286a3d9ac..aa493c624e59 100644 --- a/crates/oxide/src/extractor/arbitrary_value_machine.rs +++ b/crates/oxide/src/extractor/arbitrary_value_machine.rs @@ -2,6 +2,7 @@ use crate::cursor; use crate::extractor::bracket_stack::BracketStack; use crate::extractor::machine::{Machine, MachineState}; use crate::extractor::string_machine::StringMachine; +use 
classification_macros::ClassifyBytes; /// Extracts arbitrary values including the brackets. /// @@ -31,7 +32,7 @@ impl Machine for ArbitraryValueMachine { #[inline] fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { // An arbitrary value must start with an open bracket - if CLASS_TABLE[cursor.curr as usize] != Class::OpenBracket { + if Class::OpenBracket != cursor.curr.into() { return MachineState::Idle; } @@ -41,8 +42,8 @@ impl Machine for ArbitraryValueMachine { let len = cursor.input.len(); while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { - Class::Escape => match CLASS_TABLE[cursor.next as usize] { + match cursor.curr.into() { + Class::Escape => match cursor.next.into() { // An escaped whitespace character is not allowed // // E.g.: `[color:var(--my-\ color)]` @@ -103,65 +104,39 @@ impl Machine for ArbitraryValueMachine { } } -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, ClassifyBytes)] enum Class { - /// `\` + #[bytes(b'\\')] Escape, - /// `(` + #[bytes(b'(')] OpenParen, - /// `)` + #[bytes(b')')] CloseParen, - /// `[` + #[bytes(b'[')] OpenBracket, - /// `]` + #[bytes(b']')] CloseBracket, - /// `{` + #[bytes(b'{')] OpenCurly, - /// `}` + #[bytes(b'}')] CloseCurly, - /// ', ", or ` + #[bytes(b'"', b'\'', b'`')] Quote, - /// Whitespace + #[bytes(b' ', b'\t', b'\n', b'\r', b'\x0C')] Whitespace, + #[fallback] Other, } -const CLASS_TABLE: [Class; 256] = { - let mut table = [Class::Other; 256]; - - macro_rules! set { - ($class:expr, $($byte:expr),+ $(,)?) 
=> { - $(table[$byte as usize] = $class;)+ - }; - } - - set!(Class::Escape, b'\\'); - - set!(Class::OpenParen, b'('); - set!(Class::CloseParen, b')'); - - set!(Class::OpenBracket, b'['); - set!(Class::CloseBracket, b']'); - - set!(Class::OpenCurly, b'{'); - set!(Class::CloseCurly, b'}'); - - set!(Class::Quote, b'"', b'\'', b'`'); - - set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); - - table -}; - #[cfg(test)] mod tests { use super::ArbitraryValueMachine; diff --git a/crates/oxide/src/extractor/arbitrary_variable_machine.rs b/crates/oxide/src/extractor/arbitrary_variable_machine.rs index 7339b2377c8f..a56df24ed4de 100644 --- a/crates/oxide/src/extractor/arbitrary_variable_machine.rs +++ b/crates/oxide/src/extractor/arbitrary_variable_machine.rs @@ -3,6 +3,7 @@ use crate::extractor::bracket_stack::BracketStack; use crate::extractor::css_variable_machine::CssVariableMachine; use crate::extractor::machine::{Machine, MachineState}; use crate::extractor::string_machine::StringMachine; +use classification_macros::ClassifyBytes; /// Extracts arbitrary variables including the parens. 
/// @@ -62,7 +63,7 @@ impl Machine for ArbitraryVariableMachine { #[inline] fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { - let class_curr = CLASS_TABLE[cursor.curr as usize]; + let class_curr = cursor.curr.into(); let len = cursor.input.len(); match self.state { @@ -72,7 +73,7 @@ impl Machine for ArbitraryVariableMachine { // E.g.: `(--my-variable)` // ^^ // - Class::OpenParen => match CLASS_TABLE[cursor.next as usize] { + Class::OpenParen => match cursor.next.into() { Class::Dash => { self.start_pos = cursor.pos; self.state = State::Parsing; @@ -90,7 +91,7 @@ impl Machine for ArbitraryVariableMachine { State::Parsing => match self.css_variable_machine.next(cursor) { MachineState::Idle => self.restart(), - MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + MachineState::Done(_) => match cursor.next.into() { // A CSS variable followed by a `,` means that there is a fallback // // E.g.: `(--my-color,red)` @@ -108,7 +109,7 @@ impl Machine for ArbitraryVariableMachine { _ => { cursor.advance(); - match CLASS_TABLE[cursor.curr as usize] { + match cursor.curr.into() { // End of an arbitrary variable, must be followed by `)` Class::CloseParen => self.done(self.start_pos, cursor), @@ -121,8 +122,8 @@ impl Machine for ArbitraryVariableMachine { State::ParsingFallback => { while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { - Class::Escape => match CLASS_TABLE[cursor.next as usize] { + match cursor.curr.into() { + Class::Escape => match cursor.next.into() { // An escaped whitespace character is not allowed // // E.g.: `(--my-\ color)` @@ -192,118 +193,69 @@ impl Machine for ArbitraryVariableMachine { } } -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, ClassifyBytes)] enum Class { - /// `'a'..='z'` + #[bytes_range(b'a'..=b'z')] AlphaLower, - /// `'A'..='Z'` + #[bytes_range(b'A'..=b'Z')] AlphaUpper, - /// `@` + #[bytes(b'@')] At, - // `:` + #[bytes(b':')] Colon, - /// `,` + #[bytes(b',')] Comma, - 
/// `-` + #[bytes(b'-')] Dash, - /// `:` + #[bytes(b'.')] Dot, - /// `\\` + #[bytes(b'\\')] Escape, - /// `0x00` + #[bytes(b'\0')] End, - /// `'0'..='9'` + #[bytes_range(b'0'..=b'9')] Number, - /// `[` + #[bytes(b'[')] OpenBracket, - /// `]` + #[bytes(b']')] CloseBracket, - /// `(` + #[bytes(b'(')] OpenParen, - /// `)` + #[bytes(b')')] CloseParen, - /// `{` + #[bytes(b'{')] OpenCurly, - /// `}` + #[bytes(b'}')] CloseCurly, - /// ', ", or ` + #[bytes(b'"', b'\'', b'`')] Quote, - /// _ + #[bytes(b'_')] Underscore, - /// Whitespace characters: ' ', '\t', '\n', '\r', '\x0C' + #[bytes(b' ', b'\t', b'\n', b'\r', b'\x0C')] Whitespace, - /// Anything else + #[fallback] Other, } -const CLASS_TABLE: [Class; 256] = { - let mut table = [Class::Other; 256]; - - macro_rules! set { - ($class:expr, $($byte:expr),+ $(,)?) => { - $(table[$byte as usize] = $class;)+ - }; - } - - macro_rules! set_range { - ($class:expr, $start:literal ..= $end:literal) => { - let mut i = $start; - while i <= $end { - table[i as usize] = $class; - i += 1; - } - }; - } - - set!(Class::At, b'@'); - set!(Class::Underscore, b'_'); - set!(Class::Dash, b'-'); - set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); - set!(Class::Comma, b','); - set!(Class::Escape, b'\\'); - - set!(Class::OpenBracket, b'['); - set!(Class::CloseBracket, b']'); - - set!(Class::OpenParen, b'('); - set!(Class::CloseParen, b')'); - - set!(Class::OpenCurly, b'{'); - set!(Class::CloseCurly, b'}'); - - set!(Class::Dot, b'.'); - set!(Class::Colon, b':'); - - set!(Class::Quote, b'"', b'\'', b'`'); - - set_range!(Class::AlphaLower, b'a'..=b'z'); - set_range!(Class::AlphaUpper, b'A'..=b'Z'); - set_range!(Class::Number, b'0'..=b'9'); - - set!(Class::End, 0x00); - - table -}; - #[cfg(test)] mod tests { use super::ArbitraryVariableMachine; diff --git a/crates/oxide/src/extractor/css_variable_machine.rs b/crates/oxide/src/extractor/css_variable_machine.rs index f443e5275859..ab6ab3121a27 100644 --- 
a/crates/oxide/src/extractor/css_variable_machine.rs +++ b/crates/oxide/src/extractor/css_variable_machine.rs @@ -1,5 +1,6 @@ use crate::cursor; use crate::extractor::machine::{Machine, MachineState}; +use classification_macros::ClassifyBytes; /// Extract CSS variables from an input. /// @@ -19,9 +20,7 @@ impl Machine for CssVariableMachine { #[inline] fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { // CSS Variables must start with `--` - if CLASS_TABLE[cursor.curr as usize] != Class::Dash - || CLASS_TABLE[cursor.next as usize] != Class::Dash - { + if Class::Dash != cursor.curr.into() || Class::Dash != cursor.next.into() { return MachineState::Idle; } @@ -31,33 +30,35 @@ impl Machine for CssVariableMachine { cursor.advance_twice(); while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { + match cursor.curr.into() { // https://drafts.csswg.org/css-syntax-3/#ident-token-diagram // - Class::AllowedCharacter | Class::Dash => match CLASS_TABLE[cursor.next as usize] { - // Valid character followed by a valid character or an escape character - // - // E.g.: `--my-variable` - // ^^ - // E.g.: `--my-\#variable` - // ^^ - Class::AllowedCharacter | Class::Dash | Class::Escape => cursor.advance(), - - // Valid character followed by anything else means the variable is done - // - // E.g.: `'--my-variable'` - // ^ - _ => { - // There must be at least 1 character after the `--` - if cursor.pos - start_pos < 2 { - return self.restart(); - } else { - return self.done(start_pos, cursor); + Class::AllowedCharacter | Class::Dash => { + match cursor.next.into() { + // Valid character followed by a valid character or an escape character + // + // E.g.: `--my-variable` + // ^^ + // E.g.: `--my-\#variable` + // ^^ + Class::AllowedCharacter | Class::Dash | Class::Escape => cursor.advance(), + + // Valid character followed by anything else means the variable is done + // + // E.g.: `'--my-variable'` + // ^ + _ => { + // There must be at least 1 character 
after the `--` + if cursor.pos - start_pos < 2 { + return self.restart(); + } else { + return self.done(start_pos, cursor); + } } } - }, + } - Class::Escape => match CLASS_TABLE[cursor.next as usize] { + Class::Escape => match cursor.next.into() { // An escaped whitespace character is not allowed // // In CSS it is allowed, but in the context of a class it's not because then we @@ -87,62 +88,30 @@ impl Machine for CssVariableMachine { } } -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, ClassifyBytes)] enum Class { - /// - + #[bytes(b'-')] Dash, - /// _, a-z, A-Z, 0-9 + #[bytes(b'_')] + #[bytes_range(b'a'..=b'z', b'A'..=b'Z', b'0'..=b'9')] + // non-ASCII (such as Emoji): https://drafts.csswg.org/css-syntax-3/#non-ascii-ident-code-point + #[bytes_range(0x80..=0xff)] AllowedCharacter, - /// \ + #[bytes(b'\\')] Escape, - /// Whitespace + #[bytes(b' ', b'\t', b'\n', b'\r', b'\x0C')] Whitespace, - /// End of the input + #[bytes(b'\0')] End, + #[fallback] Other, } -const CLASS_TABLE: [Class; 256] = { - let mut table = [Class::Other; 256]; - - macro_rules! set { - ($class:expr, $($byte:expr),+ $(,)?) => { - $(table[$byte as usize] = $class;)+ - }; - } - - macro_rules! 
set_range { - ($class:expr, $start:literal ..= $end:literal) => { - let mut i = $start; - while i <= $end { - table[i as usize] = $class; - i += 1; - } - }; - } - - set!(Class::Dash, b'-'); - set!(Class::Escape, b'\\'); - set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); - - set!(Class::AllowedCharacter, b'_'); - set_range!(Class::AllowedCharacter, b'a'..=b'z'); - set_range!(Class::AllowedCharacter, b'A'..=b'Z'); - set_range!(Class::AllowedCharacter, b'0'..=b'9'); - - // non-ASCII (such as Emoji): https://drafts.csswg.org/css-syntax-3/#non-ascii-ident-code-point - set_range!(Class::AllowedCharacter, 0x80..=0xff); - - set!(Class::End, b'\0'); - - table -}; - #[cfg(test)] mod tests { use super::CssVariableMachine; diff --git a/crates/oxide/src/extractor/modifier_machine.rs b/crates/oxide/src/extractor/modifier_machine.rs index 6a64f1424c3f..3104a0b11ee9 100644 --- a/crates/oxide/src/extractor/modifier_machine.rs +++ b/crates/oxide/src/extractor/modifier_machine.rs @@ -2,6 +2,7 @@ use crate::cursor; use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine; use crate::extractor::arbitrary_variable_machine::ArbitraryVariableMachine; use crate::extractor::machine::{Machine, MachineState}; +use classification_macros::ClassifyBytes; /// Extract modifiers from an input including the `/`. 
/// @@ -30,14 +31,14 @@ impl Machine for ModifierMachine { #[inline] fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { // A modifier must start with a `/`, everything else is not a valid start of a modifier - if CLASS_TABLE[cursor.curr as usize] != Class::Slash { + if Class::Slash != cursor.curr.into() { return MachineState::Idle; } let start_pos = cursor.pos; cursor.advance(); - match CLASS_TABLE[cursor.curr as usize] { + match cursor.curr.into() { // Start of an arbitrary value: // // ``` @@ -69,9 +70,9 @@ impl Machine for ModifierMachine { Class::ValidStart => { let len = cursor.input.len(); while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { + match cursor.curr.into() { Class::ValidStart | Class::ValidInside => { - match CLASS_TABLE[cursor.next as usize] { + match cursor.next.into() { // Only valid characters are allowed, if followed by another valid character Class::ValidStart | Class::ValidInside => cursor.advance(), @@ -95,59 +96,27 @@ impl Machine for ModifierMachine { } } -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, ClassifyBytes)] enum Class { - /// `'a'..='z' | 'A'..='Z' | '0'..='9'` + #[bytes_range(b'a'..=b'z', b'A'..=b'Z', b'0'..=b'9')] ValidStart, - /// `-`, `_`, `.` + #[bytes(b'-', b'_', b'.')] ValidInside, - /// `[` + #[bytes(b'[')] OpenBracket, - /// `(` + #[bytes(b'(')] OpenParen, - /// `/` + #[bytes(b'/')] Slash, + #[fallback] Other, } -const CLASS_TABLE: [Class; 256] = { - let mut table = [Class::Other; 256]; - - macro_rules! set { - ($class:expr, $($byte:expr),+ $(,)?) => { - $(table[$byte as usize] = $class;)+ - }; - } - - macro_rules! 
set_range { - ($class:expr, $start:literal ..= $end:literal) => { - let mut i = $start; - while i <= $end { - table[i as usize] = $class; - i += 1; - } - }; - } - - set_range!(Class::ValidStart, b'a'..=b'z'); - set_range!(Class::ValidStart, b'A'..=b'Z'); - set_range!(Class::ValidStart, b'0'..=b'9'); - - set!(Class::OpenBracket, b'['); - set!(Class::OpenParen, b'('); - - set!(Class::Slash, b'/'); - - set!(Class::ValidInside, b'-', b'_', b'.'); - - table -}; - #[cfg(test)] mod tests { use super::ModifierMachine; diff --git a/crates/oxide/src/extractor/named_utility_machine.rs b/crates/oxide/src/extractor/named_utility_machine.rs index b76186d2fe39..1284091b945c 100644 --- a/crates/oxide/src/extractor/named_utility_machine.rs +++ b/crates/oxide/src/extractor/named_utility_machine.rs @@ -2,6 +2,7 @@ use crate::cursor; use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine; use crate::extractor::arbitrary_variable_machine::ArbitraryVariableMachine; use crate::extractor::machine::{Machine, MachineState}; +use classification_macros::ClassifyBytes; /// Extracts named utilities from an input. /// @@ -47,8 +48,8 @@ impl Machine for NamedUtilityMachine { let len = cursor.input.len(); match self.state { - State::Idle => match CLASS_TABLE[cursor.curr as usize] { - Class::AlphaLower => match CLASS_TABLE[cursor.next as usize] { + State::Idle => match cursor.curr.into() { + Class::AlphaLower => match cursor.next.into() { // Valid single character utility in between quotes // // E.g.: `
` @@ -87,7 +88,7 @@ impl Machine for NamedUtilityMachine { // // E.g.: `-mx-2.5` // ^^ - Class::Dash => match CLASS_TABLE[cursor.next as usize] { + Class::Dash => match cursor.next.into() { Class::AlphaLower => { self.start_pos = cursor.pos; self.state = State::Parsing; @@ -105,7 +106,7 @@ impl Machine for NamedUtilityMachine { State::Parsing => { while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { + match cursor.curr.into() { // Followed by a boundary character, we are at the end of the utility. // // E.g.: `'flex'` @@ -119,7 +120,7 @@ impl Machine for NamedUtilityMachine { // E.g.: `:div="{ flex: true }"` (JavaScript object syntax) // ^ Class::AlphaLower | Class::AlphaUpper => { - match CLASS_TABLE[cursor.next as usize] { + match cursor.next.into() { Class::Quote | Class::Whitespace | Class::CloseBracket @@ -134,7 +135,7 @@ impl Machine for NamedUtilityMachine { } } - Class::Dash => match CLASS_TABLE[cursor.next as usize] { + Class::Dash => match cursor.next.into() { // Start of an arbitrary value // // E.g.: `bg-[#0088cc]` @@ -178,7 +179,7 @@ impl Machine for NamedUtilityMachine { _ => return self.restart(), }, - Class::Underscore => match CLASS_TABLE[cursor.next as usize] { + Class::Underscore => match cursor.next.into() { // Valid characters _if_ followed by another valid character. These characters are // only valid inside of the utility but not at the end of the utility. 
// @@ -228,11 +229,11 @@ impl Machine for NamedUtilityMachine { // E.g.: `px-2.5` // ^^^ Class::Dot => { - if !matches!(CLASS_TABLE[cursor.prev as usize], Class::Number) { + if !matches!(cursor.prev.into(), Class::Number) { return self.restart(); } - if !matches!(CLASS_TABLE[cursor.next as usize], Class::Number) { + if !matches!(cursor.next.into(), Class::Number) { return self.restart(); } @@ -256,14 +257,14 @@ impl Machine for NamedUtilityMachine { // Class::Number => { if !matches!( - CLASS_TABLE[cursor.prev as usize], + cursor.prev.into(), Class::Dash | Class::Dot | Class::Number | Class::AlphaLower ) { return self.restart(); } if !matches!( - CLASS_TABLE[cursor.next as usize], + cursor.next.into(), Class::Dot | Class::Number | Class::AlphaLower @@ -285,7 +286,7 @@ impl Machine for NamedUtilityMachine { // ^^ // ``` Class::Percent => { - if !matches!(CLASS_TABLE[cursor.prev as usize], Class::Number) { + if !matches!(cursor.prev.into(), Class::Number) { return self.restart(); } @@ -303,111 +304,63 @@ impl Machine for NamedUtilityMachine { } } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, ClassifyBytes)] enum Class { - /// `'a'..='z'` + #[bytes_range(b'a'..=b'z')] AlphaLower, - /// `'A'..='Z'` + #[bytes_range(b'A'..=b'Z')] AlphaUpper, - /// `@` + #[bytes(b'@')] At, - // `:` + #[bytes(b':')] Colon, - /// `-` + #[bytes(b'-')] Dash, - /// `:` + #[bytes(b'.')] Dot, - /// `0x00` + #[bytes(b'\0')] End, - /// `!` + #[bytes(b'!')] Exclamation, - /// `'0'..='9'` + #[bytes_range(b'0'..=b'9')] Number, - /// `[` + #[bytes(b'[')] OpenBracket, - /// `]` + #[bytes(b']')] CloseBracket, - /// `(` + #[bytes(b'(')] OpenParen, - /// `%` + #[bytes(b'%')] Percent, - /// ', ", or ` + #[bytes(b'"', b'\'', b'`')] Quote, - /// `/` + #[bytes(b'/')] Slash, - /// _ + #[bytes(b'_')] Underscore, - /// Whitespace characters: ' ', '\t', '\n', '\r', '\x0C' + #[bytes(b' ', b'\t', b'\n', b'\r', b'\x0C')] Whitespace, - /// Anything else + #[fallback] Other, } -const CLASS_TABLE: [Class; 256] = { - 
let mut table = [Class::Other; 256]; - - macro_rules! set { - ($class:expr, $($byte:expr),+ $(,)?) => { - $(table[$byte as usize] = $class;)+ - }; - } - - macro_rules! set_range { - ($class:expr, $start:literal ..= $end:literal) => { - let mut i = $start; - while i <= $end { - table[i as usize] = $class; - i += 1; - } - }; - } - - set!(Class::At, b'@'); - set!(Class::Underscore, b'_'); - set!(Class::Dash, b'-'); - set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); - - set!(Class::OpenBracket, b'['); - set!(Class::CloseBracket, b']'); - - set!(Class::OpenParen, b'('); - - set!(Class::Dot, b'.'); - set!(Class::Colon, b':'); - - set!(Class::Percent, b'%'); - - set!(Class::Quote, b'"', b'\'', b'`'); - - set!(Class::Exclamation, b'!'); - set!(Class::Slash, b'/'); - - set_range!(Class::AlphaLower, b'a'..=b'z'); - set_range!(Class::AlphaUpper, b'A'..=b'Z'); - set_range!(Class::Number, b'0'..=b'9'); - - set!(Class::End, 0x00); - - table -}; - #[cfg(test)] mod tests { use super::NamedUtilityMachine; diff --git a/crates/oxide/src/extractor/named_variant_machine.rs b/crates/oxide/src/extractor/named_variant_machine.rs index 7baa5e151d0a..47f9776dd315 100644 --- a/crates/oxide/src/extractor/named_variant_machine.rs +++ b/crates/oxide/src/extractor/named_variant_machine.rs @@ -3,6 +3,7 @@ use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine; use crate::extractor::arbitrary_variable_machine::ArbitraryVariableMachine; use crate::extractor::machine::{Machine, MachineState}; use crate::extractor::modifier_machine::ModifierMachine; +use classification_macros::ClassifyBytes; /// Extract named variants from an input including the `:`. 
/// @@ -73,8 +74,8 @@ impl Machine for NamedVariantMachine { let len = cursor.input.len(); match self.state { - State::Idle => match CLASS_TABLE[cursor.curr as usize] { - Class::AlphaLower | Class::Star => match CLASS_TABLE[cursor.next as usize] { + State::Idle => match cursor.curr.into() { + Class::AlphaLower | Class::Star => match cursor.next.into() { // Valid single character variant, must be followed by a `:` // // E.g.: `
` @@ -120,8 +121,8 @@ impl Machine for NamedVariantMachine { State::Parsing => { while cursor.pos < len { - match CLASS_TABLE[cursor.curr as usize] { - Class::Dash => match CLASS_TABLE[cursor.next as usize] { + match cursor.curr.into() { + Class::Dash => match cursor.next.into() { // Start of an arbitrary value // // E.g.: `data-[state=pending]:`. @@ -168,7 +169,7 @@ impl Machine for NamedVariantMachine { _ => return self.restart(), }, - Class::Underscore => match CLASS_TABLE[cursor.next as usize] { + Class::Underscore => match cursor.next.into() { // Valid characters _if_ followed by another valid character. These characters are // only valid inside of the variant but not at the end of the variant. // @@ -224,7 +225,7 @@ impl Machine for NamedVariantMachine { State::ParsingModifier => match self.modifier_machine.next(cursor) { MachineState::Idle => self.restart(), - MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + MachineState::Done(_) => match cursor.next.into() { // Modifier must be followed by a `:` // // E.g.: `group-hover/name:` @@ -240,7 +241,7 @@ impl Machine for NamedVariantMachine { }, }, - State::ParseEnd => match CLASS_TABLE[cursor.curr as usize] { + State::ParseEnd => match cursor.curr.into() { // The end of a variant must be the `:` // // E.g.: `hover:` @@ -257,7 +258,7 @@ impl Machine for NamedVariantMachine { impl NamedVariantMachine { #[inline(always)] fn parse_arbitrary_end(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { - match CLASS_TABLE[cursor.next as usize] { + match cursor.next.into() { Class::Slash => { self.state = State::ParsingModifier; cursor.advance(); @@ -273,118 +274,72 @@ impl NamedVariantMachine { } } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, ClassifyBytes)] enum Class { - /// `'a'..='z'` + #[bytes_range(b'a'..=b'z')] AlphaLower, - /// `'A'..='Z'` + #[bytes_range(b'A'..=b'Z')] AlphaUpper, - /// `@` + #[bytes(b'@')] At, - // `:` + #[bytes(b':')] Colon, - /// `-` + #[bytes(b'-')] Dash, - /// `:` 
+ #[bytes(b'.')] Dot, - /// `0x00` + #[bytes(b'\0')] End, - /// `'0'..='9'` + #[bytes_range(b'0'..=b'9')] Number, - /// `[` + #[bytes(b'[')] OpenBracket, - /// `]` + #[bytes(b']')] CloseBracket, - /// `(` + #[bytes(b'(')] OpenParen, - /// ', ", or ` + #[bytes(b'\'', b'"', b'`')] Quote, - /// `*` + #[bytes(b'*')] Star, - /// `/` + #[bytes(b'/')] Slash, - /// _ + #[bytes(b'_')] Underscore, - /// Whitespace characters: ' ', '\t', '\n', '\r', '\x0C' + #[bytes(b' ', b'\t', b'\n', b'\r', b'\x0C')] Whitespace, - /// Anything else + #[fallback] Other, } -const CLASS_TABLE: [Class; 256] = { - let mut table = [Class::Other; 256]; - - macro_rules! set { - ($class:expr, $($byte:expr),+ $(,)?) => { - $(table[$byte as usize] = $class;)+ - }; - } - - macro_rules! set_range { - ($class:expr, $start:literal ..= $end:literal) => { - let mut i = $start; - while i <= $end { - table[i as usize] = $class; - i += 1; - } - }; - } - - set!(Class::At, b'@'); - set!(Class::Underscore, b'_'); - set!(Class::Dash, b'-'); - set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); - - set!(Class::OpenBracket, b'['); - set!(Class::CloseBracket, b']'); - - set!(Class::OpenParen, b'('); - - set!(Class::Dot, b'.'); - set!(Class::Colon, b':'); - - set!(Class::Quote, b'"', b'\'', b'`'); - - set!(Class::Star, b'*'); - set!(Class::Slash, b'/'); - - set_range!(Class::AlphaLower, b'a'..=b'z'); - set_range!(Class::AlphaUpper, b'A'..=b'Z'); - set_range!(Class::Number, b'0'..=b'9'); - - set!(Class::End, 0x00); - - table -}; - #[cfg(test)] mod tests { use super::NamedVariantMachine; - use crate::extractor::{machine::Machine, variant_machine::VariantMachine}; + use crate::extractor::machine::Machine; #[test] #[ignore] fn test_named_variant_machine_performance() { let input = r#"