diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b08ddef7..3f669833 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,10 +18,11 @@ jobs: - nightly - beta - stable - - 1.63.0 + - 1.68.0 features: - - --features dummy_match_byte + - --features malloc_size_of include: - toolchain: nightly features: --features bench diff --git a/Cargo.toml b/Cargo.toml index f783f19f..41b6bb53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cssparser" -version = "0.34.0" +version = "0.36.0" authors = ["Simon Sapin "] description = "Rust implementation of CSS Syntax Level 3" @@ -10,7 +10,7 @@ readme = "README.md" keywords = ["css", "syntax", "parser"] license = "MPL-2.0" edition = "2018" -rust-version = "1.63" +rust-version = "1.68" exclude = ["src/css-parsing-tests/**", "src/big-data-url.css"] @@ -23,8 +23,9 @@ encoding_rs = "0.8" cssparser-macros = { path = "./macros", version = "0.6.1" } dtoa-short = "0.3" itoa = "1.0" -phf = { version = "0.11.2", features = ["macros"] } +phf = { version = "0.13.1", features = ["macros"] } serde = { version = "1.0", features = ["derive"], optional = true } +malloc_size_of = { version = "0.1", default-features = false, optional = true } smallvec = "1.0" [profile.profiling] diff --git a/color/Cargo.toml b/color/Cargo.toml index 47544815..48a539f0 100644 --- a/color/Cargo.toml +++ b/color/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cssparser-color" -version = "0.1.0" +version = "0.4.0" authors = ["Emilio Cobos Álvarez "] description = "Color implementation based on cssparser" documentation = "https://docs.rs/cssparser-color/" @@ -12,7 +12,7 @@ edition = "2021" path = "lib.rs" [dependencies] -cssparser = { path = ".." } +cssparser = { path = "..", version = "0.36" } serde = { version = "1.0", features = ["derive"], optional = true } [features] diff --git a/macros/lib.rs b/macros/lib.rs index 8b40bd4c..dc7b36e3 100644 --- a/macros/lib.rs +++ b/macros/lib.rs @@ -162,7 +162,7 @@ pub fn match_byte(input: TokenStream) -> TokenStream { for (i, ref arm) in arms.iter().enumerate() { let case_id = i + 1; let index = case_id as isize; - let name = syn::Ident::new(&format!("Case{}", case_id), arm.span()); + let name = syn::Ident::new(&format!("Case{case_id}"), arm.span()); let pat = &arm.pat; parse_pat_to_table(pat, case_id as u8, &mut wildcard, &mut table); @@ -177,7 +177,7 @@ pub fn match_byte(input: TokenStream) -> TokenStream { let mut table_content = Vec::new(); for entry in table.iter() { - let name: syn::Path = syn::parse_str(&format!("Case::Case{}", entry)).unwrap(); + let name: syn::Path = syn::parse_str(&format!("Case::Case{entry}")).unwrap(); table_content.push(name); } let table = quote::quote!(static __CASES: [Case; 256] = [#(#table_content),*];); diff --git a/src/color.rs b/src/color.rs index 978936e0..472c6478 100644 --- a/src/color.rs +++ b/src/color.rs @@ -85,6 +85,8 @@ pub enum PredefinedColorSpace { SrgbLinear, /// DisplayP3, + /// + DisplayP3Linear, /// A98Rgb, /// @@ -107,6 +109,7 @@ impl PredefinedColorSpace { "srgb" => Self::Srgb, "srgb-linear" => Self::SrgbLinear, "display-p3" => Self::DisplayP3, + "display-p3-linear" => Self::DisplayP3Linear, "a98-rgb" => Self::A98Rgb, "prophoto-rgb" => Self::ProphotoRgb, "rec2020" => Self::Rec2020, @@ -126,6 +129,7 @@ impl ToCss for PredefinedColorSpace { Self::Srgb => "srgb", Self::SrgbLinear => "srgb-linear", Self::DisplayP3 => "display-p3", + Self::DisplayP3Linear => "display-p3-linear", Self::A98Rgb => "a98-rgb", Self::ProphotoRgb => "prophoto-rgb", Self::Rec2020 => "rec2020", diff --git a/src/cow_rc_str.rs b/src/cow_rc_str.rs index 26508481..03631f47 100644 --- a/src/cow_rc_str.rs +++ b/src/cow_rc_str.rs @@ -51,7 +51,7 @@ impl<'a> From<&'a str> for CowRcStr<'a> { } } -impl<'a> From for CowRcStr<'a> { +impl From for CowRcStr<'_> { #[inline] fn from(s: String) -> Self { CowRcStr::from_rc(Rc::new(s)) @@ -84,7 +84,7 @@ impl<'a> CowRcStr<'a> { } } -impl<'a> Clone for CowRcStr<'a> { +impl Clone for CowRcStr<'_> { #[inline] fn clone(&self) -> Self { match self.unpack() { @@ -99,7 +99,7 @@ impl<'a> Clone for CowRcStr<'a> { } } -impl<'a> Drop for CowRcStr<'a> { +impl Drop for CowRcStr<'_> { #[inline] fn drop(&mut self) { if let Err(ptr) = self.unpack() { @@ -108,7 +108,7 @@ impl<'a> Drop for CowRcStr<'a> { } } -impl<'a> ops::Deref for CowRcStr<'a> { +impl ops::Deref for CowRcStr<'_> { type Target = str; #[inline] @@ -119,65 +119,65 @@ impl<'a> ops::Deref for CowRcStr<'a> { // Boilerplate / trivial impls below. -impl<'a> AsRef for CowRcStr<'a> { +impl AsRef for CowRcStr<'_> { #[inline] fn as_ref(&self) -> &str { self } } -impl<'a> Borrow for CowRcStr<'a> { +impl Borrow for CowRcStr<'_> { #[inline] fn borrow(&self) -> &str { self } } -impl<'a> Default for CowRcStr<'a> { +impl Default for CowRcStr<'_> { #[inline] fn default() -> Self { Self::from("") } } -impl<'a> hash::Hash for CowRcStr<'a> { +impl hash::Hash for CowRcStr<'_> { #[inline] fn hash(&self, hasher: &mut H) { str::hash(self, hasher) } } -impl<'a, T: AsRef> PartialEq for CowRcStr<'a> { +impl> PartialEq for CowRcStr<'_> { #[inline] fn eq(&self, other: &T) -> bool { str::eq(self, other.as_ref()) } } -impl<'a, T: AsRef> PartialOrd for CowRcStr<'a> { +impl> PartialOrd for CowRcStr<'_> { #[inline] fn partial_cmp(&self, other: &T) -> Option { str::partial_cmp(self, other.as_ref()) } } -impl<'a> Eq for CowRcStr<'a> {} +impl Eq for CowRcStr<'_> {} -impl<'a> Ord for CowRcStr<'a> { +impl Ord for CowRcStr<'_> { #[inline] fn cmp(&self, other: &Self) -> cmp::Ordering { str::cmp(self, other) } } -impl<'a> fmt::Display for CowRcStr<'a> { +impl fmt::Display for CowRcStr<'_> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) } } -impl<'a> fmt::Debug for CowRcStr<'a> { +impl fmt::Debug for CowRcStr<'_> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) diff --git a/src/css-parsing-tests/component_value_list.json b/src/css-parsing-tests/component_value_list.json index ed26e125..6d4bcddd 100644 --- a/src/css-parsing-tests/component_value_list.json +++ b/src/css-parsing-tests/component_value_list.json @@ -223,7 +223,7 @@ ["error", "bad-url"] ], -"12 +34 -45 .67 +.89 -.01 2.3 +45.0 -0.67", [ +"12 +34 -45 .67 +.89 -.01 2.3 +45.0 -0.67 1000001", [ ["number", "12", 12, "integer"], " ", ["number", "+34", 34, "integer"], " ", ["number", "-45", -45, "integer"], " ", @@ -232,7 +232,8 @@ ["number", "-0.01", -0.01, "number"], " ", ["number", "2.3", 2.3, "number"], " ", ["number", "+45.0", 45, "number"], " ", - ["number", "-0.67", -0.67, "number"] + ["number", "-0.67", -0.67, "number"], " ", + ["number", "1000001", 1000001, "integer"] ], "12e2 +34e+1 -45E-0 .68e+3 +.79e-1 -.01E2 2.3E+1 +45.0e6 -0.67e0", [ diff --git a/src/css-parsing-tests/declaration_list.json b/src/css-parsing-tests/declaration_list.json index abd23042..cafc3c1b 100644 --- a/src/css-parsing-tests/declaration_list.json +++ b/src/css-parsing-tests/declaration_list.json @@ -51,8 +51,6 @@ ], ["declaration", "a", [["ident", "b"]], false], ["at-rule", "media", [" ", ["ident", "print"]], [["ident", "div"], ["{}"]]] -], - -"", [] +] ] diff --git a/src/css-parsing-tests/urange.json b/src/css-parsing-tests/urange.json index 857d1d62..4dcb529c 100644 --- a/src/css-parsing-tests/urange.json +++ b/src/css-parsing-tests/urange.json @@ -84,6 +84,11 @@ null, null, null +], + +"U+26F9200D2640, U+10000-26F9200D2640", [ + null, + null ] ] diff --git a/src/from_bytes.rs b/src/from_bytes.rs index 78a56d3e..7d9d2c76 100644 --- a/src/from_bytes.rs +++ b/src/from_bytes.rs @@ -24,9 +24,9 @@ pub trait EncodingSupport { /// /// * `css_bytes`: A byte string. /// * `protocol_encoding`: The encoding label, if any, defined by HTTP or equivalent protocol. -/// (e.g. via the `charset` parameter of the `Content-Type` header.) +/// (e.g. via the `charset` parameter of the `Content-Type` header.) /// * `environment_encoding`: An optional `Encoding` object for the [environment encoding] -/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any. +/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any. /// /// Returns the encoding to use. pub fn stylesheet_encoding( diff --git a/src/lib.rs b/src/lib.rs index dc44fb74..3968eea0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,7 +41,7 @@ As a consequence, when calling another parsing function, either: Examples: -```{rust,ignore} +```rust,ignore // 'none' | fn parse_background_image(context: &ParserContext, input: &mut Parser) -> Result, ()> { @@ -53,7 +53,7 @@ fn parse_background_image(context: &ParserContext, input: &mut Parser) } ``` -```{rust,ignore} +```rust,ignore // [ | ] [ | ]? fn parse_border_spacing(_context: &ParserContext, input: &mut Parser) -> Result<(LengthOrPercentage, LengthOrPercentage), ()> { @@ -102,7 +102,7 @@ mod parser; mod serializer; mod unicode_range; -#[cfg(test)] +#[cfg(all(test, target_pointer_width = "64"))] mod size_of_tests; #[cfg(test)] mod tests; diff --git a/src/nth.rs b/src/nth.rs index 4fe5a6bc..76c13f35 100644 --- a/src/nth.rs +++ b/src/nth.rs @@ -20,8 +20,8 @@ pub fn parse_nth<'i>(input: &mut Parser<'i, '_>) -> Result<(i32, i32), BasicPars } => { match_ignore_ascii_case! { unit, - "n" => Ok(parse_b(input, a)?), - "n-" => Ok(parse_signless_b(input, a, -1)?), + "n" => parse_b(input, a), + "n-" => parse_signless_b(input, a, -1), _ => match parse_n_dash_digits(unit) { Ok(b) => Ok((a, b)), Err(()) => { @@ -35,10 +35,10 @@ pub fn parse_nth<'i>(input: &mut Parser<'i, '_>) -> Result<(i32, i32), BasicPars match_ignore_ascii_case! { value, "even" => Ok((2, 0)), "odd" => Ok((2, 1)), - "n" => Ok(parse_b(input, 1)?), - "-n" => Ok(parse_b(input, -1)?), - "n-" => Ok(parse_signless_b(input, 1, -1)?), - "-n-" => Ok(parse_signless_b(input, -1, -1)?), + "n" => parse_b(input, 1), + "-n" => parse_b(input, -1), + "n-" => parse_signless_b(input, 1, -1), + "-n-" => parse_signless_b(input, -1, -1), _ => { let (slice, a) = if let Some(stripped) = value.strip_prefix('-') { (stripped, -1) diff --git a/src/parser.rs b/src/parser.rs index dd35fc50..a7cab1f2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -76,15 +76,15 @@ pub enum BasicParseErrorKind<'i> { QualifiedRuleInvalid, } -impl<'i> fmt::Display for BasicParseErrorKind<'i> { +impl fmt::Display for BasicParseErrorKind<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { BasicParseErrorKind::UnexpectedToken(token) => { - write!(f, "unexpected token: {:?}", token) + write!(f, "unexpected token: {token:?}") } BasicParseErrorKind::EndOfInput => write!(f, "unexpected end of input"), BasicParseErrorKind::AtRuleInvalid(rule) => { - write!(f, "invalid @ rule encountered: '@{}'", rule) + write!(f, "invalid @ rule encountered: '@{rule}'") } BasicParseErrorKind::AtRuleBodyInvalid => write!(f, "invalid @ rule body encountered"), BasicParseErrorKind::QualifiedRuleInvalid => { @@ -176,7 +176,7 @@ impl<'i, T> ParseErrorKind<'i, T> { } } -impl<'i, E: fmt::Display> fmt::Display for ParseErrorKind<'i, E> { +impl fmt::Display for ParseErrorKind<'_, E> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { ParseErrorKind::Basic(ref basic) => basic.fmt(f), @@ -218,13 +218,13 @@ impl<'i, T> ParseError<'i, T> { } } -impl<'i, E: fmt::Display> fmt::Display for ParseError<'i, E> { +impl fmt::Display for ParseError<'_, E> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.kind.fmt(f) } } -impl<'i, E: fmt::Display + fmt::Debug> std::error::Error for ParseError<'i, E> {} +impl std::error::Error for ParseError<'_, E> {} /// The owned input for a parser. pub struct ParserInput<'i> { @@ -295,7 +295,7 @@ impl BlockType { /// /// The union of two sets can be obtained with the `|` operator. Example: /// -/// ```{rust,ignore} +/// ```rust,ignore /// input.parse_until_before(Delimiter::CurlyBracketBlock | Delimiter::Semicolon) /// ``` #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -360,10 +360,8 @@ impl Delimiters { table }; - match byte { - None => Delimiter::None, - Some(b) => TABLE[b as usize], - } + assert_eq!(TABLE[0], Delimiter::None); + TABLE[byte.unwrap_or(0) as usize] } } @@ -382,6 +380,10 @@ macro_rules! expect { } } +/// A list of arbitrary substitution functions. Should be lowercase ascii. +/// See https://drafts.csswg.org/css-values-5/#arbitrary-substitution +pub type ArbitrarySubstitutionFunctions<'a> = &'a [&'static str]; + impl<'i: 't, 't> Parser<'i, 't> { /// Create a new parser #[inline] @@ -548,19 +550,23 @@ impl<'i: 't, 't> Parser<'i, 't> { self.at_start_of = state.at_start_of; } - /// Start looking for `var()` / `env()` functions. (See the - /// `.seen_var_or_env_functions()` method.) + /// Start looking for arbitrary substitution functions like `var()` / `env()` functions. + /// (See the `.seen_arbitrary_substitution_functions()` method.) #[inline] - pub fn look_for_var_or_env_functions(&mut self) { - self.input.tokenizer.look_for_var_or_env_functions() + pub fn look_for_arbitrary_substitution_functions( + &mut self, + fns: ArbitrarySubstitutionFunctions<'i>, + ) { + self.input + .tokenizer + .look_for_arbitrary_substitution_functions(fns) } - /// Return whether a `var()` or `env()` function has been seen by the - /// tokenizer since either `look_for_var_or_env_functions` was called, and - /// stop looking. + /// Return whether a relevant function has been seen by the tokenizer since + /// `look_for_arbitrary_substitution_functions` was called, and stop looking. #[inline] - pub fn seen_var_or_env_functions(&mut self) -> bool { - self.input.tokenizer.seen_var_or_env_functions() + pub fn seen_arbitrary_substitution_functions(&mut self) -> bool { + self.input.tokenizer.seen_arbitrary_substitution_functions() } /// The old name of `try_parse`, which requires raw identifiers in the Rust 2018 edition. diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 48da02b5..7f268c59 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -31,7 +31,7 @@ pub trait DeclarationParser<'i> { /// Parse the value of a declaration with the given `name`. /// /// Return the finished representation for the declaration - /// as returned by `DeclarationListParser::next`, + /// as returned by `RuleBodyParser::next`, /// or an `Err(..)` to ignore the entire declaration as invalid. /// /// Declaration name matching should be case-insensitive in the ASCII range. @@ -49,6 +49,7 @@ pub trait DeclarationParser<'i> { &mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>, + _declaration_start: &ParserState, ) -> Result> { Err(input.new_error(BasicParseErrorKind::UnexpectedToken(Token::Ident(name)))) } @@ -62,7 +63,7 @@ pub trait DeclarationParser<'i> { /// /// Default implementations that reject all at-rules are provided, /// so that `impl AtRuleParser<(), ()> for ... {}` can be used -/// for using `DeclarationListParser` to parse a declarations list with only qualified rules. +/// for using `RuleBodyParser` to parse a declarations list with only qualified rules. pub trait AtRuleParser<'i> { /// The intermediate representation of prelude of an at-rule. type Prelude; @@ -120,7 +121,7 @@ pub trait AtRuleParser<'i> { /// The location passed in is source location of the start of the prelude. /// /// Return the finished representation of the at-rule - /// as returned by `RuleListParser::next` or `DeclarationListParser::next`, + /// as returned by `StyleSheetParser::next` or `RuleBodyParser::next`, /// or an `Err(..)` to ignore the entire at-rule as invalid. /// /// This is only called when `parse_prelude` returned `WithBlock`, and a block @@ -145,7 +146,7 @@ pub trait AtRuleParser<'i> { /// /// Default implementations that reject all qualified rules are provided, so that /// `impl QualifiedRuleParser<(), ()> for ... {}` can be used for example for using -/// `RuleListParser` to parse a rule list with only at-rules (such as inside +/// `StyleSheetParser` to parse a rule list with only at-rules (such as inside /// `@font-feature-values`). pub trait QualifiedRuleParser<'i> { /// The intermediate representation of a qualified rule prelude. @@ -178,7 +179,7 @@ pub trait QualifiedRuleParser<'i> { /// The location passed in is source location of the start of the prelude. /// /// Return the finished representation of the qualified rule - /// as returned by `RuleListParser::next`, + /// as returned by `StyleSheetParser::next`, /// or an `Err(..)` to ignore the entire at-rule as invalid. fn parse_block<'t>( &mut self, @@ -196,7 +197,7 @@ pub trait QualifiedRuleParser<'i> { pub struct RuleBodyParser<'i, 't, 'a, P, I, E> { /// The input given to the parser. pub input: &'a mut Parser<'i, 't>, - /// The parser given to `DeclarationListParser::new` + /// The parser given to `RuleBodyParser::new` pub parser: &'a mut P, _phantom: std::marker::PhantomData<(I, E)>, @@ -217,7 +218,7 @@ pub trait RuleBodyItemParser<'i, DeclOrRule, Error: 'i>: } impl<'i, 't, 'a, P, I, E> RuleBodyParser<'i, 't, 'a, P, I, E> { - /// Create a new `DeclarationListParser` for the given `input` and `parser`. + /// Create a new `RuleBodyParser` for the given `input` and `parser`. /// /// Note that all CSS declaration lists can on principle contain at-rules. /// Even if no such valid at-rule exists (yet), @@ -229,7 +230,7 @@ impl<'i, 't, 'a, P, I, E> RuleBodyParser<'i, 't, 'a, P, I, E> { /// since `AtRuleParser` provides default implementations of its methods. /// /// The return type for finished declarations and at-rules also needs to be the same, - /// since `::next` can return either. + /// since `::next` can return either. /// It could be a custom enum. pub fn new(input: &'a mut Parser<'i, 't>, parser: &'a mut P) -> Self { Self { @@ -241,7 +242,7 @@ impl<'i, 't, 'a, P, I, E> RuleBodyParser<'i, 't, 'a, P, I, E> { } /// https://drafts.csswg.org/css-syntax/#consume-a-blocks-contents -impl<'i, 't, 'a, I, P, E: 'i> Iterator for RuleBodyParser<'i, 't, 'a, P, I, E> +impl<'i, I, P, E: 'i> Iterator for RuleBodyParser<'i, '_, '_, P, I, E> where P: RuleBodyItemParser<'i, I, E>, { @@ -279,7 +280,7 @@ where error_behavior, |input| { input.expect_colon()?; - parser.parse_value(name, input) + parser.parse_value(name, input, &start) }, ) }; @@ -338,7 +339,7 @@ where /// implementations of their methods. /// /// The return type for finished qualified rules and at-rules also needs to be the same, - /// since `::next` can return either. It could be a custom enum. + /// since `::next` can return either. It could be a custom enum. pub fn new(input: &'a mut Parser<'i, 't>, parser: &'a mut P) -> Self { Self { input, @@ -348,8 +349,8 @@ where } } -/// `RuleListParser` is an iterator that yields `Ok(_)` for a rule or an `Err(..)` for an invalid one. -impl<'i, 't, 'a, R, P, E: 'i> Iterator for StyleSheetParser<'i, 't, 'a, P> +/// `StyleSheetParser` is an iterator that yields `Ok(_)` for a rule or an `Err(..)` for an invalid one. +impl<'i, R, P, E: 'i> Iterator for StyleSheetParser<'i, '_, '_, P> where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + AtRuleParser<'i, AtRule = R, Error = E>, @@ -408,12 +409,13 @@ pub fn parse_one_declaration<'i, 't, P, E>( where P: DeclarationParser<'i, Error = E>, { + let start = input.state(); let start_position = input.position(); input .parse_entirely(|input| { let name = input.expect_ident()?.clone(); input.expect_colon()?; - parser.parse_value(name, input) + parser.parse_value(name, input, &start) }) .map_err(|e| (e, input.slice_from(start_position))) } diff --git a/src/serializer.rs b/src/serializer.rs index 3c6e31cb..ca6eda81 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -3,7 +3,6 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use crate::match_byte; -use dtoa_short::Notation; use std::fmt::{self, Write}; use std::str; @@ -32,30 +31,27 @@ fn write_numeric(value: f32, int_value: Option, has_sign: bool, dest: &m where W: fmt::Write, { - // `value.value >= 0` is true for negative 0. - if has_sign && value.is_sign_positive() { + if value == 0.0 && value.is_sign_negative() { + // Negative zero. Work around #20596. + return dest.write_str("-0"); + } + // NOTE: `value.value >= 0` is true for negative 0 but we've dealt with it above. + if has_sign && value >= 0.0 { dest.write_str("+")?; } - let notation = if value == 0.0 && value.is_sign_negative() { - // Negative zero. Work around #20596. - dest.write_str("-0")?; - Notation { - decimal_point: false, - scientific: false, - } - } else { - dtoa_short::write(dest, value)? - }; + if let Some(v) = int_value { + return write!(dest, "{}", v); + } - if int_value.is_none() && value.fract() == 0. && !notation.decimal_point && !notation.scientific - { + let notation = dtoa_short::write(dest, value)?; + if value.fract() == 0. && !notation.decimal_point && !notation.scientific { dest.write_str(".0")?; } Ok(()) } -impl<'a> ToCss for Token<'a> { +impl ToCss for Token<'_> { fn to_css(&self, dest: &mut W) -> fmt::Result where W: fmt::Write, @@ -286,7 +282,7 @@ where /// /// Typical usage: /// -/// ```{rust,ignore} +/// ```rust,ignore /// fn write_foo(foo: &Foo, dest: &mut W) -> fmt::Result where W: fmt::Write { /// dest.write_str("\"")?; /// { @@ -311,7 +307,7 @@ where } } -impl<'a, W> fmt::Write for CssStringWriter<'a, W> +impl fmt::Write for CssStringWriter<'_, W> where W: fmt::Write, { @@ -466,6 +462,9 @@ pub enum TokenSerializationType { Other, } +#[cfg(feature = "malloc_size_of")] +malloc_size_of::malloc_size_of_is_0!(TokenSerializationType); + impl TokenSerializationType { /// Return a value that represents the absence of a token, e.g. before the start of the input. #[deprecated( @@ -539,7 +538,7 @@ impl TokenSerializationType { } } -impl<'a> Token<'a> { +impl Token<'_> { /// Categorize a token into a type that determines when `/**/` needs to be inserted /// between two tokens when serialized next to each other without whitespace in between. /// diff --git a/src/size_of_tests.rs b/src/size_of_tests.rs index edd2b439..7f4b85fa 100644 --- a/src/size_of_tests.rs +++ b/src/size_of_tests.rs @@ -42,8 +42,8 @@ size_of_test!(token, Token, 32); size_of_test!(std_cow_str, std::borrow::Cow<'static, str>, 24, 32); size_of_test!(cow_rc_str, CowRcStr, 16); -size_of_test!(tokenizer, crate::tokenizer::Tokenizer, 72); -size_of_test!(parser_input, crate::parser::ParserInput, 136); +size_of_test!(tokenizer, crate::tokenizer::Tokenizer, 96); +size_of_test!(parser_input, crate::parser::ParserInput, 160); size_of_test!(parser, crate::parser::Parser, 16); size_of_test!(source_position, crate::SourcePosition, 8); size_of_test!(parser_state, crate::ParserState, 24); diff --git a/src/tests.rs b/src/tests.rs index 7389664d..0c2acbe8 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -7,6 +7,9 @@ extern crate test; use serde_json::{json, Map, Value}; +#[cfg(feature = "bench")] +use crate::parser::ArbitrarySubstitutionFunctions; + #[cfg(feature = "bench")] use self::test::Bencher; @@ -24,7 +27,7 @@ macro_rules! JArray { } fn almost_equals(a: &Value, b: &Value) -> bool { - let var_name = match (a, b) { + match (a, b) { (Value::Number(a), Value::Number(b)) => { let a = a.as_f64().unwrap(); let b = b.as_f64().unwrap(); @@ -39,8 +42,7 @@ fn almost_equals(a: &Value, b: &Value) -> bool { (&Value::Object(_), &Value::Object(_)) => panic!("Not implemented"), (&Value::Null, &Value::Null) => true, _ => false, - }; - var_name + } } fn normalize(json: &mut Value) { @@ -773,7 +775,7 @@ where } } -impl<'a> ToJson for CowRcStr<'a> { +impl ToJson for CowRcStr<'_> { fn to_json(&self) -> Value { let s: &str = self; s.to_json() @@ -794,7 +796,10 @@ fn delimiter_from_byte(b: &mut Bencher) { } #[cfg(feature = "bench")] -const BACKGROUND_IMAGE: &'static str = include_str!("big-data-url.css"); +const BACKGROUND_IMAGE: &str = include_str!("big-data-url.css"); + +#[cfg(feature = "bench")] +const ARBITRARY_SUBSTITUTION_FUNCTIONS: ArbitrarySubstitutionFunctions = &["var", "env"]; #[cfg(feature = "bench")] #[bench] @@ -802,14 +807,16 @@ fn unquoted_url(b: &mut Bencher) { b.iter(|| { let mut input = ParserInput::new(BACKGROUND_IMAGE); let mut input = Parser::new(&mut input); - input.look_for_var_or_env_functions(); + input.look_for_arbitrary_substitution_functions(ARBITRARY_SUBSTITUTION_FUNCTIONS); let result = input.try_parse(|input| input.expect_url()); assert!(result.is_ok()); - input.seen_var_or_env_functions(); - (result.is_ok(), input.seen_var_or_env_functions()) + ( + result.is_ok(), + input.seen_arbitrary_substitution_functions(), + ) }) } @@ -849,6 +856,7 @@ impl<'i> DeclarationParser<'i> for JsonParser { &mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>, + _declaration_start: &ParserState, ) -> Result> { let mut value = vec![]; let mut important = false; @@ -946,7 +954,7 @@ impl<'i> QualifiedRuleParser<'i> for JsonParser { } } -impl<'i> RuleBodyItemParser<'i, Value, ()> for JsonParser { +impl RuleBodyItemParser<'_, Value, ()> for JsonParser { fn parse_qualified(&self) -> bool { true } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ea173a5e..65562766 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -6,7 +6,7 @@ use self::Token::*; use crate::cow_rc_str::CowRcStr; -use crate::parser::ParserState; +use crate::parser::{ArbitrarySubstitutionFunctions, ParserState}; use std::char; use std::ops::Range; @@ -190,7 +190,7 @@ pub enum Token<'a> { CloseCurlyBracket, } -impl<'a> Token<'a> { +impl Token<'_> { /// Return whether this token represents a parse error. /// /// `BadUrl` and `BadString` are tokenizer-level parse errors. @@ -215,50 +215,53 @@ pub struct Tokenizer<'a> { /// of UTF-16 characters. current_line_start_position: usize, current_line_number: u32, - var_or_env_functions: SeenStatus, + arbitrary_substitution_functions: SeenStatus<'a>, source_map_url: Option<&'a str>, source_url: Option<&'a str>, } #[derive(Copy, Clone, PartialEq, Eq)] -enum SeenStatus { +enum SeenStatus<'a> { DontCare, - LookingForThem, + LookingForThem(ArbitrarySubstitutionFunctions<'a>), SeenAtLeastOne, } impl<'a> Tokenizer<'a> { #[inline] - pub fn new(input: &str) -> Tokenizer { + pub fn new(input: &'a str) -> Self { Tokenizer { input, position: 0, current_line_start_position: 0, current_line_number: 0, - var_or_env_functions: SeenStatus::DontCare, + arbitrary_substitution_functions: SeenStatus::DontCare, source_map_url: None, source_url: None, } } #[inline] - pub fn look_for_var_or_env_functions(&mut self) { - self.var_or_env_functions = SeenStatus::LookingForThem; + pub fn look_for_arbitrary_substitution_functions( + &mut self, + fns: ArbitrarySubstitutionFunctions<'a>, + ) { + self.arbitrary_substitution_functions = SeenStatus::LookingForThem(fns); } #[inline] - pub fn seen_var_or_env_functions(&mut self) -> bool { - let seen = self.var_or_env_functions == SeenStatus::SeenAtLeastOne; - self.var_or_env_functions = SeenStatus::DontCare; + pub fn seen_arbitrary_substitution_functions(&mut self) -> bool { + let seen = self.arbitrary_substitution_functions == SeenStatus::SeenAtLeastOne; + self.arbitrary_substitution_functions = SeenStatus::DontCare; seen } #[inline] pub fn see_function(&mut self, name: &str) { - if self.var_or_env_functions == SeenStatus::LookingForThem - && (name.eq_ignore_ascii_case("var") || name.eq_ignore_ascii_case("env")) - { - self.var_or_env_functions = SeenStatus::SeenAtLeastOne; + if let SeenStatus::LookingForThem(fns) = self.arbitrary_substitution_functions { + if fns.iter().any(|a| name.eq_ignore_ascii_case(a)) { + self.arbitrary_substitution_functions = SeenStatus::SeenAtLeastOne; + } } } @@ -324,11 +327,11 @@ impl<'a> Tokenizer<'a> { let current = self.position(); let start = self .slice(SourcePosition(0)..current) - .rfind(|c| matches!(c, '\r' | '\n' | '\x0C')) + .rfind(['\r', '\n', '\x0C']) .map_or(0, |start| start + 1); let end = self .slice(current..SourcePosition(self.input.len())) - .find(|c| matches!(c, '\r' | '\n' | '\x0C')) + .find(['\r', '\n', '\x0C']) .map_or(self.input.len(), |end| current.0 + end); self.slice(SourcePosition(start)..SourcePosition(end)) } @@ -533,6 +536,9 @@ impl<'a> Tokenizer<'a> { #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)] pub struct SourcePosition(pub(crate) usize); +#[cfg(feature = "malloc_size_of")] +malloc_size_of::malloc_size_of_is_0!(SourcePosition); + impl SourcePosition { /// Returns the current byte index in the original input. #[inline] @@ -542,7 +548,7 @@ impl SourcePosition { } /// The line and column number for a given position within the input. -#[derive(PartialEq, Eq, Debug, Clone, Copy)] +#[derive(PartialEq, Eq, Debug, Clone, Copy, Default)] pub struct SourceLocation { /// The line number, starting at 0 for the first line. pub line: u32, @@ -552,6 +558,9 @@ pub struct SourceLocation { pub column: u32, } +#[cfg(feature = "malloc_size_of")] +malloc_size_of::malloc_size_of_is_0!(SourceLocation); + fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, ()> { if tokenizer.is_eof() { return Err(()); @@ -720,9 +729,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) { // If there is a source map directive, extract the URL. if contents.starts_with(directive) || contents.starts_with(directive_old) { let contents = &contents[directive.len()..]; - tokenizer.source_map_url = contents - .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n') - .next() + tokenizer.source_map_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next(); } let directive = "# sourceURL="; @@ -731,9 +738,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) { // If there is a source map directive, extract the URL. if contents.starts_with(directive) || contents.starts_with(directive_old) { let contents = &contents[directive.len()..]; - tokenizer.source_url = contents - .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n') - .next() + tokenizer.source_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next() } } @@ -889,7 +894,7 @@ fn consume_quoted_string<'a>( } #[inline] -fn is_ident_start(tokenizer: &mut Tokenizer) -> bool { +fn is_ident_start(tokenizer: &Tokenizer) -> bool { !tokenizer.is_eof() && match_byte! { tokenizer.next_byte_unchecked(), b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => true, diff --git a/src/unicode_range.rs b/src/unicode_range.rs index ce6bb3b5..a4130ef0 100644 --- a/src/unicode_range.rs +++ b/src/unicode_range.rs @@ -104,7 +104,7 @@ fn parse_concatenated(text: &[u8]) -> Result { Some((&b'+', text)) => text, _ => return Err(()), }; - let (first_hex_value, hex_digit_count) = consume_hex(&mut text); + let (first_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?; let question_marks = consume_question_marks(&mut text); let consumed = hex_digit_count + question_marks; if consumed == 0 || consumed > 6 { @@ -124,7 +124,7 @@ fn parse_concatenated(text: &[u8]) -> Result { end: first_hex_value, }); } else if let Some((&b'-', mut text)) = text.split_first() { - let (second_hex_value, hex_digit_count) = consume_hex(&mut text); + let (second_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?; if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() { return Ok(UnicodeRange { start: first_hex_value, @@ -135,19 +135,23 @@ fn parse_concatenated(text: &[u8]) -> Result { Err(()) } -fn consume_hex(text: &mut &[u8]) -> (u32, usize) { +// Consume hex digits, but return an error if more than digit_limit are found. +fn consume_hex(text: &mut &[u8], digit_limit: usize) -> Result<(u32, usize), ()> { let mut value = 0; let mut digits = 0; while let Some((&byte, rest)) = text.split_first() { if let Some(digit_value) = (byte as char).to_digit(16) { + if digits == digit_limit { + return Err(()); + } value = value * 0x10 + digit_value; digits += 1; - *text = rest + *text = rest; } else { break; } } - (value, digits) + Ok((value, digits)) } fn consume_question_marks(text: &mut &[u8]) -> usize {