From 4765111c002e247c9c9e2cccefd239ddd5c46b38 Mon Sep 17 00:00:00 2001 From: Jonathan Kew Date: Wed, 5 Jun 2024 23:56:56 +0100 Subject: [PATCH 1/9] [unicode-range] Avoid integer overflow (panic in debug builds) If a long string of hex digits are passed to unicode-range, the code tries to parse them into a number before checking whether there are more than the 6 digits allowed by the syntax, and this may lead to integer overflow. To avoid this, check the number of digits and error out earlier if there are too many to possibly be valid. (See https://bugzilla.mozilla.org/show_bug.cgi?id=1900403) --- src/css-parsing-tests/urange.json | 5 +++++ src/unicode_range.rs | 14 +++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/css-parsing-tests/urange.json b/src/css-parsing-tests/urange.json index 857d1d62..4dcb529c 100644 --- a/src/css-parsing-tests/urange.json +++ b/src/css-parsing-tests/urange.json @@ -84,6 +84,11 @@ null, null, null +], + +"U+26F9200D2640, U+10000-26F9200D2640", [ + null, + null ] ] diff --git a/src/unicode_range.rs b/src/unicode_range.rs index ce6bb3b5..a4130ef0 100644 --- a/src/unicode_range.rs +++ b/src/unicode_range.rs @@ -104,7 +104,7 @@ fn parse_concatenated(text: &[u8]) -> Result { Some((&b'+', text)) => text, _ => return Err(()), }; - let (first_hex_value, hex_digit_count) = consume_hex(&mut text); + let (first_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?; let question_marks = consume_question_marks(&mut text); let consumed = hex_digit_count + question_marks; if consumed == 0 || consumed > 6 { @@ -124,7 +124,7 @@ fn parse_concatenated(text: &[u8]) -> Result { end: first_hex_value, }); } else if let Some((&b'-', mut text)) = text.split_first() { - let (second_hex_value, hex_digit_count) = consume_hex(&mut text); + let (second_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?; if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() { return Ok(UnicodeRange { start: first_hex_value, @@ -135,19 +135,23 @@ fn parse_concatenated(text: &[u8]) -> Result { Err(()) } -fn consume_hex(text: &mut &[u8]) -> (u32, usize) { +// Consume hex digits, but return an error if more than digit_limit are found. +fn consume_hex(text: &mut &[u8], digit_limit: usize) -> Result<(u32, usize), ()> { let mut value = 0; let mut digits = 0; while let Some((&byte, rest)) = text.split_first() { if let Some(digit_value) = (byte as char).to_digit(16) { + if digits == digit_limit { + return Err(()); + } value = value * 0x10 + digit_value; digits += 1; - *text = rest + *text = rest; } else { break; } } - (value, digits) + Ok((value, digits)) } fn consume_question_marks(text: &mut &[u8]) -> usize { From bb071313db0d23a1cea87d1589e42e4ff388e51a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Thu, 7 Nov 2024 18:25:09 +0100 Subject: [PATCH 2/9] Fix clippy lints. --- src/cow_rc_str.rs | 28 ++++++++++++++-------------- src/parser.rs | 8 ++++---- src/rules_and_declarations.rs | 4 ++-- src/serializer.rs | 6 +++--- src/tests.rs | 4 ++-- src/tokenizer.rs | 14 +++++--------- 6 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/cow_rc_str.rs b/src/cow_rc_str.rs index 26508481..03631f47 100644 --- a/src/cow_rc_str.rs +++ b/src/cow_rc_str.rs @@ -51,7 +51,7 @@ impl<'a> From<&'a str> for CowRcStr<'a> { } } -impl<'a> From for CowRcStr<'a> { +impl From for CowRcStr<'_> { #[inline] fn from(s: String) -> Self { CowRcStr::from_rc(Rc::new(s)) @@ -84,7 +84,7 @@ impl<'a> CowRcStr<'a> { } } -impl<'a> Clone for CowRcStr<'a> { +impl Clone for CowRcStr<'_> { #[inline] fn clone(&self) -> Self { match self.unpack() { @@ -99,7 +99,7 @@ impl<'a> Clone for CowRcStr<'a> { } } -impl<'a> Drop for CowRcStr<'a> { +impl Drop for CowRcStr<'_> { #[inline] fn drop(&mut self) { if let Err(ptr) = self.unpack() { @@ -108,7 +108,7 @@ impl<'a> Drop for CowRcStr<'a> { } } -impl<'a> ops::Deref for CowRcStr<'a> { +impl ops::Deref for CowRcStr<'_> { type Target = str; #[inline] @@ -119,65 +119,65 @@ impl<'a> ops::Deref for CowRcStr<'a> { // Boilerplate / trivial impls below. -impl<'a> AsRef for CowRcStr<'a> { +impl AsRef for CowRcStr<'_> { #[inline] fn as_ref(&self) -> &str { self } } -impl<'a> Borrow for CowRcStr<'a> { +impl Borrow for CowRcStr<'_> { #[inline] fn borrow(&self) -> &str { self } } -impl<'a> Default for CowRcStr<'a> { +impl Default for CowRcStr<'_> { #[inline] fn default() -> Self { Self::from("") } } -impl<'a> hash::Hash for CowRcStr<'a> { +impl hash::Hash for CowRcStr<'_> { #[inline] fn hash(&self, hasher: &mut H) { str::hash(self, hasher) } } -impl<'a, T: AsRef> PartialEq for CowRcStr<'a> { +impl> PartialEq for CowRcStr<'_> { #[inline] fn eq(&self, other: &T) -> bool { str::eq(self, other.as_ref()) } } -impl<'a, T: AsRef> PartialOrd for CowRcStr<'a> { +impl> PartialOrd for CowRcStr<'_> { #[inline] fn partial_cmp(&self, other: &T) -> Option { str::partial_cmp(self, other.as_ref()) } } -impl<'a> Eq for CowRcStr<'a> {} +impl Eq for CowRcStr<'_> {} -impl<'a> Ord for CowRcStr<'a> { +impl Ord for CowRcStr<'_> { #[inline] fn cmp(&self, other: &Self) -> cmp::Ordering { str::cmp(self, other) } } -impl<'a> fmt::Display for CowRcStr<'a> { +impl fmt::Display for CowRcStr<'_> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) } } -impl<'a> fmt::Debug for CowRcStr<'a> { +impl fmt::Debug for CowRcStr<'_> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) diff --git a/src/parser.rs b/src/parser.rs index dd35fc50..0a432912 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -76,7 +76,7 @@ pub enum BasicParseErrorKind<'i> { QualifiedRuleInvalid, } -impl<'i> fmt::Display for BasicParseErrorKind<'i> { +impl fmt::Display for BasicParseErrorKind<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { BasicParseErrorKind::UnexpectedToken(token) => { @@ -176,7 +176,7 @@ impl<'i, T> ParseErrorKind<'i, T> { } } -impl<'i, E: fmt::Display> fmt::Display for ParseErrorKind<'i, E> { +impl fmt::Display for ParseErrorKind<'_, E> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { ParseErrorKind::Basic(ref basic) => basic.fmt(f), @@ -218,13 +218,13 @@ impl<'i, T> ParseError<'i, T> { } } -impl<'i, E: fmt::Display> fmt::Display for ParseError<'i, E> { +impl fmt::Display for ParseError<'_, E> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.kind.fmt(f) } } -impl<'i, E: fmt::Display + fmt::Debug> std::error::Error for ParseError<'i, E> {} +impl std::error::Error for ParseError<'_, E> {} /// The owned input for a parser. pub struct ParserInput<'i> { diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 48da02b5..188e354e 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -241,7 +241,7 @@ impl<'i, 't, 'a, P, I, E> RuleBodyParser<'i, 't, 'a, P, I, E> { } /// https://drafts.csswg.org/css-syntax/#consume-a-blocks-contents -impl<'i, 't, 'a, I, P, E: 'i> Iterator for RuleBodyParser<'i, 't, 'a, P, I, E> +impl<'i, I, P, E: 'i> Iterator for RuleBodyParser<'i, '_, '_, P, I, E> where P: RuleBodyItemParser<'i, I, E>, { @@ -349,7 +349,7 @@ where } /// `RuleListParser` is an iterator that yields `Ok(_)` for a rule or an `Err(..)` for an invalid one. -impl<'i, 't, 'a, R, P, E: 'i> Iterator for StyleSheetParser<'i, 't, 'a, P> +impl<'i, R, P, E: 'i> Iterator for StyleSheetParser<'i, '_, '_, P> where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + AtRuleParser<'i, AtRule = R, Error = E>, diff --git a/src/serializer.rs b/src/serializer.rs index 3c6e31cb..5df73954 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -55,7 +55,7 @@ where Ok(()) } -impl<'a> ToCss for Token<'a> { +impl ToCss for Token<'_> { fn to_css(&self, dest: &mut W) -> fmt::Result where W: fmt::Write, @@ -311,7 +311,7 @@ where } } -impl<'a, W> fmt::Write for CssStringWriter<'a, W> +impl fmt::Write for CssStringWriter<'_, W> where W: fmt::Write, { @@ -539,7 +539,7 @@ impl TokenSerializationType { } } -impl<'a> Token<'a> { +impl Token<'_> { /// Categorize a token into a type that determines when `/**/` needs to be inserted /// between two tokens when serialized next to each other without whitespace in between. /// diff --git a/src/tests.rs b/src/tests.rs index 7389664d..ec0fc517 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -773,7 +773,7 @@ where } } -impl<'a> ToJson for CowRcStr<'a> { +impl ToJson for CowRcStr<'_> { fn to_json(&self) -> Value { let s: &str = self; s.to_json() @@ -946,7 +946,7 @@ impl<'i> QualifiedRuleParser<'i> for JsonParser { } } -impl<'i> RuleBodyItemParser<'i, Value, ()> for JsonParser { +impl RuleBodyItemParser<'_, Value, ()> for JsonParser { fn parse_qualified(&self) -> bool { true } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ea173a5e..2e86c66a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -190,7 +190,7 @@ pub enum Token<'a> { CloseCurlyBracket, } -impl<'a> Token<'a> { +impl Token<'_> { /// Return whether this token represents a parse error. /// /// `BadUrl` and `BadString` are tokenizer-level parse errors. @@ -324,11 +324,11 @@ impl<'a> Tokenizer<'a> { let current = self.position(); let start = self .slice(SourcePosition(0)..current) - .rfind(|c| matches!(c, '\r' | '\n' | '\x0C')) + .rfind(['\r', '\n', '\x0C']) .map_or(0, |start| start + 1); let end = self .slice(current..SourcePosition(self.input.len())) - .find(|c| matches!(c, '\r' | '\n' | '\x0C')) + .find(['\r', '\n', '\x0C']) .map_or(self.input.len(), |end| current.0 + end); self.slice(SourcePosition(start)..SourcePosition(end)) } @@ -720,9 +720,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) { // If there is a source map directive, extract the URL. if contents.starts_with(directive) || contents.starts_with(directive_old) { let contents = &contents[directive.len()..]; - tokenizer.source_map_url = contents - .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n') - .next() + tokenizer.source_map_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next(); } let directive = "# sourceURL="; @@ -731,9 +729,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) { // If there is a source map directive, extract the URL. if contents.starts_with(directive) || contents.starts_with(directive_old) { let contents = &contents[directive.len()..]; - tokenizer.source_url = contents - .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n') - .next() + tokenizer.source_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next() } } From 6fb149bc852a019b08958eac199720404c6b4932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Thu, 7 Nov 2024 19:17:56 +0100 Subject: [PATCH 3/9] Bump cssparser_color version. (#395) --- color/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/color/Cargo.toml b/color/Cargo.toml index 47544815..d5cde913 100644 --- a/color/Cargo.toml +++ b/color/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cssparser-color" -version = "0.1.0" +version = "0.2.0" authors = ["Emilio Cobos Álvarez "] description = "Color implementation based on cssparser" documentation = "https://docs.rs/cssparser-color/" @@ -12,7 +12,7 @@ edition = "2021" path = "lib.rs" [dependencies] -cssparser = { path = ".." } +cssparser = { path = "..", version = "0.34" } serde = { version = "1.0", features = ["derive"], optional = true } [features] From fa6f5eb23f058c6fce444ac781b0b380003fdb59 Mon Sep 17 00:00:00 2001 From: AlaskanEmily <31194934+AlaskanEmily@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:07:38 -0800 Subject: [PATCH 4/9] Minor optimization of Delimiters::from_byte to avoid an extra branch. (#397) Co-authored-by: AlaskanEmily --- src/parser.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 0a432912..aabeea6a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -360,10 +360,8 @@ impl Delimiters { table }; - match byte { - None => Delimiter::None, - Some(b) => TABLE[b as usize], - } + assert_eq!(TABLE[0], Delimiter::None); + TABLE[byte.unwrap_or(0) as usize] } } From 008a91c3cea7da09baf092c9be8b77c4d385141a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Fri, 21 Feb 2025 11:30:43 +0100 Subject: [PATCH 5/9] Fix clippy lints (#401) --- src/from_bytes.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/from_bytes.rs b/src/from_bytes.rs index 78a56d3e..7d9d2c76 100644 --- a/src/from_bytes.rs +++ b/src/from_bytes.rs @@ -24,9 +24,9 @@ pub trait EncodingSupport { /// /// * `css_bytes`: A byte string. /// * `protocol_encoding`: The encoding label, if any, defined by HTTP or equivalent protocol. -/// (e.g. via the `charset` parameter of the `Content-Type` header.) +/// (e.g. via the `charset` parameter of the `Content-Type` header.) /// * `environment_encoding`: An optional `Encoding` object for the [environment encoding] -/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any. +/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any. /// /// Returns the encoding to use. pub fn stylesheet_encoding( From 5e477ab8195f64ed9b22ab1cf7b08685347f3879 Mon Sep 17 00:00:00 2001 From: Nicolas Chevobbe Date: Fri, 21 Feb 2025 11:30:02 +0100 Subject: [PATCH 6/9] Add declaration_start parameter to DeclarationParser parse_value (#400) --- src/rules_and_declarations.rs | 6 ++++-- src/tests.rs | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 188e354e..bdaef077 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -49,6 +49,7 @@ pub trait DeclarationParser<'i> { &mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>, + _declaration_start: &ParserState, ) -> Result> { Err(input.new_error(BasicParseErrorKind::UnexpectedToken(Token::Ident(name)))) } @@ -279,7 +280,7 @@ where error_behavior, |input| { input.expect_colon()?; - parser.parse_value(name, input) + parser.parse_value(name, input, &start) }, ) }; @@ -408,12 +409,13 @@ pub fn parse_one_declaration<'i, 't, P, E>( where P: DeclarationParser<'i, Error = E>, { + let start = input.state(); let start_position = input.position(); input .parse_entirely(|input| { let name = input.expect_ident()?.clone(); input.expect_colon()?; - parser.parse_value(name, input) + parser.parse_value(name, input, &start) }) .map_err(|e| (e, input.slice_from(start_position))) } diff --git a/src/tests.rs b/src/tests.rs index ec0fc517..3c122f0d 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -849,6 +849,7 @@ impl<'i> DeclarationParser<'i> for JsonParser { &mut self, name: CowRcStr<'i>, input: &mut Parser<'i, 't>, + _declaration_start: &ParserState, ) -> Result> { let mut value = vec![]; let mut important = false; From 0d800216303aa6586b1469cb948859fb1dbd6a2f Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 21 Feb 2025 23:31:11 +1300 Subject: [PATCH 7/9] Implement `MallocSizeOf` for cssparser types (#399) * Implement MallocSizeOf for cssparser types * Fix clippy lints Signed-off-by: Nico Burns --------- Signed-off-by: Nico Burns --- .github/workflows/main.yml | 1 + Cargo.toml | 3 ++- src/serializer.rs | 3 +++ src/tokenizer.rs | 6 ++++++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b08ddef7..a7be6aec 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -22,6 +22,7 @@ jobs: features: - - --features dummy_match_byte + - --features malloc_size_of include: - toolchain: nightly features: --features bench diff --git a/Cargo.toml b/Cargo.toml index f783f19f..f4e980fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cssparser" -version = "0.34.0" +version = "0.34.1" authors = ["Simon Sapin "] description = "Rust implementation of CSS Syntax Level 3" @@ -25,6 +25,7 @@ dtoa-short = "0.3" itoa = "1.0" phf = { version = "0.11.2", features = ["macros"] } serde = { version = "1.0", features = ["derive"], optional = true } +malloc_size_of = { version = "0.1", default-features = false, optional = true } smallvec = "1.0" [profile.profiling] diff --git a/src/serializer.rs b/src/serializer.rs index 5df73954..6696a622 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -466,6 +466,9 @@ pub enum TokenSerializationType { Other, } +#[cfg(feature = "malloc_size_of")] +malloc_size_of::malloc_size_of_is_0!(TokenSerializationType); + impl TokenSerializationType { /// Return a value that represents the absence of a token, e.g. before the start of the input. #[deprecated( diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 2e86c66a..892d10ae 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -533,6 +533,9 @@ impl<'a> Tokenizer<'a> { #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)] pub struct SourcePosition(pub(crate) usize); +#[cfg(feature = "malloc_size_of")] +malloc_size_of::malloc_size_of_is_0!(SourcePosition); + impl SourcePosition { /// Returns the current byte index in the original input. #[inline] @@ -552,6 +555,9 @@ pub struct SourceLocation { pub column: u32, } +#[cfg(feature = "malloc_size_of")] +malloc_size_of::malloc_size_of_is_0!(SourceLocation); + fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, ()> { if tokenizer.is_eof() { return Err(()); From 958a3f098acb92ddacdce18a7ef2c4a87ac3326f Mon Sep 17 00:00:00 2001 From: Nicolas Chevobbe Date: Fri, 21 Feb 2025 14:28:55 +0100 Subject: [PATCH 8/9] Derive Default for SourceLocation (#402) --- src/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 892d10ae..1c85e10a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -545,7 +545,7 @@ impl SourcePosition { } /// The line and column number for a given position within the input. -#[derive(PartialEq, Eq, Debug, Clone, Copy)] +#[derive(PartialEq, Eq, Debug, Clone, Copy, Default)] pub struct SourceLocation { /// The line number, starting at 0 for the first line. pub line: u32, From 3dd4f63639925438d9932dafc0d81395847f9be4 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 21 Mar 2025 07:11:19 +1300 Subject: [PATCH 9/9] Bump version 0.35.0 (#404) Signed-off-by: Nico Burns --- Cargo.toml | 2 +- color/Cargo.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f4e980fd..6604f20e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cssparser" -version = "0.34.1" +version = "0.35.0" authors = ["Simon Sapin "] description = "Rust implementation of CSS Syntax Level 3" diff --git a/color/Cargo.toml b/color/Cargo.toml index d5cde913..5dd6aac7 100644 --- a/color/Cargo.toml +++ b/color/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cssparser-color" -version = "0.2.0" +version = "0.3.0" authors = ["Emilio Cobos Álvarez "] description = "Color implementation based on cssparser" documentation = "https://docs.rs/cssparser-color/" @@ -12,7 +12,7 @@ edition = "2021" path = "lib.rs" [dependencies] -cssparser = { path = "..", version = "0.34" } +cssparser = { path = "..", version = "0.35" } serde = { version = "1.0", features = ["derive"], optional = true } [features]