From 4765111c002e247c9c9e2cccefd239ddd5c46b38 Mon Sep 17 00:00:00 2001 From: Jonathan Kew Date: Wed, 5 Jun 2024 23:56:56 +0100 Subject: [PATCH 1/2] [unicode-range] Avoid integer overflow (panic in debug builds) If a long string of hex digits is passed to unicode-range, the code tries to parse them into a number before checking whether there are more than the 6 digits allowed by the syntax, and this may lead to integer overflow. To avoid this, check the number of digits and error out earlier if there are too many to possibly be valid. (See https://bugzilla.mozilla.org/show_bug.cgi?id=1900403) --- src/css-parsing-tests/urange.json | 5 +++++ src/unicode_range.rs | 14 +++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/css-parsing-tests/urange.json b/src/css-parsing-tests/urange.json index 857d1d62..4dcb529c 100644 --- a/src/css-parsing-tests/urange.json +++ b/src/css-parsing-tests/urange.json @@ -84,6 +84,11 @@ null, null, null +], + +"U+26F9200D2640, U+10000-26F9200D2640", [ + null, + null ] ] diff --git a/src/unicode_range.rs b/src/unicode_range.rs index ce6bb3b5..a4130ef0 100644 --- a/src/unicode_range.rs +++ b/src/unicode_range.rs @@ -104,7 +104,7 @@ fn parse_concatenated(text: &[u8]) -> Result { Some((&b'+', text)) => text, _ => return Err(()), }; - let (first_hex_value, hex_digit_count) = consume_hex(&mut text); + let (first_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?; let question_marks = consume_question_marks(&mut text); let consumed = hex_digit_count + question_marks; if consumed == 0 || consumed > 6 { @@ -124,7 +124,7 @@ fn parse_concatenated(text: &[u8]) -> Result { end: first_hex_value, }); } else if let Some((&b'-', mut text)) = text.split_first() { - let (second_hex_value, hex_digit_count) = consume_hex(&mut text); + let (second_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?; if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() { return Ok(UnicodeRange { start: first_hex_value, @@ 
-135,19 +135,23 @@ fn parse_concatenated(text: &[u8]) -> Result { Err(()) } -fn consume_hex(text: &mut &[u8]) -> (u32, usize) { +// Consume hex digits, but return an error if more than digit_limit are found. +fn consume_hex(text: &mut &[u8], digit_limit: usize) -> Result<(u32, usize), ()> { let mut value = 0; let mut digits = 0; while let Some((&byte, rest)) = text.split_first() { if let Some(digit_value) = (byte as char).to_digit(16) { + if digits == digit_limit { + return Err(()); + } value = value * 0x10 + digit_value; digits += 1; - *text = rest + *text = rest; } else { break; } } - (value, digits) + Ok((value, digits)) } fn consume_question_marks(text: &mut &[u8]) -> usize { From bb071313db0d23a1cea87d1589e42e4ff388e51a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Thu, 7 Nov 2024 18:25:09 +0100 Subject: [PATCH 2/2] Fix clippy lints. --- src/cow_rc_str.rs | 28 ++++++++++++++-------------- src/parser.rs | 8 ++++---- src/rules_and_declarations.rs | 4 ++-- src/serializer.rs | 6 +++--- src/tests.rs | 4 ++-- src/tokenizer.rs | 14 +++++--------- 6 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/cow_rc_str.rs b/src/cow_rc_str.rs index 26508481..03631f47 100644 --- a/src/cow_rc_str.rs +++ b/src/cow_rc_str.rs @@ -51,7 +51,7 @@ impl<'a> From<&'a str> for CowRcStr<'a> { } } -impl<'a> From for CowRcStr<'a> { +impl From for CowRcStr<'_> { #[inline] fn from(s: String) -> Self { CowRcStr::from_rc(Rc::new(s)) @@ -84,7 +84,7 @@ impl<'a> CowRcStr<'a> { } } -impl<'a> Clone for CowRcStr<'a> { +impl Clone for CowRcStr<'_> { #[inline] fn clone(&self) -> Self { match self.unpack() { @@ -99,7 +99,7 @@ impl<'a> Clone for CowRcStr<'a> { } } -impl<'a> Drop for CowRcStr<'a> { +impl Drop for CowRcStr<'_> { #[inline] fn drop(&mut self) { if let Err(ptr) = self.unpack() { @@ -108,7 +108,7 @@ impl<'a> Drop for CowRcStr<'a> { } } -impl<'a> ops::Deref for CowRcStr<'a> { +impl ops::Deref for CowRcStr<'_> { type Target = str; #[inline] @@ 
-119,65 +119,65 @@ impl<'a> ops::Deref for CowRcStr<'a> { // Boilerplate / trivial impls below. -impl<'a> AsRef for CowRcStr<'a> { +impl AsRef for CowRcStr<'_> { #[inline] fn as_ref(&self) -> &str { self } } -impl<'a> Borrow for CowRcStr<'a> { +impl Borrow for CowRcStr<'_> { #[inline] fn borrow(&self) -> &str { self } } -impl<'a> Default for CowRcStr<'a> { +impl Default for CowRcStr<'_> { #[inline] fn default() -> Self { Self::from("") } } -impl<'a> hash::Hash for CowRcStr<'a> { +impl hash::Hash for CowRcStr<'_> { #[inline] fn hash(&self, hasher: &mut H) { str::hash(self, hasher) } } -impl<'a, T: AsRef> PartialEq for CowRcStr<'a> { +impl> PartialEq for CowRcStr<'_> { #[inline] fn eq(&self, other: &T) -> bool { str::eq(self, other.as_ref()) } } -impl<'a, T: AsRef> PartialOrd for CowRcStr<'a> { +impl> PartialOrd for CowRcStr<'_> { #[inline] fn partial_cmp(&self, other: &T) -> Option { str::partial_cmp(self, other.as_ref()) } } -impl<'a> Eq for CowRcStr<'a> {} +impl Eq for CowRcStr<'_> {} -impl<'a> Ord for CowRcStr<'a> { +impl Ord for CowRcStr<'_> { #[inline] fn cmp(&self, other: &Self) -> cmp::Ordering { str::cmp(self, other) } } -impl<'a> fmt::Display for CowRcStr<'a> { +impl fmt::Display for CowRcStr<'_> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) } } -impl<'a> fmt::Debug for CowRcStr<'a> { +impl fmt::Debug for CowRcStr<'_> { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { str::fmt(self, formatter) diff --git a/src/parser.rs b/src/parser.rs index dd35fc50..0a432912 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -76,7 +76,7 @@ pub enum BasicParseErrorKind<'i> { QualifiedRuleInvalid, } -impl<'i> fmt::Display for BasicParseErrorKind<'i> { +impl fmt::Display for BasicParseErrorKind<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { BasicParseErrorKind::UnexpectedToken(token) => { @@ -176,7 +176,7 @@ impl<'i, T> ParseErrorKind<'i, T> { } } -impl<'i, E: 
fmt::Display> fmt::Display for ParseErrorKind<'i, E> { +impl fmt::Display for ParseErrorKind<'_, E> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { ParseErrorKind::Basic(ref basic) => basic.fmt(f), @@ -218,13 +218,13 @@ impl<'i, T> ParseError<'i, T> { } } -impl<'i, E: fmt::Display> fmt::Display for ParseError<'i, E> { +impl fmt::Display for ParseError<'_, E> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.kind.fmt(f) } } -impl<'i, E: fmt::Display + fmt::Debug> std::error::Error for ParseError<'i, E> {} +impl std::error::Error for ParseError<'_, E> {} /// The owned input for a parser. pub struct ParserInput<'i> { diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 48da02b5..188e354e 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -241,7 +241,7 @@ impl<'i, 't, 'a, P, I, E> RuleBodyParser<'i, 't, 'a, P, I, E> { } /// https://drafts.csswg.org/css-syntax/#consume-a-blocks-contents -impl<'i, 't, 'a, I, P, E: 'i> Iterator for RuleBodyParser<'i, 't, 'a, P, I, E> +impl<'i, I, P, E: 'i> Iterator for RuleBodyParser<'i, '_, '_, P, I, E> where P: RuleBodyItemParser<'i, I, E>, { @@ -349,7 +349,7 @@ where } /// `RuleListParser` is an iterator that yields `Ok(_)` for a rule or an `Err(..)` for an invalid one. 
-impl<'i, 't, 'a, R, P, E: 'i> Iterator for StyleSheetParser<'i, 't, 'a, P> +impl<'i, R, P, E: 'i> Iterator for StyleSheetParser<'i, '_, '_, P> where P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + AtRuleParser<'i, AtRule = R, Error = E>, diff --git a/src/serializer.rs b/src/serializer.rs index 3c6e31cb..5df73954 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -55,7 +55,7 @@ where Ok(()) } -impl<'a> ToCss for Token<'a> { +impl ToCss for Token<'_> { fn to_css(&self, dest: &mut W) -> fmt::Result where W: fmt::Write, @@ -311,7 +311,7 @@ where } } -impl<'a, W> fmt::Write for CssStringWriter<'a, W> +impl fmt::Write for CssStringWriter<'_, W> where W: fmt::Write, { @@ -539,7 +539,7 @@ impl TokenSerializationType { } } -impl<'a> Token<'a> { +impl Token<'_> { /// Categorize a token into a type that determines when `/**/` needs to be inserted /// between two tokens when serialized next to each other without whitespace in between. /// diff --git a/src/tests.rs b/src/tests.rs index 7389664d..ec0fc517 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -773,7 +773,7 @@ where } } -impl<'a> ToJson for CowRcStr<'a> { +impl ToJson for CowRcStr<'_> { fn to_json(&self) -> Value { let s: &str = self; s.to_json() @@ -946,7 +946,7 @@ impl<'i> QualifiedRuleParser<'i> for JsonParser { } } -impl<'i> RuleBodyItemParser<'i, Value, ()> for JsonParser { +impl RuleBodyItemParser<'_, Value, ()> for JsonParser { fn parse_qualified(&self) -> bool { true } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ea173a5e..2e86c66a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -190,7 +190,7 @@ pub enum Token<'a> { CloseCurlyBracket, } -impl<'a> Token<'a> { +impl Token<'_> { /// Return whether this token represents a parse error. /// /// `BadUrl` and `BadString` are tokenizer-level parse errors. 
@@ -324,11 +324,11 @@ impl<'a> Tokenizer<'a> { let current = self.position(); let start = self .slice(SourcePosition(0)..current) - .rfind(|c| matches!(c, '\r' | '\n' | '\x0C')) + .rfind(['\r', '\n', '\x0C']) .map_or(0, |start| start + 1); let end = self .slice(current..SourcePosition(self.input.len())) - .find(|c| matches!(c, '\r' | '\n' | '\x0C')) + .find(['\r', '\n', '\x0C']) .map_or(self.input.len(), |end| current.0 + end); self.slice(SourcePosition(start)..SourcePosition(end)) } @@ -720,9 +720,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) { // If there is a source map directive, extract the URL. if contents.starts_with(directive) || contents.starts_with(directive_old) { let contents = &contents[directive.len()..]; - tokenizer.source_map_url = contents - .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n') - .next() + tokenizer.source_map_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next(); } let directive = "# sourceURL="; @@ -731,9 +729,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) { // If there is a source map directive, extract the URL. if contents.starts_with(directive) || contents.starts_with(directive_old) { let contents = &contents[directive.len()..]; - tokenizer.source_url = contents - .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n') - .next() + tokenizer.source_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next() } }