diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b08ddef7..a7be6aec 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -22,6 +22,7 @@ jobs:
         features:
           -
           - --features dummy_match_byte
+          - --features malloc_size_of
         include:
           - toolchain: nightly
             features: --features bench
diff --git a/Cargo.toml b/Cargo.toml
index f783f19f..6604f20e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cssparser"
-version = "0.34.0"
+version = "0.35.0"
 authors = ["Simon Sapin <simon.sapin@exyr.org>"]
 description = "Rust implementation of CSS Syntax Level 3"
@@ -25,6 +25,7 @@ dtoa-short = "0.3"
 itoa = "1.0"
 phf = { version = "0.11.2", features = ["macros"] }
 serde = { version = "1.0", features = ["derive"], optional = true }
+malloc_size_of = { version = "0.1", default-features = false, optional = true }
 smallvec = "1.0"
 
 [profile.profiling]
diff --git a/color/Cargo.toml b/color/Cargo.toml
index 47544815..5dd6aac7 100644
--- a/color/Cargo.toml
+++ b/color/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cssparser-color"
-version = "0.1.0"
+version = "0.3.0"
 authors = ["Emilio Cobos Álvarez <emilio@crisal.io>"]
 description = "Color implementation based on cssparser"
 documentation = "https://docs.rs/cssparser-color/"
@@ -12,7 +12,7 @@ edition = "2021"
 path = "lib.rs"
 
 [dependencies]
-cssparser = { path = ".." }
+cssparser = { path = "..", version = "0.35" }
 serde = { version = "1.0", features = ["derive"], optional = true }
 
 [features]
diff --git a/src/cow_rc_str.rs b/src/cow_rc_str.rs
index 26508481..03631f47 100644
--- a/src/cow_rc_str.rs
+++ b/src/cow_rc_str.rs
@@ -51,7 +51,7 @@ impl<'a> From<&'a str> for CowRcStr<'a> {
     }
 }
 
-impl<'a> From<String> for CowRcStr<'a> {
+impl From<String> for CowRcStr<'_> {
     #[inline]
     fn from(s: String) -> Self {
         CowRcStr::from_rc(Rc::new(s))
@@ -84,7 +84,7 @@ impl<'a> CowRcStr<'a> {
     }
 }
 
-impl<'a> Clone for CowRcStr<'a> {
+impl Clone for CowRcStr<'_> {
     #[inline]
     fn clone(&self) -> Self {
         match self.unpack() {
@@ -99,7 +99,7 @@ impl<'a> Clone for CowRcStr<'a> {
     }
 }
 
-impl<'a> Drop for CowRcStr<'a> {
+impl Drop for CowRcStr<'_> {
     #[inline]
     fn drop(&mut self) {
         if let Err(ptr) = self.unpack() {
@@ -108,7 +108,7 @@ impl<'a> Drop for CowRcStr<'a> {
     }
 }
 
-impl<'a> ops::Deref for CowRcStr<'a> {
+impl ops::Deref for CowRcStr<'_> {
     type Target = str;
 
     #[inline]
@@ -119,65 +119,65 @@ impl ops::Deref for CowRcStr<'_> {
 
 // Boilerplate / trivial impls below.
 
-impl<'a> AsRef<str> for CowRcStr<'a> {
+impl AsRef<str> for CowRcStr<'_> {
     #[inline]
     fn as_ref(&self) -> &str {
         self
     }
 }
 
-impl<'a> Borrow<str> for CowRcStr<'a> {
+impl Borrow<str> for CowRcStr<'_> {
     #[inline]
     fn borrow(&self) -> &str {
         self
     }
 }
 
-impl<'a> Default for CowRcStr<'a> {
+impl Default for CowRcStr<'_> {
     #[inline]
     fn default() -> Self {
         Self::from("")
     }
 }
 
-impl<'a> hash::Hash for CowRcStr<'a> {
+impl hash::Hash for CowRcStr<'_> {
     #[inline]
     fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
         str::hash(self, hasher)
     }
 }
 
-impl<'a, T: AsRef<str>> PartialEq<T> for CowRcStr<'a> {
+impl<T: AsRef<str>> PartialEq<T> for CowRcStr<'_> {
     #[inline]
     fn eq(&self, other: &T) -> bool {
         str::eq(self, other.as_ref())
     }
 }
 
-impl<'a, T: AsRef<str>> PartialOrd<T> for CowRcStr<'a> {
+impl<T: AsRef<str>> PartialOrd<T> for CowRcStr<'_> {
     #[inline]
     fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
         str::partial_cmp(self, other.as_ref())
     }
 }
 
-impl<'a> Eq for CowRcStr<'a> {}
+impl Eq for CowRcStr<'_> {}
 
-impl<'a> Ord for CowRcStr<'a> {
+impl Ord for CowRcStr<'_> {
     #[inline]
     fn cmp(&self, other: &Self) -> cmp::Ordering {
         str::cmp(self, other)
     }
 }
 
-impl<'a> fmt::Display for CowRcStr<'a> {
+impl fmt::Display for CowRcStr<'_> {
     #[inline]
     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
         str::fmt(self, formatter)
     }
 }
 
-impl<'a> fmt::Debug for CowRcStr<'a> {
+impl fmt::Debug for CowRcStr<'_> {
     #[inline]
     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
         str::fmt(self, formatter)
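
The `CowRcStr` changes above, like the matching ones in `parser.rs`, `serializer.rs`, `tests.rs`, and `tokenizer.rs` further down, are mechanical: when an impl declares a lifetime parameter only to repeat it once, the anonymous lifetime `'_` expresses the same impl without the `impl<'a>` boilerplate, which is what clippy's `needless_lifetimes` lint suggests on newer toolchains. A minimal before/after sketch, using a made-up `Wrapper` type:

    struct Wrapper<'a>(&'a str);

    // Before: `'a` is declared only so it can be repeated once.
    // impl<'a> std::fmt::Display for Wrapper<'a> { ... }

    // After: the anonymous lifetime `'_` expresses the same impl.
    impl std::fmt::Display for Wrapper<'_> {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.write_str(self.0)
        }
    }

    fn main() {
        println!("{}", Wrapper("hello"));
    }
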
diff --git a/src/css-parsing-tests/urange.json b/src/css-parsing-tests/urange.json
index 857d1d62..4dcb529c 100644
--- a/src/css-parsing-tests/urange.json
+++ b/src/css-parsing-tests/urange.json
@@ -84,6 +84,11 @@
     null,
     null,
     null
+],
+
+"U+26F9200D2640, U+10000-26F9200D2640", [
+    null,
+    null
 ]
 
 ]
diff --git a/src/from_bytes.rs b/src/from_bytes.rs
index 78a56d3e..7d9d2c76 100644
--- a/src/from_bytes.rs
+++ b/src/from_bytes.rs
@@ -24,9 +24,9 @@ pub trait EncodingSupport {
 ///
 /// * `css_bytes`: A byte string.
 /// * `protocol_encoding`: The encoding label, if any, defined by HTTP or equivalent protocol.
-/// (e.g. via the `charset` parameter of the `Content-Type` header.)
+///   (e.g. via the `charset` parameter of the `Content-Type` header.)
 /// * `environment_encoding`: An optional `Encoding` object for the [environment encoding]
-/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any.
+///   (https://drafts.csswg.org/css-syntax/#environment-encoding), if any.
 ///
 /// Returns the encoding to use.
 pub fn stylesheet_encoding<E>(
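
The doc comment being re-indented belongs to `stylesheet_encoding`, which implements the CSS Syntax encoding-detection steps: protocol encoding first, then a leading `@charset` rule, then the environment encoding, falling back to UTF-8. A hedged usage sketch; `DemoEncodings` and its two-entry label table are invented for illustration, and the method set follows the `EncodingSupport` trait as cssparser declares it:

    use cssparser::{stylesheet_encoding, EncodingSupport};

    struct DemoEncodings;

    impl EncodingSupport for DemoEncodings {
        // Represent an encoding by its canonical label.
        type Encoding = &'static str;

        fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding> {
            match ascii_label.to_ascii_lowercase().as_slice() {
                b"utf-8" => Some("utf-8"),
                b"windows-1252" => Some("windows-1252"),
                _ => None,
            }
        }

        fn utf8() -> Self::Encoding {
            "utf-8"
        }

        fn is_utf16_be_or_le(_: &Self::Encoding) -> bool {
            false
        }
    }

    fn main() {
        // No protocol or environment encoding given, so the leading
        // @charset rule decides.
        let css = b"@charset \"windows-1252\";\nbody { color: red }";
        let encoding = stylesheet_encoding::<DemoEncodings>(css, None, None);
        assert_eq!(encoding, "windows-1252");
    }
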
diff --git a/src/parser.rs b/src/parser.rs
index dd35fc50..aabeea6a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -76,7 +76,7 @@ pub enum BasicParseErrorKind<'i> {
     QualifiedRuleInvalid,
 }
 
-impl<'i> fmt::Display for BasicParseErrorKind<'i> {
+impl fmt::Display for BasicParseErrorKind<'_> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             BasicParseErrorKind::UnexpectedToken(token) => {
@@ -176,7 +176,7 @@ impl<'i, T> ParseErrorKind<'i, T> {
     }
 }
 
-impl<'i, E: fmt::Display> fmt::Display for ParseErrorKind<'i, E> {
+impl<E: fmt::Display> fmt::Display for ParseErrorKind<'_, E> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
             ParseErrorKind::Basic(ref basic) => basic.fmt(f),
@@ -218,13 +218,13 @@ impl<'i, T> ParseError<'i, T> {
     }
 }
 
-impl<'i, E: fmt::Display> fmt::Display for ParseError<'i, E> {
+impl<E: fmt::Display> fmt::Display for ParseError<'_, E> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         self.kind.fmt(f)
     }
 }
 
-impl<'i, E: fmt::Display + fmt::Debug> std::error::Error for ParseError<'i, E> {}
+impl<E: fmt::Display + fmt::Debug> std::error::Error for ParseError<'_, E> {}
 
 /// The owned input for a parser.
 pub struct ParserInput<'i> {
@@ -360,10 +360,8 @@ impl Delimiters {
             table
         };
 
-        match byte {
-            None => Delimiter::None,
-            Some(b) => TABLE[b as usize],
-        }
+        assert_eq!(TABLE[0], Delimiter::None);
+        TABLE[byte.unwrap_or(0) as usize]
     }
 }
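
The `Delimiters` rewrite drops a branch: slot 0 of the lookup table (the NUL byte, which never maps to a delimiter) already holds `Delimiter::None`, so the `None` case can reuse index 0 and the `match` collapses into one unconditional lookup, with the `assert_eq!` documenting the invariant. A standalone sketch of the same trick on a toy table:

    // 256-entry lookup table; only a few bytes are "interesting".
    const IS_BLOCK_DELIMITER: [bool; 256] = {
        let mut table = [false; 256];
        table[b';' as usize] = true;
        table[b'{' as usize] = true;
        table[b'}' as usize] = true;
        table
    };

    fn is_block_delimiter(byte: Option<u8>) -> bool {
        // Slot 0 holds `false`, the value we also want for `None`,
        // so `None` and `Some(0)` share one unconditional lookup.
        assert!(!IS_BLOCK_DELIMITER[0]);
        IS_BLOCK_DELIMITER[byte.unwrap_or(0) as usize]
    }

    fn main() {
        assert!(is_block_delimiter(Some(b';')));
        assert!(!is_block_delimiter(Some(b'a')));
        assert!(!is_block_delimiter(None)); // EOF behaves like a non-delimiter
    }
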
diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs
index 48da02b5..bdaef077 100644
--- a/src/rules_and_declarations.rs
+++ b/src/rules_and_declarations.rs
@@ -49,6 +49,7 @@ pub trait DeclarationParser<'i> {
         &mut self,
         name: CowRcStr<'i>,
         input: &mut Parser<'i, 't>,
+        _declaration_start: &ParserState,
     ) -> Result<Self::Declaration, ParseError<'i, Self::Error>> {
         Err(input.new_error(BasicParseErrorKind::UnexpectedToken(Token::Ident(name))))
     }
@@ -241,7 +242,7 @@ impl<'i, 't, 'a, P, I, E> RuleBodyParser<'i, 't, 'a, P, I, E> {
     }
 
 /// https://drafts.csswg.org/css-syntax/#consume-a-blocks-contents
-impl<'i, 't, 'a, I, P, E: 'i> Iterator for RuleBodyParser<'i, 't, 'a, P, I, E>
+impl<'i, I, P, E: 'i> Iterator for RuleBodyParser<'i, '_, '_, P, I, E>
 where
     P: RuleBodyItemParser<'i, I, E>,
 {
@@ -279,7 +280,7 @@ where
                 error_behavior,
                 |input| {
                     input.expect_colon()?;
-                    parser.parse_value(name, input)
+                    parser.parse_value(name, input, &start)
                 },
             )
         };
@@ -349,7 +350,7 @@ where
 }
 
 /// `RuleListParser` is an iterator that yields `Ok(_)` for a rule or an `Err(..)` for an invalid one.
-impl<'i, 't, 'a, R, P, E: 'i> Iterator for StyleSheetParser<'i, 't, 'a, P>
+impl<'i, R, P, E: 'i> Iterator for StyleSheetParser<'i, '_, '_, P>
 where
     P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E>
         + AtRuleParser<'i, AtRule = R, Error = E>,
@@ -408,12 +409,13 @@ pub fn parse_one_declaration<'i, 't, P, E>(
 where
     P: DeclarationParser<'i, Error = E>,
 {
+    let start = input.state();
     let start_position = input.position();
     input
         .parse_entirely(|input| {
             let name = input.expect_ident()?.clone();
             input.expect_colon()?;
-            parser.parse_value(name, input)
+            parser.parse_value(name, input, &start)
         })
         .map_err(|e| (e, input.slice_from(start_position)))
 }
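
This is the release's main API break: `DeclarationParser::parse_value` (and, via `RuleBodyParser` and `parse_one_declaration`, every call site) now receives the `ParserState` captured just before the declaration, so implementations can recover where the declaration started. A minimal migration sketch; `AcceptAll` and its `String` declaration type are invented, and `source_location()` is assumed to be the existing `ParserState` accessor:

    use cssparser::{CowRcStr, DeclarationParser, ParseError, Parser, ParserState};

    struct AcceptAll;

    impl<'i> DeclarationParser<'i> for AcceptAll {
        type Declaration = String;
        type Error = ();

        fn parse_value<'t>(
            &mut self,
            name: CowRcStr<'i>,
            input: &mut Parser<'i, 't>,
            declaration_start: &ParserState, // new in 0.35
        ) -> Result<Self::Declaration, ParseError<'i, Self::Error>> {
            // The state can report where the declaration began.
            let location = declaration_start.source_location();
            let value = input.expect_ident()?.clone();
            Ok(format!(
                "{}: {} at {}:{}",
                name, value, location.line, location.column
            ))
        }
    }

Existing implementors that do not need the state can simply add the extra `_declaration_start` parameter, as the `JsonParser` test change below does.
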
diff --git a/src/serializer.rs b/src/serializer.rs
index 3c6e31cb..6696a622 100644
--- a/src/serializer.rs
+++ b/src/serializer.rs
@@ -55,7 +55,7 @@ where
     Ok(())
 }
 
-impl<'a> ToCss for Token<'a> {
+impl ToCss for Token<'_> {
     fn to_css<W>(&self, dest: &mut W) -> fmt::Result
     where
         W: fmt::Write,
@@ -311,7 +311,7 @@ where
     }
 }
 
-impl<'a, W> fmt::Write for CssStringWriter<'a, W>
+impl<W> fmt::Write for CssStringWriter<'_, W>
 where
     W: fmt::Write,
 {
@@ -466,6 +466,9 @@ pub enum TokenSerializationType {
     Other,
 }
 
+#[cfg(feature = "malloc_size_of")]
+malloc_size_of::malloc_size_of_is_0!(TokenSerializationType);
+
 impl TokenSerializationType {
     /// Return a value that represents the absence of a token, e.g. before the start of the input.
     #[deprecated(
@@ -539,7 +542,7 @@ impl TokenSerializationType {
     }
 }
 
-impl<'a> Token<'a> {
+impl Token<'_> {
     /// Categorize a token into a type that determines when `/**/` needs to be inserted
     /// between two tokens when serialized next to each other without whitespace in between.
     ///
diff --git a/src/tests.rs b/src/tests.rs
index 7389664d..3c122f0d 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -773,7 +773,7 @@ where
     }
 }
 
-impl<'a> ToJson for CowRcStr<'a> {
+impl ToJson for CowRcStr<'_> {
     fn to_json(&self) -> Value {
         let s: &str = self;
         s.to_json()
@@ -849,6 +849,7 @@ impl<'i> DeclarationParser<'i> for JsonParser {
         &mut self,
         name: CowRcStr<'i>,
         input: &mut Parser<'i, 't>,
+        _declaration_start: &ParserState,
     ) -> Result<Value, ParseError<'i, ()>> {
         let mut value = vec![];
         let mut important = false;
@@ -946,7 +947,7 @@ impl<'i> QualifiedRuleParser<'i> for JsonParser {
     }
 }
 
-impl<'i> RuleBodyItemParser<'i, Value, ()> for JsonParser {
+impl RuleBodyItemParser<'_, Value, ()> for JsonParser {
     fn parse_qualified(&self) -> bool {
         true
     }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index ea173a5e..1c85e10a 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -190,7 +190,7 @@ pub enum Token<'a> {
     CloseCurlyBracket,
 }
 
-impl<'a> Token<'a> {
+impl Token<'_> {
     /// Return whether this token represents a parse error.
     ///
     /// `BadUrl` and `BadString` are tokenizer-level parse errors.
@@ -324,11 +324,11 @@ impl<'a> Tokenizer<'a> {
         let current = self.position();
         let start = self
             .slice(SourcePosition(0)..current)
-            .rfind(|c| matches!(c, '\r' | '\n' | '\x0C'))
+            .rfind(['\r', '\n', '\x0C'])
             .map_or(0, |start| start + 1);
         let end = self
             .slice(current..SourcePosition(self.input.len()))
-            .find(|c| matches!(c, '\r' | '\n' | '\x0C'))
+            .find(['\r', '\n', '\x0C'])
             .map_or(self.input.len(), |end| current.0 + end);
         self.slice(SourcePosition(start)..SourcePosition(end))
     }
@@ -533,6 +533,9 @@ impl<'a> Tokenizer<'a> {
 #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
 pub struct SourcePosition(pub(crate) usize);
 
+#[cfg(feature = "malloc_size_of")]
+malloc_size_of::malloc_size_of_is_0!(SourcePosition);
+
 impl SourcePosition {
     /// Returns the current byte index in the original input.
     #[inline]
@@ -542,7 +545,7 @@ impl SourcePosition {
 }
 
 /// The line and column number for a given position within the input.
-#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+#[derive(PartialEq, Eq, Debug, Clone, Copy, Default)]
 pub struct SourceLocation {
     /// The line number, starting at 0 for the first line.
     pub line: u32,
@@ -552,6 +555,9 @@ pub struct SourceLocation {
     /// The column number within a line, starting at 0 for the first character of the line.
     pub column: u32,
 }
 
+#[cfg(feature = "malloc_size_of")]
+malloc_size_of::malloc_size_of_is_0!(SourceLocation);
+
 fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
     if tokenizer.is_eof() {
         return Err(());
     }
@@ -720,9 +726,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) {
     // If there is a source map directive, extract the URL.
     if contents.starts_with(directive) || contents.starts_with(directive_old) {
         let contents = &contents[directive.len()..];
-        tokenizer.source_map_url = contents
-            .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
-            .next()
+        tokenizer.source_map_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next();
     }
 
     let directive = "# sourceURL=";
@@ -731,9 +735,7 @@ fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) {
     // If there is a source map directive, extract the URL.
     if contents.starts_with(directive) || contents.starts_with(directive_old) {
         let contents = &contents[directive.len()..];
-        tokenizer.source_url = contents
-            .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
-            .next()
+        tokenizer.source_url = contents.split([' ', '\t', '\x0C', '\r', '\n']).next()
     }
 }
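
The `malloc_size_of_is_0!` invocations added in `serializer.rs` and `tokenizer.rs` come from the new optional `malloc_size_of` dependency; they mark plain-old-data types (`TokenSerializationType`, `SourcePosition`, `SourceLocation`) as owning zero heap bytes for memory reporting. Roughly, and this is a sketch rather than the literal macro expansion, each invocation amounts to:

    use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};

    struct SourcePosition(usize); // stand-in for the real type

    // A Copy newtype over usize owns no heap allocations, so its
    // measured heap size is simply zero.
    impl MallocSizeOf for SourcePosition {
        fn size_of(&self, _ops: &mut MallocSizeOfOps) -> usize {
            0
        }
    }

The `rfind`/`find`/`split` changes in the same file are a separate cleanup: on current stable Rust a `[char; N]` array implements the string `Pattern` trait, so the char arrays replace the equivalent closures without changing behavior.
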
diff --git a/src/unicode_range.rs b/src/unicode_range.rs
index ce6bb3b5..a4130ef0 100644
--- a/src/unicode_range.rs
+++ b/src/unicode_range.rs
@@ -104,7 +104,7 @@ fn parse_concatenated(text: &[u8]) -> Result<UnicodeRange, ()> {
         Some((&b'+', text)) => text,
         _ => return Err(()),
     };
-    let (first_hex_value, hex_digit_count) = consume_hex(&mut text);
+    let (first_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?;
     let question_marks = consume_question_marks(&mut text);
     let consumed = hex_digit_count + question_marks;
     if consumed == 0 || consumed > 6 {
@@ -124,7 +124,7 @@ fn parse_concatenated(text: &[u8]) -> Result<UnicodeRange, ()> {
             end: first_hex_value,
         });
     } else if let Some((&b'-', mut text)) = text.split_first() {
-        let (second_hex_value, hex_digit_count) = consume_hex(&mut text);
+        let (second_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?;
         if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() {
             return Ok(UnicodeRange {
                 start: first_hex_value,
@@ -135,19 +135,23 @@ fn parse_concatenated(text: &[u8]) -> Result<UnicodeRange, ()> {
     Err(())
 }
 
-fn consume_hex(text: &mut &[u8]) -> (u32, usize) {
+// Consume hex digits, but return an error if more than digit_limit are found.
+fn consume_hex(text: &mut &[u8], digit_limit: usize) -> Result<(u32, usize), ()> {
     let mut value = 0;
     let mut digits = 0;
     while let Some((&byte, rest)) = text.split_first() {
         if let Some(digit_value) = (byte as char).to_digit(16) {
+            if digits == digit_limit {
+                return Err(());
+            }
             value = value * 0x10 + digit_value;
             digits += 1;
-            *text = rest
+            *text = rest;
         } else {
             break;
         }
     }
-    (value, digits)
+    Ok((value, digits))
 }
 
 fn consume_question_marks(text: &mut &[u8]) -> usize {
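
The `consume_hex` change is what the new `urange.json` cases exercise: `U+26F9200D2640` (an emoji ZWJ sequence mistaken for a code point) is all hex digits, so the old code kept multiplying the `u32` accumulator past six digits and could overflow before the `consumed > 6` check rejected the range; passing the limit in bails out first. A hedged sketch of the observable behavior, assuming `UnicodeRange::parse` remains the public entry point:

    use cssparser::{Parser, ParserInput, UnicodeRange};

    fn main() {
        // Seven or more hex digits now return Err(..) instead of
        // overflowing during accumulation.
        let mut input = ParserInput::new("U+26F9200D2640");
        let mut parser = Parser::new(&mut input);
        assert!(UnicodeRange::parse(&mut parser).is_err());

        // Six digits remain fine.
        let mut input = ParserInput::new("U+10FFFF");
        let mut parser = Parser::new(&mut input);
        assert!(UnicodeRange::parse(&mut parser).is_ok());
    }
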