From 5bc93b5eaa8e561f8a87e5a5f8a5335d41328592 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Sat, 6 Jul 2019 13:54:55 +0200
Subject: [PATCH 1/2] Use new inclusive range literal syntax

Fix some deprecation warnings
---
 build.rs          |  1 -
 src/color.rs      |  6 ++--
 src/macros.rs     |  2 +-
 src/nth.rs        |  2 +-
 src/serializer.rs |  8 ++---
 src/tokenizer.rs  | 76 +++++++++++++++++++++++------------------------
 6 files changed, 47 insertions(+), 48 deletions(-)

diff --git a/build.rs b/build.rs
index c4ca61b2..7a73ab18 100644
--- a/build.rs
+++ b/build.rs
@@ -10,7 +10,6 @@ extern crate proc_macro2;
 
 #[cfg(feature = "dummy_match_byte")]
 mod codegen {
-    use std::path::Path;
     pub fn main() {}
 }
 
diff --git a/src/color.rs b/src/color.rs
index af9c0942..4eb3d95c 100644
--- a/src/color.rs
+++ b/src/color.rs
@@ -530,9 +530,9 @@ pub fn parse_color_keyword(ident: &str) -> Result<Color, ()> {
 #[inline]
 fn from_hex(c: u8) -> Result<u8, ()> {
     match c {
-        b'0'...b'9' => Ok(c - b'0'),
-        b'a'...b'f' => Ok(c - b'a' + 10),
-        b'A'...b'F' => Ok(c - b'A' + 10),
+        b'0'..=b'9' => Ok(c - b'0'),
+        b'a'..=b'f' => Ok(c - b'a' + 10),
+        b'A'..=b'F' => Ok(c - b'A' + 10),
         _ => Err(()),
     }
 }
diff --git a/src/macros.rs b/src/macros.rs
index 986c18a4..ce7ed045 100644
--- a/src/macros.rs
+++ b/src/macros.rs
@@ -130,7 +130,7 @@ macro_rules! cssparser_internal__to_lowercase {
 #[allow(non_snake_case)]
 pub fn _internal__to_lowercase<'a>(buffer: &'a mut [u8], input: &'a str) -> Option<&'a str> {
     if let Some(buffer) = buffer.get_mut(..input.len()) {
-        if let Some(first_uppercase) = input.bytes().position(|byte| matches!(byte, b'A'...b'Z')) {
+        if let Some(first_uppercase) = input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
             buffer.copy_from_slice(input.as_bytes());
             buffer[first_uppercase..].make_ascii_lowercase();
             // `buffer` was initialized to a copy of `input` (which is &str so well-formed UTF-8)
diff --git a/src/nth.rs b/src/nth.rs
index 691ff441..939a42c0 100644
--- a/src/nth.rs
+++ b/src/nth.rs
@@ -105,7 +105,7 @@ fn parse_n_dash_digits(string: &str) -> Result<i32, ()> {
     let bytes = string.as_bytes();
     if bytes.len() >= 3
         && bytes[..2].eq_ignore_ascii_case(b"n-")
-        && bytes[2..].iter().all(|&c| matches!(c, b'0'...b'9'))
+        && bytes[2..].iter().all(|&c| matches!(c, b'0'..=b'9'))
     {
         Ok(parse_number_saturate(&string[1..]).unwrap()) // Include the minus sign
     } else {
diff --git a/src/serializer.rs b/src/serializer.rs
index 0157f0de..fb0462cc 100644
--- a/src/serializer.rs
+++ b/src/serializer.rs
@@ -207,7 +207,7 @@ where
         dest.write_str("-")?;
         value = &value[1..];
     }
-    if let digit @ b'0'...b'9' = value.as_bytes()[0] {
+    if let digit @ b'0'..=b'9' = value.as_bytes()[0] {
         hex_escape(digit, dest)?;
         value = &value[1..];
     }
@@ -226,7 +226,7 @@ where
     let mut chunk_start = 0;
     for (i, b) in value.bytes().enumerate() {
         let escaped = match b {
-            b'0'...b'9' | b'A'...b'Z' | b'a'...b'z' | b'_' | b'-' => continue,
+            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-' => continue,
             _ if !b.is_ascii() => continue,
             b'\0' => Some("\u{FFFD}"),
             _ => None,
@@ -251,7 +251,7 @@ where
     let mut chunk_start = 0;
     for (i, b) in value.bytes().enumerate() {
         let hex = match b {
-            b'\0'...b' ' | b'\x7F' => true,
+            b'\0'..=b' ' | b'\x7F' => true,
             b'(' | b')' | b'"' | b'\'' | b'\\' => false,
             _ => continue,
         };
@@ -318,7 +318,7 @@ where
                 b'"' => Some("\\\""),
                 b'\\' => Some("\\\\"),
                 b'\0' => Some("\u{FFFD}"),
-                b'\x01'...b'\x1F' | b'\x7F' => None,
+                b'\x01'..=b'\x1F' | b'\x7F' => None,
                 _ => continue,
             };
             self.inner.write_str(&s[chunk_start..i])?;
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 629507df..73cb031a 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -557,7 +557,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
             if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
             else if !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
                 // Any other valid case here already resulted in IDHash.
-                b'0'...b'9' | b'-' => true,
+                b'0'..=b'9' | b'-' => true,
                 _ => false,
             } { Hash(consume_name(tokenizer)) }
             else { Delim('#') }
@@ -576,11 +576,11 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
         b'+' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.byte_at(1), b'0'...b'9')
+                && matches!(tokenizer.byte_at(1), b'0'..=b'9')
             ) || (
                 tokenizer.has_at_least(2)
                 && tokenizer.byte_at(1) == b'.'
-                && matches!(tokenizer.byte_at(2), b'0'...b'9')
+                && matches!(tokenizer.byte_at(2), b'0'..=b'9')
             ) {
                 consume_numeric(tokenizer)
             } else {
@@ -592,11 +592,11 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
         b'-' => {
             if (
                 tokenizer.has_at_least(1)
-                && matches!(tokenizer.byte_at(1), b'0'...b'9')
+                && matches!(tokenizer.byte_at(1), b'0'..=b'9')
             ) || (
                 tokenizer.has_at_least(2)
                 && tokenizer.byte_at(1) == b'.'
-                && matches!(tokenizer.byte_at(2), b'0'...b'9')
+                && matches!(tokenizer.byte_at(2), b'0'..=b'9')
             ) {
                 consume_numeric(tokenizer)
             } else if tokenizer.starts_with(b"-->") {
@@ -611,7 +611,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
         },
         b'.' => {
             if tokenizer.has_at_least(1)
-                && matches!(tokenizer.byte_at(1), b'0'...b'9'
+                && matches!(tokenizer.byte_at(1), b'0'..=b'9'
             ) {
                 consume_numeric(tokenizer)
             } else {
@@ -627,7 +627,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
                 Delim('/')
             }
         }
-        b'0'...b'9' => { consume_numeric(tokenizer) },
+        b'0'..=b'9' => { consume_numeric(tokenizer) },
         b':' => { tokenizer.advance(1); Colon },
         b';' => { tokenizer.advance(1); Semicolon },
         b'<' => {
@@ -644,7 +644,7 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
             if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
             else { Delim('@') }
         },
-        b'a'...b'z' | b'A'...b'Z' | b'_' | b'\0' => { consume_ident_like(tokenizer) },
+        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { consume_ident_like(tokenizer) },
         b'[' => { tokenizer.advance(1); SquareBracketBlock },
         b'\\' => {
             if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
@@ -745,8 +745,8 @@ fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
             b'\n' | b'\x0C' | b'\r' => {
                 tokenizer.consume_newline();
             }
-            b'\x80'...b'\xBF' => { tokenizer.consume_continuation_byte(); }
-            b'\xF0'...b'\xFF' => { tokenizer.consume_4byte_intro(); }
+            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
             _ => {
                 // ASCII or other leading byte.
                 tokenizer.advance(1);
@@ -807,8 +807,8 @@ fn consume_quoted_string<'a>(
             b'\n' | b'\r' | b'\x0C' => {
                 return Err(tokenizer.slice_from(start_pos).into())
             },
-            b'\x80'...b'\xBF' => { tokenizer.consume_continuation_byte(); }
-            b'\xF0'...b'\xFF' => { tokenizer.consume_4byte_intro(); }
+            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
             _ => {
                 // ASCII or other leading byte.
                 tokenizer.advance(1);
@@ -859,8 +859,8 @@ fn consume_quoted_string<'a>(
                     string_bytes.extend("\u{FFFD}".as_bytes());
                     continue;
                 }
-                b'\x80'...b'\xBF' => { tokenizer.consume_continuation_byte(); }
-                b'\xF0'...b'\xFF' => { tokenizer.consume_4byte_intro(); }
+                b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+                b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
                 _ => {
                     // ASCII or other leading byte.
                     tokenizer.advance(1);
@@ -882,10 +882,10 @@ fn consume_quoted_string<'a>(
 fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
     !tokenizer.is_eof() && match_byte! { tokenizer.next_byte_unchecked(),
-        b'a'...b'z' | b'A'...b'Z' | b'_' | b'\0' => { true },
+        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { true },
         b'-' => {
             tokenizer.has_at_least(1) && match_byte! { tokenizer.byte_at(1),
-                b'a'...b'z' | b'A'...b'Z' | b'-' | b'_' | b'\0' => {
+                b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_' | b'\0' => {
                     true
                 }
                 b'\\' => { !tokenizer.has_newline_at(1) }
@@ -921,7 +921,7 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
             return tokenizer.slice_from(start_pos).into();
         }
         match_byte! { tokenizer.next_byte_unchecked(),
-            b'a'...b'z' | b'A'...b'Z' | b'0'...b'9' | b'_' | b'-' => { tokenizer.advance(1) },
+            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => { tokenizer.advance(1) },
             b'\\' | b'\0' => {
                 // * The tokenizer’s input is UTF-8 since it’s `&str`.
                 // * start_pos is at a code point boundary
@@ -931,10 +931,10 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
                 value_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                 break
             }
-            b'\x80'...b'\xBF' => { tokenizer.consume_continuation_byte(); }
-            b'\xC0'...b'\xEF' => { tokenizer.advance(1); }
-            b'\xF0'...b'\xFF' => { tokenizer.consume_4byte_intro(); }
-            b => {
+            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+            b'\xC0'..=b'\xEF' => { tokenizer.advance(1); }
+            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
+            _b => {
                 return tokenizer.slice_from(start_pos).into();
             }
         }
@@ -943,7 +943,7 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
     while !tokenizer.is_eof() {
         let b = tokenizer.next_byte_unchecked();
         match_byte! { b,
-            b'a'...b'z' | b'A'...b'Z' | b'0'...b'9' | b'_' | b'-' => {
+            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => {
                 tokenizer.advance(1);
                 value_bytes.push(b) // ASCII
             }
@@ -957,19 +957,19 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
                 tokenizer.advance(1);
                 value_bytes.extend("\u{FFFD}".as_bytes());
             },
-            b'\x80'...b'\xBF' => {
+            b'\x80'..=b'\xBF' => {
                 // This byte *is* part of a multi-byte code point,
                 // we’ll end up copying the whole code point before this loop does something else.
                 tokenizer.consume_continuation_byte();
                 value_bytes.push(b)
             }
-            b'\xC0'...b'\xEF' => {
+            b'\xC0'..=b'\xEF' => {
                 // This byte *is* part of a multi-byte code point,
                 // we’ll end up copying the whole code point before this loop does something else.
                 tokenizer.advance(1);
                 value_bytes.push(b)
             }
-            b'\xF0'...b'\xFF' => {
+            b'\xF0'..=b'\xFF' => {
                 tokenizer.consume_4byte_intro();
                 value_bytes.push(b)
             }
@@ -985,9 +985,9 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
 
 fn byte_to_hex_digit(b: u8) -> Option<u8> {
     Some(match_byte! { b,
-        b'0' ... b'9' => { b - b'0' },
-        b'a' ... b'f' => { b - b'a' + 10 },
-        b'A' ... b'F' => { b - b'A' + 10 },
+        b'0' ..= b'9' => { b - b'0' },
+        b'a' ..= b'f' => { b - b'a' + 10 },
+        b'A' ..= b'F' => { b - b'A' + 10 },
         _ => {
             return None
         }
@@ -1032,7 +1032,7 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
     let mut fractional_part: f64 = 0.;
     if tokenizer.has_at_least(1)
         && tokenizer.next_byte_unchecked() == b'.'
-        && matches!(tokenizer.byte_at(1), b'0'...b'9')
+        && matches!(tokenizer.byte_at(1), b'0'..=b'9')
     {
         is_integer = false;
         tokenizer.advance(1); // Consume '.'
@@ -1050,10 +1050,10 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
     let mut value = sign * (integral_part + fractional_part);
 
     if tokenizer.has_at_least(1) && matches!(tokenizer.next_byte_unchecked(), b'e' | b'E') {
-        if matches!(tokenizer.byte_at(1), b'0'...b'9')
+        if matches!(tokenizer.byte_at(1), b'0'..=b'9')
             || (tokenizer.has_at_least(2)
                 && matches!(tokenizer.byte_at(1), b'+' | b'-')
-                && matches!(tokenizer.byte_at(2), b'0'...b'9'))
+                && matches!(tokenizer.byte_at(2), b'0'..=b'9'))
         {
             is_integer = false;
             tokenizer.advance(1);
@@ -1202,7 +1202,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
                 tokenizer.advance(1);
                 return UnquotedUrl(value.into())
             }
-            b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
+            b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
             | b'"' | b'\'' | b'(' => {
                 tokenizer.advance(1);
                 return consume_bad_url(tokenizer, start_pos)
@@ -1216,8 +1216,8 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
                 string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                 break
             }
-            b'\x80'...b'\xBF' => { tokenizer.consume_continuation_byte(); }
-            b'\xF0'...b'\xFF' => { tokenizer.consume_4byte_intro(); }
+            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
+            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
             _ => {
                 // ASCII or other leading byte.
                 tokenizer.advance(1);
@@ -1236,7 +1236,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
                     tokenizer.advance(1);
                     break;
                 }
-                b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
+                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F' // non-printable
                 | b'"' | b'\'' | b'(' => {
                     tokenizer.advance(1);
                     return consume_bad_url(tokenizer, start_pos);
@@ -1254,13 +1254,13 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
                     tokenizer.advance(1);
                     string_bytes.extend("\u{FFFD}".as_bytes());
                 }
-                b'\x80'...b'\xBF' => {
+                b'\x80'..=b'\xBF' => {
                     // We’ll end up copying the whole code point
                     // before this loop does something else.
                     tokenizer.consume_continuation_byte();
                     string_bytes.push(b);
                 }
-                b'\xF0'...b'\xFF' => {
+                b'\xF0'..=b'\xFF' => {
                     // We’ll end up copying the whole code point
                     // before this loop does something else.
                     tokenizer.consume_4byte_intro();
@@ -1367,7 +1367,7 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char {
         return '\u{FFFD}';
     } // Escaped EOF
     match_byte! { tokenizer.next_byte_unchecked(),
-        b'0'...b'9' | b'A'...b'F' | b'a'...b'f' => {
+        b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
             let (c, _) = consume_hex_digits(tokenizer);
             if !tokenizer.is_eof() {
                 match_byte! { tokenizer.next_byte_unchecked(),

From 376f4c6e8644d3b40c5d9d0ccd952fb5f5b9a620 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Sat, 6 Jul 2019 14:36:24 +0200
Subject: [PATCH 2/2] Use MaybeUninit when available
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix deprecation warnings for mem::uninitialized.
(They also affected other crates, since it’s used in a public macro.)
---
 Cargo.toml    |  3 ++-
 build.rs      |  3 +++
 src/macros.rs | 19 ++++++++++++++++---
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index efe32e64..9b01bb29 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cssparser"
-version = "0.25.7"
+version = "0.25.8"
 authors = [ "Simon Sapin" ]
 description = "Rust implementation of CSS Syntax Level 3"
@@ -30,6 +30,7 @@ serde = {version = "1.0", optional = true}
 smallvec = "0.6"
 
 [build-dependencies]
+autocfg = "0.1.4"
 syn = { version = "0.15.12", features = ["extra-traits", "fold", "full"] }
 quote = "0.6"
 proc-macro2 = "0.4"
diff --git a/build.rs b/build.rs
index 7a73ab18..2cef2756 100644
--- a/build.rs
+++ b/build.rs
@@ -2,6 +2,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+extern crate autocfg;
 #[macro_use]
 extern crate quote;
 #[macro_use]
@@ -49,5 +50,7 @@ fn main() {
         println!("cargo:rustc-cfg=rustc_has_pr45225")
     }
 
+    autocfg::new().emit_has_path("std::mem::MaybeUninit");
+
     codegen::main();
 }
diff --git a/src/macros.rs b/src/macros.rs
index ce7ed045..cc47898c 100644
--- a/src/macros.rs
+++ b/src/macros.rs
@@ -110,13 +110,26 @@ macro_rules! ascii_case_insensitive_phf_map {
 #[doc(hidden)]
 macro_rules! cssparser_internal__to_lowercase {
     ($input: expr, $BUFFER_SIZE: expr => $output: ident) => {
-        // mem::uninitialized() is ok because `buffer` is only used in `_internal__to_lowercase`,
+        let mut buffer;
+        // Safety: `buffer` is only used in `_internal__to_lowercase`,
         // which initializes with `copy_from_slice` the part of the buffer it uses,
         // before it uses it.
         #[allow(unsafe_code)]
-        let mut buffer: [u8; $BUFFER_SIZE] = unsafe { ::std::mem::uninitialized() };
+        let buffer = unsafe {
+            // FIXME: remove this when we require Rust 1.36
+            #[cfg(not(has_std__mem__MaybeUninit))]
+            {
+                buffer = ::std::mem::uninitialized::<[u8; $BUFFER_SIZE]>();
+                &mut buffer
+            }
+            #[cfg(has_std__mem__MaybeUninit)]
+            {
+                buffer = ::std::mem::MaybeUninit::<[u8; $BUFFER_SIZE]>::uninit();
+                &mut *(buffer.as_mut_ptr())
+            }
+        };
         let input: &str = $input;
-        let $output = $crate::_internal__to_lowercase(&mut buffer, input);
+        let $output = $crate::_internal__to_lowercase(buffer, input);
     };
 }
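
Note (not part of the patches): the first commit is a purely mechanical
migration. The `...` inclusive range syntax is deprecated in patterns; the
replacement, `..=`, has been stable since Rust 1.26. Copying the patched
`from_hex` out of src/color.rs as a standalone illustration:

    // Inclusive byte-range patterns with the new syntax.
    fn from_hex(c: u8) -> Result<u8, ()> {
        match c {
            b'0'..=b'9' => Ok(c - b'0'),       // was: b'0'...b'9'
            b'a'..=b'f' => Ok(c - b'a' + 10),  // was: b'a'...b'f'
            b'A'..=b'F' => Ok(c - b'A' + 10),  // was: b'A'...b'F'
            _ => Err(()),
        }
    }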
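
The second commit hinges on a build-time probe: autocfg's `emit_has_path`
emits `cargo:rustc-cfg=has_std__mem__MaybeUninit` when the compiler in use
can resolve that path, and the macro then selects the matching
`#[cfg(has_std__mem__MaybeUninit)]` branch. A minimal standalone sketch of
the resulting `MaybeUninit` buffer pattern on Rust 1.36+ (the 64-byte size
and the function itself are illustrative, not taken from the patch):

    use std::mem::MaybeUninit;

    fn lowercase_prefix(input: &[u8]) -> Vec<u8> {
        // Reserve stack space without zeroing it first.
        let mut buffer = MaybeUninit::<[u8; 64]>::uninit();
        // Safety: as in the patch, only the prefix that `copy_from_slice`
        // initializes below is ever read afterwards.
        let buffer: &mut [u8; 64] = unsafe { &mut *buffer.as_mut_ptr() };
        let len = input.len().min(64);
        buffer[..len].copy_from_slice(&input[..len]);
        buffer[..len].make_ascii_lowercase();
        buffer[..len].to_vec()
    }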