From 534be35112c11127e07da3b09f1ef2736da477a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?=
Date: Sun, 10 Jan 2021 22:50:11 +0100
Subject: [PATCH 1/2] macros: Optimize cssparser_internal_to_lowercase for already-lowercase inputs.

By inlining the happy-path, this improves CSS parsing performance of
benchmarks, even on PGO builds.
---
 src/macros.rs | 51 +++++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/macros.rs b/src/macros.rs
index 49c56427..234920d8 100644
--- a/src/macros.rs
+++ b/src/macros.rs
@@ -144,37 +144,44 @@ macro_rules! _cssparser_internal_to_lowercase {
 /// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary.
 #[doc(hidden)]
 #[allow(non_snake_case)]
+#[inline]
 pub fn _cssparser_internal_to_lowercase<'a>(
     buffer: &'a mut [MaybeUninit<u8>],
     input: &'a str,
 ) -> Option<&'a str> {
-    if let Some(buffer) = buffer.get_mut(..input.len()) {
-        if let Some(first_uppercase) = input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
-            unsafe {
-                // This cast doesn’t change the pointer’s validity
-                // since `u8` has the same layout as `MaybeUninit<u8>`:
-                let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
+    let buffer = buffer.get_mut(..input.len())?;
 
-                buffer.copy_from_slice(&*input_bytes);
+    #[cold]
+    fn make_ascii_lowercase<'a>(
+        buffer: &'a mut [MaybeUninit<u8>],
+        input: &'a str,
+        first_uppercase: usize,
+    ) -> &'a str {
+        unsafe {
+            // This cast doesn't change the pointer's validity
+            // since `u8` has the same layout as `MaybeUninit<u8>`:
+            let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
 
-                // Same as above re layout, plus these bytes have been initialized:
-                let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
+            buffer.copy_from_slice(&*input_bytes);
 
-                buffer[first_uppercase..].make_ascii_lowercase();
-                // `buffer` was initialized to a copy of `input`
-                // (which is `&str` so well-formed UTF-8)
-                // then ASCII-lowercased (which preserves UTF-8 well-formedness):
-                Some(::std::str::from_utf8_unchecked(buffer))
-            }
-        } else {
-            // Input is already lower-case
-            Some(input)
+            // Same as above re layout, plus these bytes have been initialized:
+            let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
+
+            buffer[first_uppercase..].make_ascii_lowercase();
+            // `buffer` was initialized to a copy of `input`
+            // (which is `&str` so well-formed UTF-8)
+            // then ASCII-lowercased (which preserves UTF-8 well-formedness):
+            ::std::str::from_utf8_unchecked(buffer)
         }
-    } else {
-        // Input is longer than buffer, which has the length of the longest expected string:
-        // none of the expected strings would match.
-        None
     }
+
+    Some(
+        match input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
+            Some(first_uppercase) => make_ascii_lowercase(buffer, input, first_uppercase),
+            // common case: input is already lower-case
+            None => input,
+        },
+    )
 }
 
 #[cfg(feature = "dummy_match_byte")]

From 906ed7fc680512de6d5d813e18600f7e0919655b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?=
Date: Mon, 11 Jan 2021 02:40:00 +0100
Subject: [PATCH 2/2] Minor version bump.

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 350bf434..3e2beb8d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cssparser"
-version = "0.28.0"
+version = "0.28.1"
 authors = [ "Simon Sapin " ]
 
 description = "Rust implementation of CSS Syntax Level 3"