Skip to content

macros: Optimize cssparser_internal_to_lowercase for already-lowercase inputs. #279

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on
Jan 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cssparser"
version = "0.28.0"
version = "0.28.1"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

description = "Rust implementation of CSS Syntax Level 3"
Expand Down
51 changes: 29 additions & 22 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,37 +144,44 @@ macro_rules! _cssparser_internal_to_lowercase {
/// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary.
#[doc(hidden)]
#[allow(non_snake_case)]
#[inline]
pub fn _cssparser_internal_to_lowercase<'a>(
buffer: &'a mut [MaybeUninit<u8>],
input: &'a str,
) -> Option<&'a str> {
if let Some(buffer) = buffer.get_mut(..input.len()) {
if let Some(first_uppercase) = input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
unsafe {
// This cast doesn’t change the pointer’s validity
// since `u8` has the same layout as `MaybeUninit<u8>`:
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
let buffer = buffer.get_mut(..input.len())?;

buffer.copy_from_slice(&*input_bytes);
#[cold]
fn make_ascii_lowercase<'a>(
buffer: &'a mut [MaybeUninit<u8>],
input: &'a str,
first_uppercase: usize,
) -> &'a str {
unsafe {
// This cast doesn't change the pointer's validity
// since `u8` has the same layout as `MaybeUninit<u8>`:
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);

// Same as above re layout, plus these bytes have been initialized:
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
buffer.copy_from_slice(&*input_bytes);

buffer[first_uppercase..].make_ascii_lowercase();
// `buffer` was initialized to a copy of `input`
// (which is `&str` so well-formed UTF-8)
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
Some(::std::str::from_utf8_unchecked(buffer))
}
} else {
// Input is already lower-case
Some(input)
// Same as above re layout, plus these bytes have been initialized:
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);

buffer[first_uppercase..].make_ascii_lowercase();
// `buffer` was initialized to a copy of `input`
// (which is `&str` so well-formed UTF-8)
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
::std::str::from_utf8_unchecked(buffer)
}
} else {
// Input is longer than buffer, which has the length of the longest expected string:
// none of the expected strings would match.
None
}

Some(
match input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
Some(first_uppercase) => make_ascii_lowercase(buffer, input, first_uppercase),
// common case: input is already lower-case
None => input,
},
)
}

#[cfg(feature = "dummy_match_byte")]
Expand Down