Skip to content

Commit 5cc53cf

Browse files
committed
macros: Optimize cssparser_internal_to_lowercase for already-lowercase inputs.
By inlining the happy path, this improves CSS parsing performance on benchmarks, even on PGO builds.
1 parent a37566b commit 5cc53cf

File tree

1 file changed

+23
-22
lines changed

1 file changed

+23
-22
lines changed

src/macros.rs

Lines changed: 23 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -144,37 +144,38 @@ macro_rules! _cssparser_internal_to_lowercase {
144144
/// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary.
145145
#[doc(hidden)]
146146
#[allow(non_snake_case)]
147+
#[inline]
147148
pub fn _cssparser_internal_to_lowercase<'a>(
148149
buffer: &'a mut [MaybeUninit<u8>],
149150
input: &'a str,
150151
) -> Option<&'a str> {
151-
if let Some(buffer) = buffer.get_mut(..input.len()) {
152-
if let Some(first_uppercase) = input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
153-
unsafe {
154-
// This cast doesn’t change the pointer’s validity
155-
// since `u8` has the same layout as `MaybeUninit<u8>`:
156-
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
152+
let buffer = buffer.get_mut(..input.len())?;
157153

158-
buffer.copy_from_slice(&*input_bytes);
154+
#[cold]
155+
fn make_ascii_lowercase<'a>(buffer: &'a mut [MaybeUninit<u8>], input: &'a str, first_uppercase: usize) -> &'a str {
156+
unsafe {
157+
// This cast doesn't change the pointer's validity
158+
// since `u8` has the same layout as `MaybeUninit<u8>`:
159+
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
159160

160-
// Same as above re layout, plus these bytes have been initialized:
161-
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
161+
buffer.copy_from_slice(&*input_bytes);
162162

163-
buffer[first_uppercase..].make_ascii_lowercase();
164-
// `buffer` was initialized to a copy of `input`
165-
// (which is `&str` so well-formed UTF-8)
166-
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
167-
Some(::std::str::from_utf8_unchecked(buffer))
168-
}
169-
} else {
170-
// Input is already lower-case
171-
Some(input)
163+
// Same as above re layout, plus these bytes have been initialized:
164+
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
165+
166+
buffer[first_uppercase..].make_ascii_lowercase();
167+
// `buffer` was initialized to a copy of `input`
168+
// (which is `&str` so well-formed UTF-8)
169+
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
170+
::std::str::from_utf8_unchecked(buffer)
172171
}
173-
} else {
174-
// Input is longer than buffer, which has the length of the longest expected string:
175-
// none of the expected strings would match.
176-
None
177172
}
173+
174+
Some(match input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
175+
Some(first_uppercase) => make_ascii_lowercase(buffer, input, first_uppercase),
176+
// common case: input is already lower-case
177+
None => input,
178+
})
178179
}
179180

180181
#[cfg(feature = "dummy_match_byte")]

0 commit comments

Comments
 (0)