Skip to content

Commit 534be35

Browse files
committed
macros: Optimize cssparser_internal_to_lowercase for already-lowercase inputs.
By inlining the happy path, this improves CSS parsing performance on benchmarks, even on PGO builds.
1 parent a37566b commit 534be35

File tree

1 file changed

+29
-22
lines changed

1 file changed

+29
-22
lines changed

src/macros.rs

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -144,37 +144,44 @@ macro_rules! _cssparser_internal_to_lowercase {
144144
/// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary.
145145
#[doc(hidden)]
146146
#[allow(non_snake_case)]
147+
#[inline]
147148
pub fn _cssparser_internal_to_lowercase<'a>(
148149
buffer: &'a mut [MaybeUninit<u8>],
149150
input: &'a str,
150151
) -> Option<&'a str> {
151-
if let Some(buffer) = buffer.get_mut(..input.len()) {
152-
if let Some(first_uppercase) = input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
153-
unsafe {
154-
// This cast doesn’t change the pointer’s validity
155-
// since `u8` has the same layout as `MaybeUninit<u8>`:
156-
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
152+
let buffer = buffer.get_mut(..input.len())?;
157153

158-
buffer.copy_from_slice(&*input_bytes);
154+
#[cold]
155+
fn make_ascii_lowercase<'a>(
156+
buffer: &'a mut [MaybeUninit<u8>],
157+
input: &'a str,
158+
first_uppercase: usize,
159+
) -> &'a str {
160+
unsafe {
161+
// This cast doesn't change the pointer's validity
162+
// since `u8` has the same layout as `MaybeUninit<u8>`:
163+
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
159164

160-
// Same as above re layout, plus these bytes have been initialized:
161-
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
165+
buffer.copy_from_slice(&*input_bytes);
162166

163-
buffer[first_uppercase..].make_ascii_lowercase();
164-
// `buffer` was initialized to a copy of `input`
165-
// (which is `&str` so well-formed UTF-8)
166-
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
167-
Some(::std::str::from_utf8_unchecked(buffer))
168-
}
169-
} else {
170-
// Input is already lower-case
171-
Some(input)
167+
// Same as above re layout, plus these bytes have been initialized:
168+
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
169+
170+
buffer[first_uppercase..].make_ascii_lowercase();
171+
// `buffer` was initialized to a copy of `input`
172+
// (which is `&str` so well-formed UTF-8)
173+
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
174+
::std::str::from_utf8_unchecked(buffer)
172175
}
173-
} else {
174-
// Input is longer than buffer, which has the length of the longest expected string:
175-
// none of the expected strings would match.
176-
None
177176
}
177+
178+
Some(
179+
match input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
180+
Some(first_uppercase) => make_ascii_lowercase(buffer, input, first_uppercase),
181+
// common case: input is already lower-case
182+
None => input,
183+
},
184+
)
178185
}
179186

180187
#[cfg(feature = "dummy_match_byte")]

0 commit comments

Comments
 (0)