Skip to content

Commit 0212922

Browse files
authored
Auto merge of #279 - servo:lowercase-happy, r=heycam
macros: Optimize cssparser_internal_to_lowercase for already-lowercase inputs by inlining the happy path. This improves CSS parsing performance on benchmarks, even on PGO builds.
2 parents a37566b + 906ed7f commit 0212922

File tree

2 files changed

+30
-23
lines changed

2 files changed

+30
-23
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "cssparser"
3-
version = "0.28.0"
3+
version = "0.28.1"
44
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
55

66
description = "Rust implementation of CSS Syntax Level 3"

src/macros.rs

+29-22
Original file line numberDiff line numberDiff line change
@@ -144,37 +144,44 @@ macro_rules! _cssparser_internal_to_lowercase {
144144
/// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary.
145145
#[doc(hidden)]
146146
#[allow(non_snake_case)]
147+
#[inline]
147148
pub fn _cssparser_internal_to_lowercase<'a>(
148149
buffer: &'a mut [MaybeUninit<u8>],
149150
input: &'a str,
150151
) -> Option<&'a str> {
151-
if let Some(buffer) = buffer.get_mut(..input.len()) {
152-
if let Some(first_uppercase) = input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
153-
unsafe {
154-
// This cast doesn’t change the pointer’s validity
155-
// since `u8` has the same layout as `MaybeUninit<u8>`:
156-
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
152+
let buffer = buffer.get_mut(..input.len())?;
157153

158-
buffer.copy_from_slice(&*input_bytes);
154+
#[cold]
155+
fn make_ascii_lowercase<'a>(
156+
buffer: &'a mut [MaybeUninit<u8>],
157+
input: &'a str,
158+
first_uppercase: usize,
159+
) -> &'a str {
160+
unsafe {
161+
// This cast doesn't change the pointer's validity
162+
// since `u8` has the same layout as `MaybeUninit<u8>`:
163+
let input_bytes = &*(input.as_bytes() as *const [u8] as *const [MaybeUninit<u8>]);
159164

160-
// Same as above re layout, plus these bytes have been initialized:
161-
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
165+
buffer.copy_from_slice(&*input_bytes);
162166

163-
buffer[first_uppercase..].make_ascii_lowercase();
164-
// `buffer` was initialized to a copy of `input`
165-
// (which is `&str` so well-formed UTF-8)
166-
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
167-
Some(::std::str::from_utf8_unchecked(buffer))
168-
}
169-
} else {
170-
// Input is already lower-case
171-
Some(input)
167+
// Same as above re layout, plus these bytes have been initialized:
168+
let buffer = &mut *(buffer as *mut [MaybeUninit<u8>] as *mut [u8]);
169+
170+
buffer[first_uppercase..].make_ascii_lowercase();
171+
// `buffer` was initialized to a copy of `input`
172+
// (which is `&str` so well-formed UTF-8)
173+
// then ASCII-lowercased (which preserves UTF-8 well-formedness):
174+
::std::str::from_utf8_unchecked(buffer)
172175
}
173-
} else {
174-
// Input is longer than buffer, which has the length of the longest expected string:
175-
// none of the expected strings would match.
176-
None
177176
}
177+
178+
Some(
179+
match input.bytes().position(|byte| matches!(byte, b'A'..=b'Z')) {
180+
Some(first_uppercase) => make_ascii_lowercase(buffer, input, first_uppercase),
181+
// common case: input is already lower-case
182+
None => input,
183+
},
184+
)
178185
}
179186

180187
#[cfg(feature = "dummy_match_byte")]

0 commit comments

Comments
 (0)