Skip to content

Commit 38748b4

Browse files
committed
add ArbitraryVariableMachine
1 parent bfe611d commit 38748b4

File tree

1 file changed

+356
-0
lines changed

1 file changed

+356
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
1+
use crate::cursor;
2+
use crate::extractor::bracket_stack::BracketStack;
3+
use crate::extractor::css_variable_machine::CssVariableMachine;
4+
use crate::extractor::machine::{Machine, MachineState};
5+
use crate::extractor::string_machine::StringMachine;
6+
7+
/// Extracts arbitrary variables including the parens.
8+
///
9+
/// E.g.:
10+
///
11+
/// ```text
12+
/// (--my-value)
13+
/// ^^^^^^^^^^^^
14+
///
15+
/// bg-red-500/(--my-opacity)
16+
///            ^^^^^^^^^^^^^^
17+
/// ```
18+
#[derive(Debug, Default)]
19+
pub struct ArbitraryVariableMachine {
20+
/// Start position of the arbitrary variable
21+
start_pos: usize,
22+
23+
/// Track brackets to ensure they are balanced
24+
bracket_stack: BracketStack,
25+
26+
/// Current state of the machine
27+
state: State,
28+
29+
/// Nested machine that consumes quoted strings found inside the fallback value
string_machine: StringMachine,
30+
/// Nested machine that consumes the CSS variable right after the opening `(`
css_variable_machine: CssVariableMachine,
31+
}
32+
33+
#[derive(Debug, Default)]
34+
/// Parsing phase of the `ArbitraryVariableMachine`.
enum State {
35+
/// Nothing is being parsed yet; waiting for a `(` that is directly followed by a `-`
#[default]
36+
Idle,
37+
38+
/// Currently parsing the inside of the arbitrary variable
39+
///
40+
/// ```text
41+
/// (--my-opacity)
42+
///  ^^^^^^^^^^^^
43+
/// ```
44+
Parsing,
45+
46+
/// Currently parsing the fallback of the arbitrary variable
47+
///
48+
/// ```text
49+
/// (--my-opacity,50%)
50+
///              ^^^^
51+
/// ```
52+
ParsingFallback,
53+
}
54+
55+
impl Machine for ArbitraryVariableMachine {
56+
#[inline(always)]
57+
fn reset(&mut self) {
58+
self.start_pos = 0;
59+
self.state = State::Idle;
60+
self.bracket_stack.reset();
61+
}
62+
63+
#[inline]
64+
fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
65+
let class_curr = CLASS_TABLE[cursor.curr as usize];
66+
let len = cursor.input.len();
67+
68+
match self.state {
69+
State::Idle => match class_curr {
70+
// Arbitrary variables start with `(` followed by a CSS variable
71+
//
72+
// E.g.: `(--my-variable)`
73+
// ^^
74+
//
75+
Class::OpenParen => match CLASS_TABLE[cursor.next as usize] {
76+
Class::Dash => {
77+
self.start_pos = cursor.pos;
78+
self.state = State::Parsing;
79+
cursor.advance();
80+
self.next(cursor)
81+
}
82+
83+
_ => MachineState::Idle,
84+
},
85+
86+
// Everything else, is not a valid start of the arbitrary variable. But the next
87+
// character might be a valid start for a new utility.
88+
_ => MachineState::Idle,
89+
},
90+
91+
State::Parsing => match self.css_variable_machine.next(cursor) {
92+
MachineState::Idle => self.restart(),
93+
MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] {
94+
// A CSS variable followed by a `,` means that there is a fallback
95+
//
96+
// E.g.: `(--my-color,red)`
97+
// ^
98+
Class::Comma => {
99+
self.state = State::ParsingFallback;
100+
cursor.advance_by(2); // Skip the `,`
101+
self.next(cursor)
102+
}
103+
104+
// End of the CSS variable
105+
//
106+
// E.g.: `(--my-color)`
107+
// ^
108+
_ => {
109+
cursor.advance();
110+
111+
match CLASS_TABLE[cursor.curr as usize] {
112+
// End of an arbitrary variable, must be followed by `)`
113+
Class::CloseParen => self.done(self.start_pos, cursor),
114+
115+
// Invalid arbitrary variable, not ending at `)`
116+
_ => self.restart(),
117+
}
118+
}
119+
},
120+
},
121+
122+
State::ParsingFallback => {
123+
while cursor.pos < len {
124+
match CLASS_TABLE[cursor.curr as usize] {
125+
Class::Escape => match CLASS_TABLE[cursor.next as usize] {
126+
// An escaped whitespace character is not allowed
127+
//
128+
// E.g.: `(--my-\ color)`
129+
// ^^
130+
Class::Whitespace => return self.restart(),
131+
132+
// An escaped character, skip the next character, resume after
133+
//
134+
// E.g.: `(--my-\#color)`
135+
// ^^
136+
_ => cursor.advance_by(2),
137+
},
138+
139+
Class::OpenParen | Class::OpenBracket | Class::OpenCurly => {
140+
if !self.bracket_stack.push(cursor.curr) {
141+
return self.restart();
142+
}
143+
cursor.advance();
144+
}
145+
146+
Class::CloseParen | Class::CloseBracket | Class::CloseCurly
147+
if !self.bracket_stack.is_empty() =>
148+
{
149+
if !self.bracket_stack.pop(cursor.curr) {
150+
return self.restart();
151+
}
152+
cursor.advance();
153+
}
154+
155+
// End of an arbitrary variable
156+
Class::CloseParen => return self.done(self.start_pos, cursor),
157+
158+
// Start of a string
159+
Class::Quote => match self.string_machine.next(cursor) {
160+
MachineState::Idle => return self.restart(),
161+
MachineState::Done(_) => {
162+
self.state = State::ParsingFallback;
163+
cursor.advance();
164+
return self.next(cursor);
165+
}
166+
},
167+
168+
// A `:` inside of a fallback value is only valid inside of brackets or inside of a
169+
// string. Everywhere else, it's invalid.
170+
//
171+
// E.g.: `(--foo,bar:baz)`
172+
// ^ Not valid
173+
//
174+
// E.g.: `(--url,url(https://example.com))`
175+
// ^ Valid
176+
//
177+
// E.g.: `(--my-content:'a:b:c:')`
178+
// ^ ^ ^ Valid
179+
Class::Colon if self.bracket_stack.is_empty() => return self.restart(),
180+
181+
// Any kind of whitespace is not allowed
182+
Class::Whitespace => return self.restart(),
183+
184+
// Everything else is valid
185+
_ => cursor.advance(),
186+
};
187+
}
188+
189+
self.restart()
190+
}
191+
}
192+
}
193+
}
194+
195+
/// Character classes a single input byte can belong to.
///
/// Every byte is mapped to exactly one class through `CLASS_TABLE`, so the state machine can
/// `match` on a small enum instead of comparing raw bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Class {
    /// `'a'..='z'`
    AlphaLower,

    /// `'A'..='Z'`
    AlphaUpper,

    /// `@`
    At,

    /// `:`
    Colon,

    /// `,`
    Comma,

    /// `-`
    Dash,

    /// `.`
    Dot,

    /// `\\`
    Escape,

    /// `0x00`
    End,

    /// `'0'..='9'`
    Number,

    /// `[`
    OpenBracket,

    /// `]`
    CloseBracket,

    /// `(`
    OpenParen,

    /// `)`
    CloseParen,

    /// `{`
    OpenCurly,

    /// `}`
    CloseCurly,

    /// ', ", or `
    Quote,

    /// _
    Underscore,

    /// Whitespace characters: ' ', '\t', '\n', '\r', '\x0C'
    Whitespace,

    /// Anything else
    Other,
}
257+
258+
/// Lookup table mapping every possible byte value to its [`Class`].
///
/// Built at compile time; bytes without a dedicated class map to `Class::Other`.
const CLASS_TABLE: [Class; 256] = {
    /// Classifies a single byte.
    const fn classify(byte: u8) -> Class {
        match byte {
            0x00 => Class::End,

            b'a'..=b'z' => Class::AlphaLower,
            b'A'..=b'Z' => Class::AlphaUpper,
            b'0'..=b'9' => Class::Number,

            b'@' => Class::At,
            b'_' => Class::Underscore,
            b'-' => Class::Dash,
            b',' => Class::Comma,
            b'.' => Class::Dot,
            b':' => Class::Colon,
            b'\\' => Class::Escape,

            b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => Class::Whitespace,
            b'"' | b'\'' | b'`' => Class::Quote,

            b'[' => Class::OpenBracket,
            b']' => Class::CloseBracket,
            b'(' => Class::OpenParen,
            b')' => Class::CloseParen,
            b'{' => Class::OpenCurly,
            b'}' => Class::CloseCurly,

            _ => Class::Other,
        }
    }

    let mut lookup = [Class::Other; 256];

    // `for` loops are not available in const contexts, hence the manual counter.
    let mut index = 0usize;
    while index < lookup.len() {
        lookup[index] = classify(index as u8);
        index += 1;
    }

    lookup
};
306+
307+
#[cfg(test)]
mod tests {
    use super::ArbitraryVariableMachine;
    use crate::extractor::machine::Machine;

    /// Manual benchmark, skipped unless ignored tests are requested explicitly.
    #[test]
    #[ignore]
    fn test_arbitrary_variable_machine_performance() {
        let markup = r#"<div class="(--foo) (--my-color,red,blue) (--my-img,url('https://example.com?q=(][)'))"></div>"#;
        let input = markup.repeat(100);

        ArbitraryVariableMachine::test_throughput(100_000, &input);
        ArbitraryVariableMachine::test_duration_once(&input);

        // NOTE(review): presumably panics on purpose so the captured benchmark output is
        // printed by the test harness — confirm.
        todo!()
    }

    /// Checks which candidates are extracted and which inputs are rejected.
    #[test]
    fn test_arbitrary_variable_extraction() {
        let cases = [
            // Accepted: plain variable
            ("(--foo)", vec!["(--foo)"]),
            // Accepted: variable name containing dashes
            ("(--my-color)", vec!["(--my-color)"]),
            // Accepted: variable with a fallback
            ("(--my-color,red,blue)", vec!["(--my-color,red,blue)"]),
            // Accepted: unbalanced brackets are fine inside a string in the fallback
            (
                "(--my-img,url('https://example.com?q=(][)'))",
                vec!["(--my-img,url('https://example.com?q=(][)'))"],
            ),
            // --------------------------------------------------------
            // Rejected: the parens must start with a CSS variable
            (r"(bar)", vec![]),
            // Rejected: the name must be a valid CSS variable
            (r"(--my-\ color)", vec![]),
            (r"(--my#color)", vec![]),
            // Rejected: spaces in the fallback
            (r"(--my-color, red)", vec![]),
            // Rejected: escaped spaces in the fallback
            (r"(--my-color,\ red)", vec![]),
            // Rejected: nothing after the leading `--`
            (r"(--)", vec![]),
            (r"(--,red)", vec![]),
        ];

        for (input, expected) in cases {
            let actual = ArbitraryVariableMachine::test_extract_all(input);
            assert_eq!(actual, expected);
        }
    }
}

0 commit comments

Comments
 (0)