|
| 1 | +// This is a shared buffer that is used to keep track of the current nesting level |
| 2 | +// of parens, brackets, and braces. It is used to determine if a character is at |
| 3 | +// the top-level of a string. This is a performance optimization to avoid memory |
| 4 | +// allocations on every call to `segment`. |
| 5 | +const closingBracketStack = new Uint8Array(256) |
| 6 | + |
| 7 | +// All numbers are equivalent to the value returned by `String#charCodeAt(0)` |
| 8 | +const BACKSLASH = 0x5c |
| 9 | +const OPEN_PAREN = 0x28 |
| 10 | +const OPEN_BRACKET = 0x5b |
| 11 | +const OPEN_CURLY = 0x7b |
| 12 | +const CLOSE_PAREN = 0x29 |
| 13 | +const CLOSE_BRACKET = 0x5d |
| 14 | +const CLOSE_CURLY = 0x7d |
| 15 | + |
1 | 16 | /** |
2 | 17 | * This splits a string on a top-level character. |
3 | 18 | * |
4 | | - * Regex doesn't support recursion (at least not the JS-flavored version). |
5 | | - * So we have to use a tiny state machine to keep track of paren placement. |
| 19 | + * Regex doesn't support recursion (at least not the JS-flavored version), |
| 20 | + * so we have to use a tiny state machine to keep track of paren placement. |
6 | 21 | * |
7 | 22 | * Expected behavior using commas: |
8 | 23 | * var(--a, 0 0 1px rgb(0, 0, 0)), 0 0 1px rgb(0, 0, 0) |
|
11 | 26 | * ╰──────────────┴──┴───────────── Ignored b/c inside >= 1 levels of parens |
12 | 27 | */ |
13 | 28 | export function segment(input: string, separator: string) { |
14 | | - // Stack of characters to close open brackets. Appending to a string because |
15 | | - // it's faster than an array of strings. |
16 | | - let closingBracketStack = '' |
| 29 | + // SAFETY: We can use an index into a shared buffer because this function is |
| 30 | + // synchronous, non-recursive, and runs in a single-threaded environment. |
| 31 | + let stackPos = 0 |
17 | 32 | let parts: string[] = [] |
18 | 33 | let lastPos = 0 |
19 | 34 |
|
| 35 | + let separatorCode = separator.charCodeAt(0) |
| 36 | + |
20 | 37 | for (let idx = 0; idx < input.length; idx++) { |
21 | | - let char = input[idx] |
| 38 | + let char = input.charCodeAt(idx) |
22 | 39 |
|
23 | | - if (closingBracketStack.length === 0 && char === separator) { |
| 40 | + if (stackPos === 0 && char === separatorCode) { |
24 | 41 | parts.push(input.slice(lastPos, idx)) |
25 | 42 | lastPos = idx + 1 |
26 | 43 | continue |
27 | 44 | } |
28 | 45 |
|
29 | 46 | switch (char) { |
30 | | - case '\\': |
| 47 | + case BACKSLASH: |
31 | 48 | // The next character is escaped, so we skip it. |
32 | 49 | idx += 1 |
33 | 50 | break |
34 | | - case '(': |
35 | | - closingBracketStack += ')' |
| 51 | + case OPEN_PAREN: |
| 52 | + closingBracketStack[stackPos] = CLOSE_PAREN |
| 53 | + stackPos++ |
36 | 54 | break |
37 | | - case '[': |
38 | | - closingBracketStack += ']' |
| 55 | + case OPEN_BRACKET: |
| 56 | + closingBracketStack[stackPos] = CLOSE_BRACKET |
| 57 | + stackPos++ |
39 | 58 | break |
40 | | - case '{': |
41 | | - closingBracketStack += '}' |
| 59 | + case OPEN_CURLY: |
| 60 | + closingBracketStack[stackPos] = CLOSE_CURLY |
| 61 | + stackPos++ |
42 | 62 | break |
43 | | - case ')': |
44 | | - case ']': |
45 | | - case '}': |
46 | | - if ( |
47 | | - closingBracketStack.length > 0 && |
48 | | - char === closingBracketStack[closingBracketStack.length - 1] |
49 | | - ) { |
50 | | - closingBracketStack = closingBracketStack.slice(0, closingBracketStack.length - 1) |
| 63 | + case CLOSE_BRACKET: |
| 64 | + case CLOSE_CURLY: |
| 65 | + case CLOSE_PAREN: |
| 66 | + if (stackPos > 0 && char === closingBracketStack[stackPos - 1]) { |
| 67 | + // SAFETY: The buffer does not need to be mutated because the stack is |
| 68 | + // only ever read from or written to its current position. Its current |
| 69 | + // position is only ever incremented after writing to it, meaning that |
| 70 | + // the buffer can be dirty for the next use and still be correct since |
| 71 | + // reading/writing always starts at position `0`. |
| 72 | + stackPos-- |
51 | 73 | } |
52 | 74 | break |
53 | 75 | } |
|
0 commit comments