From 245c75743657014cfea0c4a3590dcd765057dd5d Mon Sep 17 00:00:00 2001 From: ottomated <31470743+ottomated@users.noreply.github.com> Date: Sat, 30 Mar 2024 21:53:09 -0700 Subject: [PATCH 1/7] Speed up segmentation --- packages/tailwindcss/src/utils/segment.ts | 52 ++++++++++++++--------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/packages/tailwindcss/src/utils/segment.ts b/packages/tailwindcss/src/utils/segment.ts index e043d30a87a5..026067578adc 100644 --- a/packages/tailwindcss/src/utils/segment.ts +++ b/packages/tailwindcss/src/utils/segment.ts @@ -10,44 +10,56 @@ * x x x ╰──────── Split because top-level * ╰──────────────┴──┴───────────── Ignored b/c inside >= 1 levels of parens */ +const closingBracketStack = new Uint8Array(256) +const OPEN_PAREN = '('.charCodeAt(0) +const OPEN_BRACKET = '['.charCodeAt(0) +const OPEN_BRACE = '{'.charCodeAt(0) +const CLOSE_PAREN = ')'.charCodeAt(0) +const CLOSE_BRACKET = ']'.charCodeAt(0) +const CLOSE_BRACE = '}'.charCodeAt(0) +const BACKSLASH = '\\'.charCodeAt(0) + export function segment(input: string, separator: string) { - // Stack of characters to close open brackets. Appending to a string because - // it's faster than an array of strings. - let closingBracketStack = '' + // Since JavaScript is single-threaded, using a shared buffer + // is more efficient and should still be safe. + let stackPointer = 0 let parts: string[] = [] let lastPos = 0 + let separatorCode = separator.charCodeAt(0) + for (let idx = 0; idx < input.length; idx++) { - let char = input[idx] + let char = input.charCodeAt(idx) - if (closingBracketStack.length === 0 && char === separator) { + if (stackPointer === 0 && char === separatorCode) { parts.push(input.slice(lastPos, idx)) lastPos = idx + 1 continue } switch (char) { - case '\\': + case BACKSLASH: // The next character is escaped, so we skip it. 
idx += 1 break - case '(': - closingBracketStack += ')' + case OPEN_PAREN: + closingBracketStack[stackPointer] = CLOSE_PAREN + stackPointer++ break - case '[': - closingBracketStack += ']' + case OPEN_BRACKET: + closingBracketStack[stackPointer] = CLOSE_BRACKET + stackPointer++ break - case '{': - closingBracketStack += '}' + case OPEN_BRACE: + closingBracketStack[stackPointer] = CLOSE_BRACE + stackPointer++ break - case ')': - case ']': - case '}': - if ( - closingBracketStack.length > 0 && - char === closingBracketStack[closingBracketStack.length - 1] - ) { - closingBracketStack = closingBracketStack.slice(0, closingBracketStack.length - 1) + case CLOSE_BRACKET: + case CLOSE_BRACE: + case CLOSE_PAREN: + if (stackPointer > 0 && char === closingBracketStack[stackPointer - 1]) { + // No need to mutate the buffer here, as it can stay dirty for the next use + stackPointer-- } break } From 12b54bcaebde6e0db99414f94dba2f94aa38f0fe Mon Sep 17 00:00:00 2001 From: ottomated <31470743+ottomated@users.noreply.github.com> Date: Sat, 30 Mar 2024 21:54:08 -0700 Subject: [PATCH 2/7] Add segment benchmark --- packages/tailwindcss/src/utils/segment.bench.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 packages/tailwindcss/src/utils/segment.bench.ts diff --git a/packages/tailwindcss/src/utils/segment.bench.ts b/packages/tailwindcss/src/utils/segment.bench.ts new file mode 100644 index 000000000000..1ee335d5ec4f --- /dev/null +++ b/packages/tailwindcss/src/utils/segment.bench.ts @@ -0,0 +1,14 @@ +import { bench } from 'vitest' +import { segment } from './segment' + +const values = [ + ['hover:focus:underline', ':'], + ['var(--a, 0 0 1px rgb(0, 0, 0)), 0 0 1px rgb(0, 0, 0)', ','], + ['var(--some-value,env(safe-area-inset-top,var(--some-other-value,env(safe-area-inset))))', ','], +] + +bench('segment', () => { + for (let [value, sep] of values) { + segment(value, sep) + } +}) From 7da294d7c73a85a8df9769be4f226331d94fff68 Mon Sep 17 00:00:00 2001 From: 
Ottomated Date: Sun, 31 Mar 2024 15:21:45 -0700 Subject: [PATCH 3/7] Add and move comments --- packages/tailwindcss/src/utils/segment.ts | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/tailwindcss/src/utils/segment.ts b/packages/tailwindcss/src/utils/segment.ts index 026067578adc..d4595efd9b71 100644 --- a/packages/tailwindcss/src/utils/segment.ts +++ b/packages/tailwindcss/src/utils/segment.ts @@ -1,3 +1,13 @@ +/* Supports up to 256 levels of nesting. This should be more than enough for any reasonable usage */ +const closingBracketStack = new Uint8Array(256) +const OPEN_PAREN = '('.charCodeAt(0) +const OPEN_BRACKET = '['.charCodeAt(0) +const OPEN_BRACE = '{'.charCodeAt(0) +const CLOSE_PAREN = ')'.charCodeAt(0) +const CLOSE_BRACKET = ']'.charCodeAt(0) +const CLOSE_BRACE = '}'.charCodeAt(0) +const BACKSLASH = '\\'.charCodeAt(0) + /** * This splits a string on a top-level character. * @@ -10,15 +20,6 @@ * x x x ╰──────── Split because top-level * ╰──────────────┴──┴───────────── Ignored b/c inside >= 1 levels of parens */ -const closingBracketStack = new Uint8Array(256) -const OPEN_PAREN = '('.charCodeAt(0) -const OPEN_BRACKET = '['.charCodeAt(0) -const OPEN_BRACE = '{'.charCodeAt(0) -const CLOSE_PAREN = ')'.charCodeAt(0) -const CLOSE_BRACKET = ']'.charCodeAt(0) -const CLOSE_BRACE = '}'.charCodeAt(0) -const BACKSLASH = '\\'.charCodeAt(0) - export function segment(input: string, separator: string) { // Since JavaScript is single-threaded, using a shared buffer // is more efficient and should still be safe. 
From 296165bcacae84bdd0cd196c0b6f943994e891be Mon Sep 17 00:00:00 2001 From: Jordan Pittman Date: Tue, 2 Apr 2024 15:48:25 -0400 Subject: [PATCH 4/7] Update --- packages/tailwindcss/src/utils/segment.ts | 37 ++++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/packages/tailwindcss/src/utils/segment.ts b/packages/tailwindcss/src/utils/segment.ts index d4595efd9b71..b19e511f61cd 100644 --- a/packages/tailwindcss/src/utils/segment.ts +++ b/packages/tailwindcss/src/utils/segment.ts @@ -1,12 +1,16 @@ -/* Supports up to 256 levels of nesting. This should be more than enough for any reasonable usage */ +// This is a shared buffer that is used to keep track of the current nesting level +// of parens, brackets, and braces. It is used to determine if a character is at +// the top-level of a string. This is a performance optimization to avoid memory +// allocations on ever call to `segment`. const closingBracketStack = new Uint8Array(256) + +const BACKSLASH = '\\'.charCodeAt(0) const OPEN_PAREN = '('.charCodeAt(0) const OPEN_BRACKET = '['.charCodeAt(0) const OPEN_BRACE = '{'.charCodeAt(0) const CLOSE_PAREN = ')'.charCodeAt(0) const CLOSE_BRACKET = ']'.charCodeAt(0) const CLOSE_BRACE = '}'.charCodeAt(0) -const BACKSLASH = '\\'.charCodeAt(0) /** * This splits a string on a top-level character. @@ -21,9 +25,9 @@ const BACKSLASH = '\\'.charCodeAt(0) * ╰──────────────┴──┴───────────── Ignored b/c inside >= 1 levels of parens */ export function segment(input: string, separator: string) { - // Since JavaScript is single-threaded, using a shared buffer - // is more efficient and should still be safe. - let stackPointer = 0 + // SAFETY: We can use an index into a shared buffer because this function is + // synchronous, non-recursive, and runs in a single-threaded environment. 
+ let stackPos = 0 let parts: string[] = [] let lastPos = 0 @@ -32,7 +36,7 @@ export function segment(input: string, separator: string) { for (let idx = 0; idx < input.length; idx++) { let char = input.charCodeAt(idx) - if (stackPointer === 0 && char === separatorCode) { + if (stackPos === 0 && char === separatorCode) { parts.push(input.slice(lastPos, idx)) lastPos = idx + 1 continue @@ -44,23 +48,26 @@ export function segment(input: string, separator: string) { idx += 1 break case OPEN_PAREN: - closingBracketStack[stackPointer] = CLOSE_PAREN - stackPointer++ + closingBracketStack[stackPos] = CLOSE_PAREN + stackPos++ break case OPEN_BRACKET: - closingBracketStack[stackPointer] = CLOSE_BRACKET - stackPointer++ + closingBracketStack[stackPos] = CLOSE_BRACKET + stackPos++ break case OPEN_BRACE: - closingBracketStack[stackPointer] = CLOSE_BRACE - stackPointer++ + closingBracketStack[stackPos] = CLOSE_BRACE + stackPos++ break case CLOSE_BRACKET: case CLOSE_BRACE: case CLOSE_PAREN: - if (stackPointer > 0 && char === closingBracketStack[stackPointer - 1]) { - // No need to mutate the buffer here, as it can stay dirty for the next use - stackPointer-- + if (stackPos > 0 && char === closingBracketStack[stackPos - 1]) { + // SAFETY: The buffer does not need to be mutated because the stack + // only ever read to or written from it's current position. This means + // that the buffer can be dirty for the next use and still be correct + // since reading will start at position `0`. 
+ stackPos-- } break } From 7a9cb610e37bfd19738b6367c4f1b8a5ed12d59c Mon Sep 17 00:00:00 2001 From: Jordan Pittman Date: Tue, 2 Apr 2024 16:04:23 -0400 Subject: [PATCH 5/7] Tweak comments --- packages/tailwindcss/src/utils/segment.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/packages/tailwindcss/src/utils/segment.ts b/packages/tailwindcss/src/utils/segment.ts index b19e511f61cd..acb6584ab64c 100644 --- a/packages/tailwindcss/src/utils/segment.ts +++ b/packages/tailwindcss/src/utils/segment.ts @@ -1,7 +1,7 @@ // This is a shared buffer that is used to keep track of the current nesting level // of parens, brackets, and braces. It is used to determine if a character is at // the top-level of a string. This is a performance optimization to avoid memory -// allocations on ever call to `segment`. +// allocations on every call to `segment`. const closingBracketStack = new Uint8Array(256) const BACKSLASH = '\\'.charCodeAt(0) @@ -15,8 +15,8 @@ const CLOSE_BRACE = '}'.charCodeAt(0) /** * This splits a string on a top-level character. * - * Regex doesn't support recursion (at least not the JS-flavored version). - * So we have to use a tiny state machine to keep track of paren placement. + * Regex doesn't support recursion (at least not the JS-flavored version), + * so we have to use a tiny state machine to keep track of paren placement. * * Expected behavior using commas: * var(--a, 0 0 1px rgb(0, 0, 0)), 0 0 1px rgb(0, 0, 0) @@ -63,10 +63,11 @@ export function segment(input: string, separator: string) { case CLOSE_BRACE: case CLOSE_PAREN: if (stackPos > 0 && char === closingBracketStack[stackPos - 1]) { - // SAFETY: The buffer does not need to be mutated because the stack - // only ever read to or written from it's current position. This means - // that the buffer can be dirty for the next use and still be correct - // since reading will start at position `0`. 
+ // SAFETY: The buffer does not need to be mutated because the stack is + // only ever read from or written to its current position. Its current + // position is only ever incremented after writing to it. Meaning that + // the buffer can be dirty for the next use and still be correct since + // reading/writing always starts at position `0`. stackPos-- } break From 94daf892a088d4d9b3abe729af7797b852481b87 Mon Sep 17 00:00:00 2001 From: Jordan Pittman Date: Tue, 2 Apr 2024 16:08:53 -0400 Subject: [PATCH 6/7] Tweak variable name --- packages/tailwindcss/src/utils/segment.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/tailwindcss/src/utils/segment.ts b/packages/tailwindcss/src/utils/segment.ts index acb6584ab64c..7defe86414b9 100644 --- a/packages/tailwindcss/src/utils/segment.ts +++ b/packages/tailwindcss/src/utils/segment.ts @@ -7,10 +7,10 @@ const closingBracketStack = new Uint8Array(256) const BACKSLASH = '\\'.charCodeAt(0) const OPEN_PAREN = '('.charCodeAt(0) const OPEN_BRACKET = '['.charCodeAt(0) -const OPEN_BRACE = '{'.charCodeAt(0) +const OPEN_CURLY = '{'.charCodeAt(0) const CLOSE_PAREN = ')'.charCodeAt(0) const CLOSE_BRACKET = ']'.charCodeAt(0) -const CLOSE_BRACE = '}'.charCodeAt(0) +const CLOSE_CURLY = '}'.charCodeAt(0) /** * This splits a string on a top-level character. 
@@ -55,12 +55,12 @@ export function segment(input: string, separator: string) { closingBracketStack[stackPos] = CLOSE_BRACKET stackPos++ break - case OPEN_BRACE: - closingBracketStack[stackPos] = CLOSE_BRACE + case OPEN_CURLY: + closingBracketStack[stackPos] = CLOSE_CURLY stackPos++ break case CLOSE_BRACKET: - case CLOSE_BRACE: + case CLOSE_CURLY: case CLOSE_PAREN: if (stackPos > 0 && char === closingBracketStack[stackPos - 1]) { // SAFETY: The buffer does not need to be mutated because the stack is From 4c1b7a5f5612335c066c1df2175127b1723045f2 Mon Sep 17 00:00:00 2001 From: Jordan Pittman Date: Tue, 2 Apr 2024 16:12:15 -0400 Subject: [PATCH 7/7] Tweak --- packages/tailwindcss/src/utils/segment.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/packages/tailwindcss/src/utils/segment.ts b/packages/tailwindcss/src/utils/segment.ts index 7defe86414b9..9ed471d2dda6 100644 --- a/packages/tailwindcss/src/utils/segment.ts +++ b/packages/tailwindcss/src/utils/segment.ts @@ -4,13 +4,14 @@ // allocations on every call to `segment`. const closingBracketStack = new Uint8Array(256) -const BACKSLASH = '\\'.charCodeAt(0) -const OPEN_PAREN = '('.charCodeAt(0) -const OPEN_BRACKET = '['.charCodeAt(0) -const OPEN_CURLY = '{'.charCodeAt(0) -const CLOSE_PAREN = ')'.charCodeAt(0) -const CLOSE_BRACKET = ']'.charCodeAt(0) -const CLOSE_CURLY = '}'.charCodeAt(0) +// All numbers are equivalent to the value returned by `String#charCodeAt(0)` +const BACKSLASH = 0x5c +const OPEN_PAREN = 0x28 +const OPEN_BRACKET = 0x5b +const OPEN_CURLY = 0x7b +const CLOSE_PAREN = 0x29 +const CLOSE_BRACKET = 0x5d +const CLOSE_CURLY = 0x7d /** * This splits a string on a top-level character.