Skip to content

Commit e1c76ff

Browse files
committed
Rework unesc for a 60+% performance boost to all of postcss.
While profiling postcss I found that a significant amount of time was being spent in unesc. This was due to the expensive regex checks that were being performed on the fly for every selector in the codebase. By migrating to a constant-runtime implementation, I am seeing a 60% speedup in my local application testing, which saves multiple seconds off my overall build time! This implementation passes all of the existing test cases and aims to mirror the prior implementation's behavior :)
1 parent 96a85e3 commit e1c76ff

File tree

2 files changed

+99
-16
lines changed

2 files changed

+99
-16
lines changed

src/__tests__/classes.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,9 @@ test('class selector with escaping (36)', '.not-pseudo\\:\\:focus', (t, tree) =>
264264
t.deepEqual(tree.nodes[0].nodes[0].type, 'class');
265265
t.deepEqual(tree.nodes[0].nodes[0].raws.value, 'not-pseudo\\:\\:focus');
266266
});
267+
268+
// Case (37): a hex escape for a code point above the BMP (\1D306 -> U+1D306, "𝌆")
// followed directly by a literal character. The parsed class value must decode
// the escape and keep the trailing "k", while raws.value keeps the escaped source text.
test('class selector with escaping with more chars (37)', '.\\1D306k', (t, tree) => {
269+
t.deepEqual(tree.nodes[0].nodes[0].value, '𝌆k');
270+
t.deepEqual(tree.nodes[0].nodes[0].type, 'class');
271+
t.deepEqual(tree.nodes[0].nodes[0].raws.value, '\\1D306k');
272+
});

src/util/unesc.js

Lines changed: 93 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,96 @@
1-
const whitespace = '[\\x20\\t\\r\\n\\f]';
2-
const unescapeRegExp = new RegExp('\\\\([\\da-f]{1,6}' + whitespace + '?|(' + whitespace + ')|.)', 'ig');
1+
// Many thanks for this post which made this migration much easier.
2+
// https://mathiasbynens.be/notes/css-escapes
3+
4+
// Punctuation characters whose hex escapes are recognised through the lookup
// tables built below.
const CSS_SPECIAL_CHARS = new Set([
    "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ";", "<", "=", ">", "?",
    "@", "[", "]", "^", "`", "{", "|", "}", "~", "_", ":",
]);

// Lookup tables from a lowercase hex escape body to the character it encodes.
// They are created without a prototype so that a lookup can only ever hit a key
// that was inserted here (never an inherited Object.prototype member).
//   SHORT: minimal hex form, e.g. "3a"     -> ":"
//   LONG:  zero-padded six-digit form, e.g. "00003a" -> ":"
const CSS_SPECIAL_CHARS_HEX_LONG = Object.create(null);
const CSS_SPECIAL_CHARS_HEX_SHORT = Object.create(null);

CSS_SPECIAL_CHARS.forEach((char) => {
    // Compute the hex form once and derive both keys from it.
    const hex = char.charCodeAt(0).toString(16);
    CSS_SPECIAL_CHARS_HEX_SHORT[hex] = char;
    // Pad to exactly six digits regardless of how many digits `hex` has.
    CSS_SPECIAL_CHARS_HEX_LONG[`000000${hex}`.slice(-6)] = char;
});
16+
17+
/**
 * Looks up the start of `s` in the special-character hex tables.
 *
 * Despite the name, the escapes matched here are hexadecimal, not octal.
 *
 * @param {string} s Text that immediately follows a backslash.
 * @returns {{char: string, length: number}|undefined} The decoded character and
 *   the number of characters of `s` that belong to the escape, or `undefined`
 *   when `s` does not start with a known two-digit or six-digit escape.
 */
function matchesOctal (s) {
    // The table keys are lowercase (built with toString(16)), but hex digits in
    // CSS escapes are case-insensitive, so normalise before looking up.
    const hex = s.toLowerCase();

    const shortChar = CSS_SPECIAL_CHARS_HEX_SHORT[hex.slice(0, 2)];
    if (shortChar) {
        // A single whitespace character after the digits terminates the escape
        // and is part of it; any other following character is not consumed.
        // NOTE(review): a longer escape sharing this two-digit prefix
        // (e.g. "3a1") is still matched as the short form here.
        const next = s[2];
        const hasTerminator =
            next === " " || next === "\t" || next === "\r" || next === "\n" || next === "\f";
        return {
            char: shortChar,
            length: hasTerminator ? 3 : 2,
        };
    }

    const longChar = CSS_SPECIAL_CHARS_HEX_LONG[hex.slice(0, 6)];
    if (longChar) {
        return {
            char: longChar,
            length: 6,
        };
    }

    return undefined;
}
335

436
/**
 * Tells whether a UTF-16 code unit is an ASCII hex digit (0-9, A-F, a-f).
 *
 * @param {number} code Code unit as returned by `String.prototype.charCodeAt`.
 * @returns {boolean}
 */
function isHexDigitCode (code) {
    return (
        (code >= 48 && code <= 57) || // 0-9
        (code >= 65 && code <= 70) || // A-F
        (code >= 97 && code <= 102) // a-f
    );
}

/**
 * Tells whether `ch` is one of the whitespace characters that terminates a hex
 * escape (space, tab, CR, LF, form feed).
 *
 * @param {string} ch A single character.
 * @returns {boolean}
 */
function isEscapeTerminator (ch) {
    return ch === " " || ch === "\t" || ch === "\r" || ch === "\n" || ch === "\f";
}

/**
 * Reads a hex escape body (one to six hex digits plus one optional trailing
 * whitespace character) starting at `start`.
 *
 * @param {string} str Full input string.
 * @param {number} start Index of the first character after the backslash.
 * @returns {{char: string, next: number}|undefined} The decoded character and
 *   the index of the first character after the escape, or `undefined` when no
 *   hex digit is present at `start`.
 */
function readHexEscape (str, start) {
    let end = start;
    while (end < str.length && end - start < 6 && isHexDigitCode(str.charCodeAt(end))) {
        end++;
    }
    if (end === start) {
        return undefined;
    }

    const codePoint = parseInt(str.slice(start, end), 16);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    // Zero, surrogates and values past U+10FFFF are not valid escaped code
    // points; they decode to U+FFFD instead of producing broken strings (or
    // making String.fromCodePoint throw a RangeError).
    const char = codePoint === 0 || isSurrogate || codePoint > 0x10ffff
        ? "\uFFFD"
        : String.fromCodePoint(codePoint);

    // A single whitespace character directly after the digits belongs to the escape.
    if (end < str.length && isEscapeTerminator(str[end])) {
        end++;
    }

    return {char, next: end};
}

/**
 * Decodes CSS escape sequences in `str` in a single pass, without regular
 * expressions.
 *
 * - `\` followed by 1-6 hex digits (any case) and one optional whitespace
 *   character becomes the corresponding code point.
 * - `\` followed by any other character becomes that character, so `\\`
 *   yields a single backslash.
 * - A lone `\` at the very end of the string is kept as-is.
 *
 * @param {string} str Raw, possibly escaped, CSS identifier text.
 * @returns {string} The unescaped text.
 */
export default function unesc (str) {
    // Fast path: nothing to decode.
    if (str.indexOf("\\") === -1) {
        return str;
    }

    let ret = "";
    let i = 0;

    while (i < str.length) {
        const ch = str[i];

        if (ch !== "\\") {
            ret += ch;
            i++;
            continue;
        }

        // A backslash at the end of the string has nothing to escape; retain it.
        if (i + 1 === str.length) {
            ret += ch;
            break;
        }

        const hexEscape = readHexEscape(str, i + 1);
        if (hexEscape !== undefined) {
            ret += hexEscape.char;
            i = hexEscape.next;
            continue;
        }

        // Any other escaped character stands for itself (this also turns a
        // doubled backslash into a single one).
        ret += str[i + 1];
        i += 2;
    }

    return ret;
}

0 commit comments

Comments
 (0)