diff --git a/lib/parse.js b/lib/parse.js index 9841107..692373b 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -1,6 +1,7 @@ "use strict"; var Parser = require("fastparse"); +var regexpu = require("regexpu-core"); function unescape(str) { return str.replace(/\\(.)/g, "$1"); @@ -164,11 +165,19 @@ function bracketEnd(match) { this.token.content += match; } -var parser = new Parser({ - selector: { - "/\\*([\\s\\S]*?)\\*/": commentMatch, - "\\.((?:\\\\.|[A-Za-z_\\-])(?:\\\\.|[A-Za-z_\\-0-9])*)": typeMatch("class"), - "#((?:\\\\.|[A-Za-z_\\-])(?:\\\\.|[A-Za-z_\\-0-9])*)": typeMatch("id"), +function getSelectors() { + // The assignment here is split to preserve the property enumeration order. + var selectors = { + "/\\*([\\s\\S]*?)\\*/": commentMatch + }; + // https://www.w3.org/TR/CSS21/syndata.html#characters + // 4.1.3: identifiers (...) can contain only the characters [a-zA-Z0-9] and + // ISO 10646 characters U+00A0 and higher, plus the hyphen (-) and the underscore (_) + // + // 10ffff is the maximum allowed in current Unicode + selectors[regexpu("\\.((?:\\\\.|[A-Za-z_\\-\\u{00a0}-\\u{10ffff}])(?:\\\\.|[A-Za-z_\\-0-9\\u{00a0}-\\u{10ffff}])*)", "u")] = typeMatch("class"); + selectors[regexpu("#((?:\\\\.|[A-Za-z_\\-\\u{00a0}-\\u{10ffff}])(?:\\\\.|[A-Za-z_\\-0-9\\u{00a0}-\\u{10ffff}])*)", "u")] = typeMatch("id"); + var selectorsSecondHalf = { ":(not|matches|has|local|global)\\((\\s*)": nestedPseudoClassStartMatch, ":((?:\\\\.|[A-Za-z_\\-0-9])+)\\(": pseudoClassStartMatch, ":((?:\\\\.|[A-Za-z_\\-0-9])+)": typeMatch("pseudo-class"), @@ -185,7 +194,18 @@ var parser = new Parser({ "^\\s+": irrelevantSpacingStartMatch, "\\s+": spacingMatch, ".": invalidMatch - }, + }; + var selector; + for (selector in selectorsSecondHalf) { + if (Object.prototype.hasOwnProperty.call(selectorsSecondHalf, selector)) { + selectors[selector] = selectorsSecondHalf[selector]; + } + } + return selectors; +} + +var parser = new Parser({ + selector: getSelectors(), inBrackets: { 
"/\\*[\\s\\S]*?\\*/": addToCurrent, "\"([^\\\\\"]|\\\\.)*\"": addToCurrent, diff --git a/lib/stringify.js b/lib/stringify.js index b97710a..1c9d6aa 100644 --- a/lib/stringify.js +++ b/lib/stringify.js @@ -2,11 +2,21 @@ var stringify; -function escape(str) { +var regexpu = require("regexpu-core"); +var identifierEscapeRegexp = new RegExp( + regexpu("(^[^A-Za-z_\\-\\u{00a0}-\\u{10ffff}]|^\\-\\-|[^A-Za-z_0-9\\-\\u{00a0}-\\u{10ffff}])", "ug"), + "g" +); + +function escape(str, identifier) { if(str === "*") { return "*"; } - return str.replace(/(^[^A-Za-z_\\-]|^\-\-|[^A-Za-z_0-9\\-])/g, "\\$1"); + if (identifier) { + return str.replace(identifierEscapeRegexp, "\\$1"); + } else { + return str.replace(/(^[^A-Za-z_\\-]|^\-\-|[^A-Za-z_0-9\\-])/g, "\\$1"); + } } function stringifyWithoutBeforeAfter(tree) { @@ -18,9 +28,9 @@ function stringifyWithoutBeforeAfter(tree) { case "element": return (typeof tree.namespace === "string" ? escape(tree.namespace) + "|" : "") + escape(tree.name); case "class": - return "." + escape(tree.name); + return "." 
+ escape(tree.name, true); case "id": - return "#" + escape(tree.name); + return "#" + escape(tree.name, true); case "attribute": return "[" + tree.content + "]"; case "spacing": diff --git a/package.json b/package.json index dd888ca..2823a8c 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,8 @@ "homepage": "https://github.com/css-modules/css-selector-tokenizer", "dependencies": { "cssesc": "^0.1.0", - "fastparse": "^1.1.1" + "fastparse": "^1.1.1", + "regexpu-core": "^1.0.0" }, "devDependencies": { "chokidar-cli": "^0.2.1", diff --git a/test/test-cases.js b/test/test-cases.js index dd21bd6..86d48a5 100644 --- a/test/test-cases.js +++ b/test/test-cases.js @@ -65,6 +65,27 @@ module.exports = { ]) ], + "class name with high BMP character": [ + ".字", + singleSelector([ + { type: "class", name: "字" } + ]) + ], + + "class name with emoji": [ + ".🤔", + singleSelector([ + { type: "class", name: "🤔" } + ]) + ], + + "class name with multiple emoji": [ + ".👍👌", + singleSelector([ + { type: "class", name: "👍👌" } + ]) + ], + "id name": [ "#idName", singleSelector([ @@ -79,6 +100,20 @@ module.exports = { ]) ], + "id name with latin-1 character": [ + "#¡", + singleSelector([ + { type: "id", name: "¡" } + ]) + ], + + "id name with complex emoji": [ + ".🖖🏼", + singleSelector([ + { type: "class", name: "🖖🏼" } + ]) + ], + "pseudo class": [ ":before", singleSelector([