From 62d8c803abefeb69f13a164abae56515cf96bddc Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Wed, 2 Nov 2022 13:12:01 +0900 Subject: [PATCH] =?UTF-8?q?Don=E2=80=99t=20do=20surrogate=20replacement=20?= =?UTF-8?q?when=20preprocessing=20input=20stream?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes https://github.com/w3c/css-validator/issues/383 This change drops the code for replacing surrogate code points from our implementation of “filter code points” from “Preprocessing the input stream” at https://drafts.csswg.org/css-syntax/#css-filter-code-points https://github.com/w3c/csswg-drafts/issues/3307#issuecomment-442198338 notes that the only way to produce a surrogate code point in CSS content is by directly assigning a DOMString with one in it via an OM operation; in other words, by manipulating a document using JavaScript to insert a surrogate code point into the document. But because the CSS validator doesn’t execute any JavaScript from a document, there’s no way for a document being checked by the CSS validator to contain any surrogate code points. Therefore, it’s unnecessary for our implementation to handle replacement of surrogate code points. In other words, our implementation can still conform to the spec requirements even if we don’t perform surrogate replacement. Note that the removed checks were applied to each value read individually; assuming those values are UTF-16 code units (as java.io.Reader yields), the checks also matched both halves of every valid surrogate pair, so each character outside the Basic Multilingual Plane was turned into two U+FFFD characters, which is presumably the misbehavior reported in the issue. --- org/w3c/css/util/UnescapeFilterReader.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/org/w3c/css/util/UnescapeFilterReader.java b/org/w3c/css/util/UnescapeFilterReader.java index 179d03bd1..3e68518f3 100644 --- a/org/w3c/css/util/UnescapeFilterReader.java +++ b/org/w3c/css/util/UnescapeFilterReader.java @@ -31,9 +31,6 @@ public int read() if (c == 0) { // U+0000 NULL return 0xfffd; // U+FFFD REPLACEMENT CHARACTER } - if (c >= 0xd800 && c <= 0xdfff) { // surrogate - return 0xfffd; - } // now specific case of CSS unicode escape for ascii values [A-Za-z0-9]. 
if (c != '\\') { @@ -108,8 +105,6 @@ public int read(char[] cbuf, int off, int len) throws IOException { chars[j++] = 0x000a; } else if (chars[i] == 0) { chars[j++] = 0xfffd; - } else if (chars[i] >= 0xd800 && chars[i] <= 0xdfff) { - chars[j++] = 0xfffd; } // escaping