From 62d8c803abefeb69f13a164abae56515cf96bddc Mon Sep 17 00:00:00 2001
From: "Michael[tm] Smith" <mike@w3.org>
Date: Wed, 2 Nov 2022 13:12:01 +0900
Subject: [PATCH] =?UTF-8?q?Don=E2=80=99t=20do=20surrogate=20replacement=20?=
 =?UTF-8?q?when=20preprocessing=20input=20stream?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes https://github.com/w3c/css-validator/issues/383

This change drops the surrogate-replacement code from our implementation
of the “filter code points” step of “Preprocessing the input stream”; see
https://drafts.csswg.org/css-syntax/#css-filter-code-points

https://github.com/w3c/csswg-drafts/issues/3307#issuecomment-442198338
notes that the only way to produce a surrogate code point in CSS content
is by directly assigning a DOMString with one in it via an OM operation;
in other words, by manipulating a document using JavaScript to insert
a surrogate code point into the document.

But because the CSS validator doesn’t execute any JavaScript from a
document, there’s no way for a document being checked by the CSS
validator to contain any surrogate code points. Therefore, it’s
unnecessary for our implementation to handle replacement of surrogate
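code points. In other words, our implementation can still conform to the
spec requirements even if we don’t perform surrogate replacement.

Moreover, the removed checks operated on Java chars (UTF-16 code units),
so they also matched both halves of every valid surrogate pair and
replaced each supplementary character (for example, an emoji) with
U+FFFD.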
---
 org/w3c/css/util/UnescapeFilterReader.java | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/org/w3c/css/util/UnescapeFilterReader.java b/org/w3c/css/util/UnescapeFilterReader.java
index 179d03bd1..3e68518f3 100644
--- a/org/w3c/css/util/UnescapeFilterReader.java
+++ b/org/w3c/css/util/UnescapeFilterReader.java
@@ -31,9 +31,6 @@ public int read()
         if (c == 0) { // U+0000 NULL
             return 0xfffd; // U+FFFD REPLACEMENT CHARACTER
         }
-        if (c >= 0xd800 && c <= 0xdfff) { // surrogate
-            return 0xfffd;
-        }
 
         // now specific case of CSS unicode escape for ascii values [A-Za-z0-9].
         if (c != '\\') {
@@ -108,8 +105,6 @@ public int read(char[] cbuf, int off, int len) throws IOException {
                 chars[j++] = 0x000a;
             } else if (chars[i] == 0) {
                 chars[j++] = 0xfffd;
-            } else if (chars[i] >= 0xd800 && chars[i] <= 0xdfff) {
-                chars[j++] = 0xfffd;
             }
             // escaping