Skip to content

Commit 7755dea

Browse files
Make preprocessing of input stream handle supplementary characters
Fixes #383. When performing preprocessing of the input stream as specified in https://drafts.csswg.org/css-syntax/#input-preprocessing, this change makes our implementation handle non-BMP supplementary characters as expected: a surrogate is replaced with U+FFFD only if it is a lone surrogate, and surrogates that are part of a surrogate pair (a high surrogate followed by a low surrogate) are left intact. Without this change, a parse error occurs when our implementation encounters supplementary characters in the input stream.
1 parent ebabe74 commit 7755dea

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

org/w3c/css/util/UnescapeFilterReader.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.FilterReader;
44
import java.io.IOException;
55
import java.io.Reader;
6+
import java.lang.Character;
67

78
public class UnescapeFilterReader extends FilterReader {
89

@@ -32,7 +33,14 @@ public int read()
3233
return 0xfffd; // U+FFFD REPLACEMENT CHARACTER
3334
}
3435
if (c >= 0xd800 && c <= 0xdfff) { // surrogate
35-
return 0xfffd;
36+
if (!Character.isHighSurrogate((char) c)) {
37+
return 0xfffd;
38+
}
39+
mark(1);
40+
if (!Character.isLowSurrogate((char) in.read())) {
41+
return 0xfffd;
42+
}
43+
reset();
3644
}
3745

3846
// now specific case of CSS unicode escape for ascii values [A-Za-z0-9].
@@ -109,7 +117,17 @@ public int read(char[] cbuf, int off, int len) throws IOException {
109117
} else if (chars[i] == 0) {
110118
chars[j++] = 0xfffd;
111119
} else if (chars[i] >= 0xd800 && chars[i] <= 0xdfff) {
112-
chars[j++] = 0xfffd;
120+
if (!Character.isHighSurrogate((char) chars[i])) {
121+
chars[j++] = 0xfffd;
122+
} else if (i + 1 < l) {
123+
if (!Character.isLowSurrogate((char) chars[i + 1])) {
124+
chars[j++] = 0xfffd;
125+
}
126+
} else {
127+
chars[j++] = 0xfffd;
128+
}
129+
i++;
130+
continue;
113131
}
114132
// escaping
115133

0 commit comments

Comments
 (0)