Skip to content

Commit 4ecda5b

Browse files
Make preprocessing of input stream handle supplementary characters
Fixes #383. When preprocessing the input stream as specified in https://drafts.csswg.org/css-syntax/#input-preprocessing, this change makes our implementation handle non-BMP supplementary characters as expected: a surrogate is replaced with U+FFFD only if it is a lone (unpaired) surrogate, and surrogates that form a surrogate pair (a high surrogate followed by a low surrogate) are left intact. Without this change, a parse error occurs when our implementation encounters supplementary characters in the input stream.
1 parent ebabe74 commit 4ecda5b

File tree

3 files changed

+22
-10
lines changed

3 files changed

+22
-10
lines changed

org/w3c/css/css/StyleSheetParser.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -299,13 +299,7 @@ public void parseStyleElement(ApplContext ac, Reader reader,
299299

300300
// if (cssFouffa == null) {
301301
String charset = ac.getCharsetForURL(url);
302-
if (ac.getCssVersion().compareTo(CssVersion.CSS2) >=0
303-
&& !isPreprocessed) {
304-
cssFouffa = new CssFouffa(ac, new UnescapeFilterReader(new BufferedReader(reader)), url, lineno);
305-
} else {
306-
cssFouffa = new CssFouffa(ac, reader, url, lineno);
307-
308-
}
302+
cssFouffa = new CssFouffa(ac, reader, url, lineno);
309303
cssFouffa.addListener(this);
310304
// } else {
311305
// cssFouffa.ReInit(ac, input, url, lineno);

org/w3c/css/parser/CssFouffa.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@
2626
import org.w3c.css.util.CssVersion;
2727
import org.w3c.css.util.HTTPURL;
2828
import org.w3c.css.util.InvalidParamException;
29+
import org.w3c.css.util.UnescapeFilterReader;
2930
import org.w3c.css.util.Util;
3031
import org.w3c.css.util.WarningParamException;
3132
import org.w3c.css.util.Warnings;
3233
import org.w3c.css.values.CssExpression;
3334
import org.w3c.css.values.CssValue;
3435

36+
import java.io.BufferedReader;
3537
import java.io.FileNotFoundException;
3638
import java.io.IOException;
3739
import java.io.InputStream;
@@ -88,7 +90,7 @@ public final class CssFouffa extends CssParser {
8890
*/
8991
public CssFouffa(ApplContext ac, Reader reader, URL file, int beginLine)
9092
throws IOException {
91-
super(reader);
93+
super(new UnescapeFilterReader(new BufferedReader(reader)));
9294
if (ac.getOrigin() == -1) {
9395
setOrigin(StyleSheetOrigin.AUTHOR); // default is user
9496
} else {

org/w3c/css/util/UnescapeFilterReader.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.FilterReader;
44
import java.io.IOException;
55
import java.io.Reader;
6+
import java.lang.Character;
67

78
public class UnescapeFilterReader extends FilterReader {
89

@@ -32,7 +33,14 @@ public int read()
3233
return 0xfffd; // U+FFFD REPLACEMENT CHARACTER
3334
}
3435
if (c >= 0xd800 && c <= 0xdfff) { // surrogate
35-
return 0xfffd;
36+
if (!Character.isHighSurrogate((char) c)) {
37+
return 0xfffd;
38+
}
39+
mark(1);
40+
if (!Character.isLowSurrogate((char) in.read())) {
41+
return 0xfffd;
42+
}
43+
reset();
3644
}
3745

3846
// now specific case of CSS unicode escape for ascii values [A-Za-z0-9].
@@ -109,7 +117,15 @@ public int read(char[] cbuf, int off, int len) throws IOException {
109117
} else if (chars[i] == 0) {
110118
chars[j++] = 0xfffd;
111119
} else if (chars[i] >= 0xd800 && chars[i] <= 0xdfff) {
112-
chars[j++] = 0xfffd;
120+
if (i + 1 >= l) {
121+
chars[j++] = 0xfffd;
122+
} else if (!Character.isHighSurrogate((char) chars[i])) {
123+
chars[j++] = 0xfffd;
124+
} else if (!Character.isLowSurrogate((char) chars[i + 1])) {
125+
chars[j++] = 0xfffd;
126+
}
127+
i++;
128+
j++;
113129
}
114130
// escaping
115131

0 commit comments

Comments
 (0)