Skip to content

Commit bd5bc13

Browse files
committed
Use a pre-processing stream reader to enforce css-syntax-3 input preprocessing and to unescape escaped code points that are in the ASCII range of checkable values
1 parent e672466 commit bd5bc13

File tree

3 files changed

+191
-6
lines changed

3 files changed

+191
-6
lines changed

org/w3c/css/css/StyleSheetParser.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
import org.w3c.css.util.CssVersion;
2525
import org.w3c.css.util.InvalidParamException;
2626
import org.w3c.css.util.Messages;
27+
import org.w3c.css.util.UnescapeFilterReader;
2728
import org.w3c.css.util.Util;
2829
import org.w3c.css.util.Warning;
2930
import org.w3c.css.util.Warnings;
3031

32+
import java.io.BufferedReader;
3133
import java.io.IOException;
3234
import java.io.InputStream;
3335
import java.io.InputStreamReader;
@@ -289,7 +291,12 @@ public void parseStyleElement(ApplContext ac, Reader reader,
289291

290292
// if (cssFouffa == null) {
291293
String charset = ac.getCharsetForURL(url);
292-
cssFouffa = new CssFouffa(ac, reader, url, lineno);
294+
if (ac.getCssVersion().compareTo(CssVersion.CSS2) >=0 ) {
295+
cssFouffa = new CssFouffa(ac, new UnescapeFilterReader(new BufferedReader(reader)), url, lineno);
296+
} else {
297+
cssFouffa = new CssFouffa(ac, reader, url, lineno);
298+
299+
}
293300
cssFouffa.addListener(this);
294301
// } else {
295302
// cssFouffa.ReInit(ac, input, url, lineno);

org/w3c/css/parser/analyzer/CssParser.jj

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -531,9 +531,6 @@ SPECIAL_TOKEN :
531531
{
532532
< #H : ["0"-"9", "a"-"f"] >
533533
| < #NONASCII : ["\u0080"-"\uFFFF"] >
534-
| < #__U : "u" | ( "\\" ( "0" )? ( "0" )? ( "0" )? ( "0" )? ( "55" | "75" ) ( "\r\n" | [ " ", "\t" , "\n" , "\r", "\f" ] )? ) >
535-
| < #__R : "r" | ( "\\" ( "0" )? ( "0" )? ( "0" )? ( "0" )? ( "52" | "72" ) ( "\r\n" | [ " ", "\t" , "\n" , "\r", "\f" ] )? ) >
536-
| < #__L : "l" | ( "\\" ( "0" )? ( "0" )? ( "0" )? ( "0" )? ( "4c" | "6c" ) ( "\r\n" | [ " ", "\t" , "\n" , "\r", "\f" ] )? ) >
537534
| < #UNICODE : "\\" <H> ( <H> )? ( <H> )? ( <H> )? ( <H> )? ( <H> )?
538535
( "\r\n" | [ " ", "\t" , "\n" , "\r", "\f" ] )? >
539536
| < #ESCAPE : <UNICODE> | ( "\\" ~[ "\r", "\n", "\f", "0"-"9", "a"-"f" ] ) >
@@ -552,7 +549,6 @@ SPECIAL_TOKEN :
552549
| < #_S : ( [ " ", "\t" , "\n" , "\r", "\f" ] ) ( <COMMENT> | [ " ", "\t" , "\n" , "\r", "\f" ] )* >
553550
| < #_W : ( <_S> )? >
554551
| < #NL : ( "\n" | "\r\n" | "\r" | "\f" ) >
555-
| < URLPREFIX : <__U> <__R> <__L> "(" >
556552
}
557553
/*
558554
* The _S definition is not ( [ " ", "\t" , "\n" , "\r", "\f" ] ) + as we need to add support
@@ -645,7 +641,8 @@ TOKEN :
645641
<DEFAULT>
646642
TOKEN [IGNORE_CASE] :
647643
{
648-
< URL : <URLPREFIX> ( <S> )* ( <STRING> | <_URL> ) ( <S> )* <RPAREN> >
644+
< URLPREFIX : "url(" >
645+
| < URL : <URLPREFIX> ( <S> )* ( <STRING> | <_URL> ) ( <S> )* <RPAREN> >
649646
}
650647

651648
<DEFAULT>
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
package org.w3c.css.util;
2+
3+
import java.io.FilterReader;
4+
import java.io.IOException;
5+
import java.io.Reader;
6+
7+
/**
 * A {@link FilterReader} that applies the CSS Syntax Level 3 input
 * preprocessing and rewrites hexadecimal escapes of ASCII letters and digits
 * into the characters they denote.
 *
 * <p>Preprocessing replaces CR, FF and CR+LF with a single LF, and U+0000 with
 * U+FFFD. An escape such as {@code \75} (optionally followed by one whitespace
 * terminator) is replaced by {@code u}; every other escape is passed through
 * character for character so that the downstream tokenizer still sees it.</p>
 *
 * <p>Two situations need extra care to keep the output equivalent to the
 * input:</p>
 * <ul>
 *   <li>the character after a non-hexadecimal escape (for instance the second
 *       backslash of {@code \\}) is emitted as-is and never starts a new
 *       escape;</li>
 *   <li>when an untouched hexadecimal escape of fewer than six digits has no
 *       whitespace terminator and is directly followed by an escape that is
 *       rewritten to a hexadecimal digit, a space is inserted between the two
 *       so they do not merge into one longer escape.</li>
 * </ul>
 *
 * <p>Lookahead is kept inside this object, so the wrapped reader does not
 * need to support {@code mark}; as a consequence this reader itself reports
 * mark as unsupported. Instances are not synchronized.</p>
 */
public class UnescapeFilterReader extends FilterReader {

    /** Value returned by {@code read()} at end of stream. */
    private static final int EOF = -1;
    /** Marker for an empty pushback slot (distinct from {@link #EOF}). */
    private static final int NOTHING = -2;
    /** Maximum number of hexadecimal digits in a CSS escape. */
    private static final int MAX_HEX_DIGITS = 6;
    /** U+FFFD, substituted for U+0000 by the preprocessing step. */
    private static final int REPLACEMENT_CHARACTER = 0xFFFD;

    /** Already-processed characters waiting to be returned (digits + terminator at most). */
    private final char[] pending = new char[MAX_HEX_DIGITS + 1];
    private int pendingPos = 0;
    private int pendingEnd = 0;

    /** One raw character (or {@link #EOF}) handed back to the input, or {@link #NOTHING}. */
    private int pushedBack = NOTHING;

    /**
     * True when the last thing emitted is an untouched hexadecimal escape with
     * fewer than six digits and no whitespace terminator.
     */
    private boolean openHexEscape = false;

    /**
     * Creates a filter on top of the given reader.
     *
     * @param r the reader providing the raw style sheet characters
     */
    public UnescapeFilterReader(Reader r) {
        super(r);
    }

    /**
     * Reads one preprocessed, unescaped character.
     *
     * @return the character, or {@code -1} at end of stream
     * @throws IOException if the underlying reader fails
     */
    @Override
    public int read()
            throws IOException {
        if (pendingPos < pendingEnd) {
            return pending[pendingPos++];
        }
        // the flag only matters for the very next source character
        boolean followsOpenEscape = openHexEscape;
        openHexEscape = false;

        int c = nextPreprocessed();
        if (c != '\\') {
            return c;
        }
        return readEscape(followsOpenEscape);
    }

    /**
     * Reads up to {@code len} preprocessed, unescaped characters. Blocks for
     * the first character only; further characters are added while more input
     * is available without blocking, so the result may be shorter than
     * {@code len}.
     *
     * @param cbuf destination buffer
     * @param off  offset of the first character to store
     * @param len  maximum number of characters to store
     * @return the number of characters stored, {@code 0} if {@code len} is 0,
     *         or {@code -1} at end of stream
     * @throws IOException               if the underlying reader fails
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not
     *                                   describe a range inside {@code cbuf}
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len
                    + ", buffer length=" + cbuf.length);
        }
        if (len == 0) {
            return 0;
        }
        int count = 0;
        while (count < len) {
            if (count > 0 && !ready()) {
                break;
            }
            int c = read();
            if (c == EOF) {
                break;
            }
            cbuf[off + count++] = (char) c;
        }
        return (count == 0) ? EOF : count;
    }

    /**
     * Tells whether a character can be returned without blocking, taking the
     * internally buffered characters into account.
     */
    @Override
    public boolean ready() throws IOException {
        return pendingPos < pendingEnd || pushedBack != NOTHING || in.ready();
    }

    /**
     * Skips processed characters by reading and discarding them, so that
     * internally buffered characters are counted correctly.
     *
     * @param n number of characters to skip
     * @return the number of characters actually skipped
     * @throws IllegalArgumentException if {@code n} is negative
     * @throws IOException              if the underlying reader fails
     */
    @Override
    public long skip(long n) throws IOException {
        if (n < 0) {
            throw new IllegalArgumentException("skip value is negative");
        }
        long skipped = 0;
        while (skipped < n && read() != EOF) {
            skipped++;
        }
        return skipped;
    }

    /** Mark is not supported because this reader keeps its own lookahead state. */
    @Override
    public boolean markSupported() {
        return false;
    }

    /** Always fails, see {@link #markSupported()}. */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        throw new IOException("mark() not supported");
    }

    /** Always fails, see {@link #markSupported()}. */
    @Override
    public void reset() throws IOException {
        throw new IOException("reset() not supported");
    }

    /**
     * Handles the characters following a backslash that has just been read.
     *
     * @param followsOpenEscape whether the backslash directly follows an
     *                          untouched, unterminated short hexadecimal escape
     * @return the first character to hand to the caller; any further
     *         characters are left in {@link #pending}
     */
    private int readEscape(boolean followsOpenEscape) throws IOException {
        pendingPos = 0;
        pendingEnd = 0;
        int value = 0;
        int digits = 0;
        while (digits < MAX_HEX_DIGITS) {
            int raw = nextRaw();
            int digit = hexValue(raw);
            if (digit < 0) {
                pushedBack = raw;
                break;
            }
            value = (value << 4) | digit;
            // keep the digits: they are replayed if the escape is left untouched
            pending[pendingEnd++] = (char) raw;
            digits++;
        }
        int terminator = nextPreprocessed();

        if (digits == 0) {
            // "\x" with x not a hex digit: emit both characters verbatim and
            // make sure x (possibly another backslash) is not re-interpreted.
            if (terminator == EOF) {
                pushedBack = EOF;
            } else {
                pending[pendingEnd++] = (char) terminator;
            }
            return '\\';
        }

        boolean terminated = isCssWhitespace(terminator);

        if (isAsciiAlphanumeric(value)) {
            // the whitespace terminator belongs to the escape and disappears
            // with it; anything else goes back to the input.
            if (!terminated) {
                pushedBack = terminator;
            }
            pendingEnd = 0;
            if (followsOpenEscape && hexValue(value) >= 0) {
                // keep "\26" + "A" from turning into the escape "\26A"
                pending[pendingEnd++] = (char) value;
                return ' ';
            }
            return value;
        }

        // not a character we rewrite: replay "\" + digits (+ terminator)
        if (terminated) {
            pending[pendingEnd++] = (char) terminator;
        } else {
            pushedBack = terminator;
            openHexEscape = digits < MAX_HEX_DIGITS;
        }
        return '\\';
    }

    /**
     * Returns the next character after input preprocessing
     * (CR, FF and CR+LF become LF, U+0000 becomes U+FFFD).
     */
    private int nextPreprocessed() throws IOException {
        int c = nextRaw();
        if (c == '\r') {
            int following = nextRaw();
            if (following != '\n') {
                pushedBack = following;
            }
            return '\n';
        }
        if (c == '\f') {
            return '\n';
        }
        if (c == 0) {
            return REPLACEMENT_CHARACTER;
        }
        return c;
    }

    /** Returns the pushed-back character if there is one, else reads from the wrapped reader. */
    private int nextRaw() throws IOException {
        if (pushedBack != NOTHING) {
            int c = pushedBack;
            pushedBack = NOTHING;
            return c;
        }
        return in.read();
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 for anything else (including EOF). */
    private static int hexValue(int c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** True for the characters this reader rewrites: [A-Za-z0-9]. */
    private static boolean isAsciiAlphanumeric(int c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    }

    /** True for whitespace as it appears after preprocessing: LF, TAB or SPACE. */
    private static boolean isCssWhitespace(int c) {
        return c == '\n' || c == '\t' || c == ' ';
    }
}

0 commit comments

Comments
 (0)