Skip to content

Commit cea1862

Browse files
committed
Add support for documenting the charset when creating the input stream (rather than using a reader); this should address w3c#233
1 parent d17050b commit cea1862

File tree

5 files changed

+99
-27
lines changed

5 files changed

+99
-27
lines changed

org/w3c/css/css/CssParser.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.io.InputStream;
1414
import java.io.Reader;
1515
import java.net.URL;
16+
import java.nio.charset.Charset;
1617

1718
/**
1819
* This class describes how to implement your cascading
@@ -81,6 +82,25 @@ public abstract void parseStyleElement(ApplContext ac, InputStream input,
8182
String title, String media, URL url,
8283
int lineno);
8384

85+
/**
86+
* Parse a STYLE element.
87+
* The real difference between this method and the preceding one
88+
* is that this method also takes the charset of the InputStream. The URL is used
89+
* to resolve import statements and URL statements in the style
90+
* sheet.
91+
*
92+
* @param input the input stream.
93+
* @param charset the charset for that input stream.
94+
* @param title the title of the style element
95+
* @param media the media of the style element
96+
* @param url the URL where the input stream comes from.
97+
* @param lineno The line number in the source document. It is used for error messages
98+
*/
99+
public abstract void parseStyleElement(ApplContext ac, InputStream input,
100+
Charset charset,
101+
String title, String media, URL url,
102+
int lineno);
103+
84104
/**
85105
* Parse a STYLE element.
86106
* The real difference between this method and the preceding one

org/w3c/css/css/StyleSheetParser.java

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@
3333
import java.io.InputStreamReader;
3434
import java.io.Reader;
3535
import java.io.StringReader;
36-
import java.io.UnsupportedEncodingException;
3736
import java.lang.reflect.Constructor;
3837
import java.net.URL;
38+
import java.nio.charset.Charset;
39+
import java.nio.charset.StandardCharsets;
40+
import java.nio.charset.UnsupportedCharsetException;
3941
import java.util.ArrayList;
4042
import java.util.StringTokenizer;
4143

@@ -367,27 +369,45 @@ public void parseStyleElement(ApplContext ac, Reader reader,
367369
public void parseStyleElement(ApplContext ac, InputStream input,
368370
String title, String media,
369371
URL url, int lineno) {
370-
InputStreamReader reader = null;
372+
// FIXME better handling of charset using a charset detection library
371373
String charset = ac.getCharsetForURL(url);
372-
try {
373-
reader = new InputStreamReader(input, (charset == null) ?
374-
"iso-8859-1" : charset);
375-
} catch (UnsupportedEncodingException uex) {
376-
Errors er = new Errors();
377-
er.addError(new org.w3c.css.parser.CssError(url.toString(),
378-
-1, uex));
379-
notifyErrors(er);
380-
} catch (Exception ex) {
381-
// in case of error, ignore it.
382-
reader = null;
383-
if (Util.onDebug) {
384-
System.err.println("Error in StyleSheet.parseStyleElement(" + title + ","
385-
+ url + "," + lineno + ")");
374+
Charset c = null;
375+
if (charset == null) {
376+
parseStyleElement(ac, input, null, title,
377+
media, url, lineno);
378+
} else {
379+
try {
380+
c = Charset.forName(charset);
381+
} catch (UnsupportedCharsetException ucx) {
382+
Errors er = new Errors();
383+
er.addError(new org.w3c.css.parser.CssError(url.toString(),
384+
-1, ucx));
385+
notifyErrors(er);
386+
} catch (Exception ex) {
387+
// in case of error, ignore it.
388+
if (Util.onDebug) {
389+
System.err.println("Error in StyleSheet.parseStyleElement(" + title + ","
390+
+ url + "," + lineno + ")");
391+
}
386392
}
393+
parseStyleElement(ac, input, c, title, media, url, lineno);
387394
}
388-
if (reader != null) {
389-
parseStyleElement(ac, reader, title, media, url, lineno);
390-
}
395+
}
396+
397+
/**
398+
* Parse a style element. The Style element always comes from the user
399+
*
400+
* @param input the input stream containing the style data
401+
* @param url the name of the file the style element was read from.
402+
* @param charset the charset used to decode the input stream; ISO-8859-1 is used when it is null
403+
*/
404+
public void parseStyleElement(ApplContext ac, InputStream input,
405+
Charset charset,
406+
String title, String media,
407+
URL url, int lineno) {
408+
InputStreamReader reader = null;
409+
reader = new InputStreamReader(input, (charset == null) ? StandardCharsets.ISO_8859_1 : charset);
410+
parseStyleElement(ac, reader, title, media, url, lineno);
391411
}
392412

393413
/**

org/w3c/css/servlet/CssValidator.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import java.io.PrintWriter;
4747
import java.net.ProtocolException;
4848
import java.net.URL;
49+
import java.nio.charset.Charset;
4950

5051
/**
5152
* This class is a servlet to use the validator.
@@ -65,6 +66,7 @@ public final class CssValidator extends HttpServlet {
6566

6667
final static String opt_file = "file";
6768
final static String opt_text = "text";
69+
final static String opt_textcharset = "textcharset";
6870
final static String opt_lang = "lang";
6971
final static String opt_output = "output";
7072
final static String opt_warning = "warning";
@@ -517,6 +519,7 @@ public void doPost(HttpServletRequest req, HttpServletResponse res)
517519
CssParser parser = null;
518520
FakeFile file = null;
519521
String text = null;
522+
Charset textcharset = null;
520523
String output = null;
521524
//boolean XMLinput = false;
522525
String warning = null;
@@ -584,6 +587,9 @@ public void doPost(HttpServletRequest req, HttpServletResponse res)
584587
case opt_text:
585588
text = (String) pair.getValue();
586589
break;
590+
case opt_textcharset:
591+
textcharset = (Charset) pair.getValue();
592+
break;
587593
case opt_lang:
588594
lang = (String) pair.getValue();
589595
break;
@@ -688,7 +694,7 @@ public void doPost(HttpServletRequest req, HttpServletResponse res)
688694
handleScam(ac, text, res, output, warningLevel, errorReport);
689695
return;
690696
}
691-
ac.setFakeText(text);
697+
ac.setFakeText(text, textcharset);
692698
fileName = "TextArea";
693699
Util.verbose("- " + fileName + " Data -");
694700
Util.verbose(text);
@@ -717,7 +723,7 @@ public void doPost(HttpServletRequest req, HttpServletResponse res)
717723
if (isCSS) {
718724
//if CSS:
719725
parser = new StyleSheetParser(ac);
720-
parser.parseStyleElement(ac, is, null, ac.getMedium(),
726+
parser.parseStyleElement(ac, is, textcharset, null, ac.getMedium(),
721727
new URL(fileName), 0);
722728

723729
handleRequest(ac, res, fileName, parser.getStyleSheet(),

org/w3c/css/util/ApplContext.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ public void setFollowlinks(boolean followlinks) {
103103

104104
FakeFile fakefile = null;
105105
String faketext = null;
106+
Charset faketextcharset = null;
106107
URL fakeurl = null;
107108

108109
URL referrer = null;
@@ -508,23 +509,29 @@ public void setFakeFile(FakeFile fakefile) {
508509
/**
509510
* store content of entered text
510511
*/
511-
public void setFakeText(String faketext) {
512+
public void setFakeText(String faketext, Charset faketextcharset) {
512513
this.faketext = faketext;
514+
this.faketextcharset = faketextcharset;
515+
513516
}
514517

515518
public InputStream getFakeInputStream(URL source)
516519
throws IOException {
517520
InputStream is = null;
521+
Charset c = null;
518522
if (fakefile != null) {
519523
is = fakefile.getInputStream();
520524
}
521525
if (faketext != null) {
522-
is = new ByteArrayInputStream(faketext.getBytes());
526+
is = new ByteArrayInputStream(faketext.getBytes(faketextcharset));
527+
c = faketextcharset;
523528
}
524529
if (is == null) {
525530
return null;
526531
}
527-
Charset c = getCharsetObjForURL(source);
532+
if (c == null) {
533+
c = getCharsetObjForURL(source);
534+
}
528535
if (c == null) {
529536
UnicodeInputStream uis = new UnicodeInputStream(is);
530537
String guessedCharset = uis.getEncodingFromStream();

org/w3c/css/util/Codecs.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@
4242
import org.apache.commons.lang3.tuple.Pair;
4343

4444
import java.io.IOException;
45+
import java.nio.ByteBuffer;
46+
import java.nio.charset.CharacterCodingException;
47+
import java.nio.charset.CharsetDecoder;
4548
import java.nio.charset.StandardCharsets;
4649
import java.util.ArrayList;
4750

@@ -110,7 +113,7 @@ private Codecs() {
110113
* @throws IOException If any file operation fails.
111114
*/
112115
public final static synchronized ArrayList<Pair<String, ?>> mpFormDataDecode(byte[] data,
113-
String cont_type)
116+
String cont_type)
114117
throws IOException {
115118

116119
ArrayList<Pair<String, ?>> pList = new ArrayList<>();
@@ -318,9 +321,25 @@ private Codecs() {
318321

319322
value = file;
320323
} else { // It's simple data
321-
value = new String(data, start, end - start);
324+
if (name.equals("text")) {
325+
try {
326+
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
327+
decoder.decode(ByteBuffer.wrap(data, start, end - start));
328+
value = new String(data, start, end - start, StandardCharsets.UTF_8);
329+
pList.add(new ImmutablePair<>("textcharset", StandardCharsets.UTF_8));
330+
} catch (CharacterCodingException ignoredEx) {
331+
value = new String(data, start, end - start, StandardCharsets.ISO_8859_1);
332+
pList.add(new ImmutablePair<>("textcharset", StandardCharsets.ISO_8859_1));
333+
}
334+
} else {
335+
value = new String(data, start, end - start, StandardCharsets.ISO_8859_1);
336+
}
337+
// value = new String(data, start, end - start);
338+
}
339+
// reserved name for fake text charset
340+
if (!name.equals("textcharset")) {
341+
pList.add(new ImmutablePair<>(name, value));
322342
}
323-
pList.add(new ImmutablePair<>(name, value));
324343
if (debugMode) {
325344
System.err.println("[ADD " + name + ',' + value + ','
326345
+ value.getClass() + ']');

0 commit comments

Comments
 (0)