Skip to content

Commit c9dedff

Browse files
committed
CSV-67 UnicodeUnescapeReader should not be applied before parsing
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1301928 13f79535-47bb-0310-9956-ffa450edef68
1 parent 29efef7 commit c9dedff

5 files changed

Lines changed: 18 additions & 163 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 14 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@ public class CSVFormat implements Serializable {
3838
private final char escape;
3939
private final boolean leadingSpacesIgnored;
4040
private final boolean trailingSpacesIgnored;
41-
private final boolean unicodeEscapesInterpreted;
4241
private final boolean emptyLinesIgnored;
4342
private final String lineSeparator; // for outputs
4443
private final String[] header;
@@ -53,7 +52,7 @@ public class CSVFormat implements Serializable {
5352
static final char DISABLED = '\ufffe';
5453

5554
/** Standard comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. */
56-
public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, false, true, CRLF, null);
55+
public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, true, CRLF, null);
5756

5857
/**
5958
* Excel file format (using a comma as the value delimiter).
@@ -66,10 +65,10 @@ public class CSVFormat implements Serializable {
6665
*
6766
* <pre>CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');</pre>
6867
*/
69-
public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED, DISABLED, false, false, false, false, CRLF, null);
68+
public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED, DISABLED, false, false, false, CRLF, null);
7069

7170
/** Tab-delimited format, with quote; leading and trailing spaces ignored. */
72-
public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED, DISABLED, true, true, false, true, CRLF, null);
71+
public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED, DISABLED, true, true, true, CRLF, null);
7372

7473
/**
7574
* Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and
@@ -79,7 +78,7 @@ public class CSVFormat implements Serializable {
7978
*
8079
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
8180
*/
82-
public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED, DISABLED, '\\', false, false, false, false, "\n", null);
81+
public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED, DISABLED, '\\', false, false, false, "\n", null);
8382

8483

8584
/**
@@ -91,7 +90,6 @@ public class CSVFormat implements Serializable {
9190
* @param escape the char used to escape special characters in values
9291
* @param leadingSpacesIgnored <tt>true</tt> when leading whitespaces should be ignored
9392
* @param trailingSpacesIgnored <tt>true</tt> when trailing whitespaces should be ignored
94-
* @param unicodeEscapesInterpreted <tt>true</tt> when unicode escapes should be interpreted
9593
* @param emptyLinesIgnored <tt>true</tt> when the parser should skip empty lines
9694
* @param lineSeparator the line separator to use for output
9795
* @param header the header
@@ -103,7 +101,6 @@ public class CSVFormat implements Serializable {
103101
char escape,
104102
boolean leadingSpacesIgnored,
105103
boolean trailingSpacesIgnored,
106-
boolean unicodeEscapesInterpreted,
107104
boolean emptyLinesIgnored,
108105
String lineSeparator,
109106
String[] header) {
@@ -113,7 +110,6 @@ public class CSVFormat implements Serializable {
113110
this.escape = escape;
114111
this.leadingSpacesIgnored = leadingSpacesIgnored;
115112
this.trailingSpacesIgnored = trailingSpacesIgnored;
116-
this.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
117113
this.emptyLinesIgnored = emptyLinesIgnored;
118114
this.lineSeparator = lineSeparator;
119115
this.header = header;
@@ -176,7 +172,7 @@ public CSVFormat withDelimiter(char delimiter) {
176172
throw new IllegalArgumentException("The delimiter cannot be a line break");
177173
}
178174

179-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
175+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
180176
}
181177

182178
/**
@@ -200,7 +196,7 @@ public CSVFormat withEncapsulator(char encapsulator) {
200196
throw new IllegalArgumentException("The encapsulator cannot be a line break");
201197
}
202198

203-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
199+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
204200
}
205201

206202
boolean isEncapsulating() {
@@ -228,7 +224,7 @@ public CSVFormat withCommentStart(char commentStart) {
228224
throw new IllegalArgumentException("The comment start character cannot be a line break");
229225
}
230226

231-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
227+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
232228
}
233229

234230
/**
@@ -261,7 +257,7 @@ public CSVFormat withEscape(char escape) {
261257
throw new IllegalArgumentException("The escape character cannot be a line break");
262258
}
263259

264-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
260+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
265261
}
266262

267263
boolean isEscaping() {
@@ -285,7 +281,7 @@ public boolean isLeadingSpacesIgnored() {
285281
* @return A copy of this format with the specified left trimming behavior.
286282
*/
287283
public CSVFormat withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
288-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
284+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
289285
}
290286

291287
/**
@@ -305,7 +301,7 @@ public boolean isTrailingSpacesIgnored() {
305301
* @return A copy of this format with the specified right trimming behavior.
306302
*/
307303
public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
308-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
304+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
309305
}
310306

311307
/**
@@ -316,28 +312,7 @@ public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
316312
* @return A copy of this format with the specified trimming behavior.
317313
*/
318314
public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
319-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
320-
}
321-
322-
/**
323-
* Tells if unicode escape sequences (e.g. {@literal \u1234}) are turned into their corresponding character
324-
* when parsing input.
325-
*
326-
* @return <tt>true</tt> if unicode escape sequences are interpreted, <tt>false</tt> if they are left as is.
327-
*/
328-
public boolean isUnicodeEscapesInterpreted() {
329-
return unicodeEscapesInterpreted;
330-
}
331-
332-
/**
333-
* Returns a copy of this format with the specified unicode escaping behavior.
334-
*
335-
* @param unicodeEscapesInterpreted the escaping behavior, <tt>true</tt> to interpret unicode escape sequences,
336-
* <tt>false</tt> to leave the escape sequences as is.
337-
* @return A copy of this format with the specified unicode escaping behavior.
338-
*/
339-
public CSVFormat withUnicodeEscapesInterpreted(boolean unicodeEscapesInterpreted) {
340-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
315+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
341316
}
342317

343318
/**
@@ -357,7 +332,7 @@ public boolean isEmptyLinesIgnored() {
357332
* @return A copy of this format with the specified empty line skipping behavior.
358333
*/
359334
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
360-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
335+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
361336
}
362337

363338
/**
@@ -377,7 +352,7 @@ public String getLineSeparator() {
377352
* @return A copy of this format using the specified output line separator
378353
*/
379354
public CSVFormat withLineSeparator(String lineSeparator) {
380-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
355+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
381356
}
382357

383358
String[] getHeader() {
@@ -399,7 +374,7 @@ String[] getHeader() {
399374
* @return A copy of this format using the specified header
400375
*/
401376
public CSVFormat withHeader(String... header) {
402-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
377+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
403378
}
404379

405380
/**

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -92,10 +92,6 @@ public CSVParser(Reader input) throws IOException {
9292
public CSVParser(Reader input, CSVFormat format) throws IOException {
9393
format.validate();
9494

95-
if (format.isUnicodeEscapesInterpreted()) {
96-
input = new UnicodeUnescapeReader(input);
97-
}
98-
9995
this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));
10096

10197
initializeHeader(format);

src/main/java/org/apache/commons/csv/UnicodeUnescapeReader.java

Lines changed: 0 additions & 88 deletions
This file was deleted.

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ public class CSVFormatTest {
3030

3131
@Test
3232
public void testImmutalibity() {
33-
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, true, "\r\n", null);
33+
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
3434

3535
format.withDelimiter('?');
3636
format.withEncapsulator('?');
@@ -40,7 +40,6 @@ public void testImmutalibity() {
4040
format.withLeadingSpacesIgnored(false);
4141
format.withTrailingSpacesIgnored(false);
4242
format.withEmptyLinesIgnored(false);
43-
format.withUnicodeEscapesInterpreted(false);
4443

4544
assertEquals('!', format.getDelimiter());
4645
assertEquals('!', format.getEncapsulator());
@@ -51,12 +50,11 @@ public void testImmutalibity() {
5150
assertTrue(format.isLeadingSpacesIgnored());
5251
assertTrue(format.isTrailingSpacesIgnored());
5352
assertTrue(format.isEmptyLinesIgnored());
54-
assertTrue(format.isUnicodeEscapesInterpreted());
5553
}
5654

5755
@Test
5856
public void testMutators() {
59-
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, true, "\r\n", null);
57+
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
6058

6159
assertEquals('?', format.withDelimiter('?').getDelimiter());
6260
assertEquals('?', format.withEncapsulator('?').getEncapsulator());
@@ -69,7 +67,6 @@ public void testMutators() {
6967
assertFalse(format.withSurroundingSpacesIgnored(false).isLeadingSpacesIgnored());
7068
assertFalse(format.withSurroundingSpacesIgnored(false).isTrailingSpacesIgnored());
7169
assertFalse(format.withEmptyLinesIgnored(false).isEmptyLinesIgnored());
72-
assertFalse(format.withUnicodeEscapesInterpreted(false).isUnicodeEscapesInterpreted());
7370
}
7471

7572
@Test
@@ -172,7 +169,6 @@ public void testSerialization() throws Exception {
172169
assertEquals("comment start", CSVFormat.DEFAULT.getCommentStart(), format.getCommentStart());
173170
assertEquals("line separator", CSVFormat.DEFAULT.getLineSeparator(), format.getLineSeparator());
174171
assertEquals("escape", CSVFormat.DEFAULT.getEscape(), format.getEscape());
175-
assertEquals("unicode escape", CSVFormat.DEFAULT.isUnicodeEscapesInterpreted(), format.isUnicodeEscapesInterpreted());
176172
assertEquals("trim left", CSVFormat.DEFAULT.isLeadingSpacesIgnored(), format.isLeadingSpacesIgnored());
177173
assertEquals("trim right", CSVFormat.DEFAULT.isTrailingSpacesIgnored(), format.isTrailingSpacesIgnored());
178174
assertEquals("empty lines", CSVFormat.DEFAULT.isEmptyLinesIgnored(), format.isEmptyLinesIgnored());

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 2 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -283,7 +283,7 @@ public void testBackslashEscaping() throws IOException {
283283
};
284284

285285

286-
CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, true, "\r\n", null);
286+
CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
287287

288288
CSVParser parser = new CSVParser(code, format);
289289
List<CSVRecord> records = parser.getRecords();
@@ -312,7 +312,7 @@ public void testBackslashEscaping2() throws IOException {
312312
};
313313

314314

315-
CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, false, true, true, "\r\n", null);
315+
CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
316316

317317
CSVParser parser = new CSVParser(code, format);
318318
List<CSVRecord> records = parser.getRecords();
@@ -356,30 +356,6 @@ public void testDefaultFormat() throws IOException {
356356
assertTrue(CSVPrinterTest.equals(res_comments, records));
357357
}
358358

359-
@Test
360-
public void testUnicodeEscape() throws Exception {
361-
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
362-
CSVParser parser = new CSVParser(code, CSVFormat.DEFAULT.withUnicodeEscapesInterpreted(true));
363-
final Iterator<CSVRecord> iterator = parser.iterator();
364-
CSVRecord record = iterator.next();
365-
assertEquals(2, record.size());
366-
assertEquals("abc", record.get(0));
367-
assertEquals("public", record.get(1));
368-
assertFalse("Should not have any more records", iterator.hasNext());
369-
}
370-
371-
@Test
372-
public void testUnicodeEscapeMySQL() throws Exception {
373-
String code = "abc\t\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
374-
CSVParser parser = new CSVParser(code, CSVFormat.MYSQL.withUnicodeEscapesInterpreted(true));
375-
final Iterator<CSVRecord> iterator = parser.iterator();
376-
CSVRecord record = iterator.next();
377-
assertEquals(2, record.size());
378-
assertEquals("abc", record.get(0));
379-
assertEquals("public", record.get(1));
380-
assertFalse("Should not have any more records", iterator.hasNext());
381-
}
382-
383359
@Test
384360
public void testCarriageReturnLineFeedEndings() throws IOException {
385361
String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";

0 commit comments

Comments
 (0)