Skip to content

Commit 50e2719

Browse files
committed
Remove DISABLED character hack.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1397783 13f79535-47bb-0310-9956-ffa450edef68
1 parent dc7a719 commit 50e2719

3 files changed

Lines changed: 91 additions & 57 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 76 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.commons.csv;
1919

2020
import static org.apache.commons.csv.Constants.COMMA;
21+
import static org.apache.commons.csv.Constants.CR;
2122
import static org.apache.commons.csv.Constants.CRLF;
2223
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE;
2324
import static org.apache.commons.csv.Constants.ESCAPE;
@@ -38,30 +39,19 @@ public class CSVFormat implements Serializable {
3839

3940
private static final long serialVersionUID = 1L;
4041

41-
private final char delimiter;
42-
private final char encapsulator;
43-
private final char commentStart;
44-
private final char escape;
42+
private final Character delimiter;
43+
private final Character encapsulator;
44+
private final Character commentStart;
45+
private final Character escape;
4546
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
4647
private final boolean ignoreEmptyLines;
4748
private final String lineSeparator; // for outputs
4849
private final String[] header;
4950

50-
private final boolean isEscaping;
51-
private final boolean isCommentingEnabled;
52-
private final boolean isEncapsulating;
53-
54-
/**
55-
* Constant char to be used for disabling comments, escapes and encapsulation. The value -2 is used because it
56-
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two chars
57-
* (using surrogates) and thus there should never be a collision with a real text char.
58-
*/
59-
static final char DISABLED = '\ufffe';
60-
6151
/**
6252
* Starting format with no settings defined; used for creating other formats from scratch.
6353
*/
64-
static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, null, null);
54+
static final CSVFormat PRISTINE = new CSVFormat(null, null, null, null, false, false, null, null);
6555

6656
/**
6757
* Standard comma separated format, as for {@link #RFC4180} but allowing blank lines.
@@ -73,8 +63,8 @@ public class CSVFormat implements Serializable {
7363
* </ul>
7464
*/
7565
public static final CSVFormat DEFAULT =
76-
PRISTINE.
77-
withDelimiter(COMMA)
66+
PRISTINE
67+
.withDelimiter(COMMA)
7868
.withEncapsulator(DOUBLE_QUOTE)
7969
.withIgnoreEmptyLines(true)
8070
.withLineSeparator(CRLF);
@@ -89,8 +79,8 @@ public class CSVFormat implements Serializable {
8979
* </ul>
9080
*/
9181
public static final CSVFormat RFC4180 =
92-
PRISTINE.
93-
withDelimiter(COMMA)
82+
PRISTINE
83+
.withDelimiter(COMMA)
9484
.withEncapsulator(DOUBLE_QUOTE)
9585
.withLineSeparator(CRLF);
9686

@@ -127,7 +117,7 @@ public class CSVFormat implements Serializable {
127117
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
128118
* http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
129119
*/
130-
public static final CSVFormat MYSQL =
120+
public static final CSVFormat MYSQL =
131121
PRISTINE
132122
.withDelimiter(TAB)
133123
.withEscape(ESCAPE)
@@ -153,7 +143,7 @@ public class CSVFormat implements Serializable {
153143
* @param header
154144
* the header
155145
*/
156-
CSVFormat(final char delimiter, final char encapsulator, final char commentStart, final char escape, final boolean surroundingSpacesIgnored,
146+
CSVFormat(final Character delimiter, final Character encapsulator, final Character commentStart, final Character escape, final boolean surroundingSpacesIgnored,
157147
final boolean emptyLinesIgnored, final String lineSeparator, final String[] header) {
158148
this.delimiter = delimiter;
159149
this.encapsulator = encapsulator;
@@ -163,9 +153,6 @@ public class CSVFormat implements Serializable {
163153
this.ignoreEmptyLines = emptyLinesIgnored;
164154
this.lineSeparator = lineSeparator;
165155
this.header = header;
166-
this.isEncapsulating = encapsulator != DISABLED;
167-
this.isCommentingEnabled = commentStart != DISABLED;
168-
this.isEscaping = escape != DISABLED;
169156
}
170157

171158
/**
@@ -176,8 +163,8 @@ public class CSVFormat implements Serializable {
176163
*
177164
* @return true if <code>c</code> is a line break character
178165
*/
179-
private static boolean isLineBreak(final char c) {
180-
return c == '\n' || c == '\r';
166+
private static boolean isLineBreak(final Character c) {
167+
return c != null && (c == LF || c == CR);
181168
}
182169

183170
/**
@@ -199,12 +186,12 @@ void validate() throws IllegalArgumentException {
199186
commentStart + "\")");
200187
}
201188

202-
if (encapsulator != DISABLED && encapsulator == commentStart) {
189+
if (encapsulator != null && encapsulator == commentStart) {
203190
throw new IllegalArgumentException(
204191
"The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
205192
}
206193

207-
if (escape != DISABLED && escape == commentStart) {
194+
if (escape != null && escape == commentStart) {
208195
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" +
209196
commentStart + "\")");
210197
}
@@ -229,6 +216,19 @@ public char getDelimiter() {
229216
* thrown if the specified character is a line break
230217
*/
231218
public CSVFormat withDelimiter(final char delimiter) {
219+
return withDelimiter(Character.valueOf(delimiter));
220+
}
221+
222+
/**
223+
* Returns a copy of this format using the specified delimiter character.
224+
*
225+
* @param delimiter
226+
* the delimiter character
227+
* @return A copy of this format using the specified delimiter character
228+
* @throws IllegalArgumentException
229+
* thrown if the specified character is a line break
230+
*/
231+
public CSVFormat withDelimiter(final Character delimiter) {
232232
if (isLineBreak(delimiter)) {
233233
throw new IllegalArgumentException("The delimiter cannot be a line break");
234234
}
@@ -241,7 +241,7 @@ public CSVFormat withDelimiter(final char delimiter) {
241241
*
242242
* @return the encapsulator character
243243
*/
244-
public char getEncapsulator() {
244+
public Character getEncapsulator() {
245245
return encapsulator;
246246
}
247247

@@ -255,6 +255,19 @@ public char getEncapsulator() {
255255
* thrown if the specified character is a line break
256256
*/
257257
public CSVFormat withEncapsulator(final char encapsulator) {
258+
return withEncapsulator(Character.valueOf(encapsulator));
259+
}
260+
261+
/**
262+
* Returns a copy of this format using the specified encapsulator character.
263+
*
264+
* @param encapsulator
265+
* the encapsulator character
266+
* @return A copy of this format using the specified encapsulator character
267+
* @throws IllegalArgumentException
268+
* thrown if the specified character is a line break
269+
*/
270+
public CSVFormat withEncapsulator(final Character encapsulator) {
258271
if (isLineBreak(encapsulator)) {
259272
throw new IllegalArgumentException("The encapsulator cannot be a line break");
260273
}
@@ -268,15 +281,15 @@ public CSVFormat withEncapsulator(final char encapsulator) {
268281
* @return {@code true} if an encapsulator is defined
269282
*/
270283
public boolean isEncapsulating() {
271-
return isEncapsulating;
284+
return encapsulator != null;
272285
}
273286

274287
/**
275288
* Returns the character marking the start of a line comment.
276289
*
277290
* @return the comment start marker.
278291
*/
279-
public char getCommentStart() {
292+
public Character getCommentStart() {
280293
return commentStart;
281294
}
282295

@@ -292,6 +305,21 @@ public char getCommentStart() {
292305
* thrown if the specified character is a line break
293306
*/
294307
public CSVFormat withCommentStart(final char commentStart) {
308+
return withCommentStart(Character.valueOf(commentStart));
309+
}
310+
311+
/**
312+
* Returns a copy of this format using the specified character as the comment start marker.
313+
*
314+
* Note that the comment introducer character is only recognised at the start of a line.
315+
*
316+
* @param commentStart
317+
* the comment start marker
318+
* @return A copy of this format using the specified character as the comment start marker
319+
* @throws IllegalArgumentException
320+
* thrown if the specified character is a line break
321+
*/
322+
public CSVFormat withCommentStart(final Character commentStart) {
295323
if (isLineBreak(commentStart)) {
296324
throw new IllegalArgumentException("The comment start character cannot be a line break");
297325
}
@@ -307,15 +335,15 @@ public CSVFormat withCommentStart(final char commentStart) {
307335
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
308336
*/
309337
public boolean isCommentingEnabled() {
310-
return isCommentingEnabled;
338+
return commentStart != null;
311339
}
312340

313341
/**
314342
* Returns the escape character.
315343
*
316344
* @return the escape character
317345
*/
318-
public char getEscape() {
346+
public Character getEscape() {
319347
return escape;
320348
}
321349

@@ -329,6 +357,19 @@ public char getEscape() {
329357
* thrown if the specified character is a line break
330358
*/
331359
public CSVFormat withEscape(final char escape) {
360+
return withEscape(Character.valueOf(escape));
361+
}
362+
363+
/**
364+
* Returns a copy of this format using the specified escape character.
365+
*
366+
* @param escape
367+
* the escape character
368+
* @return A copy of this format using the specified escape character
369+
* @throws IllegalArgumentException
370+
* thrown if the specified character is a line break
371+
*/
372+
public CSVFormat withEscape(final Character escape) {
332373
if (isLineBreak(escape)) {
333374
throw new IllegalArgumentException("The escape character cannot be a line break");
334375
}
@@ -342,7 +383,7 @@ public CSVFormat withEscape(final char escape) {
342383
* @return {@code true} if escapes are processed
343384
*/
344385
public boolean isEscaping() {
345-
return isEscaping;
386+
return escape != null;
346387
}
347388

348389
/**

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,10 @@
3232
*/
3333
abstract class Lexer {
3434

35-
private final boolean isEncapsulating;
36-
private final boolean isEscaping;
37-
private final boolean isCommentEnabled;
38-
39-
private final char delimiter;
40-
private final char escape;
41-
private final char encapsulator;
42-
private final char commmentStart;
35+
private final Character delimiter;
36+
private final Character escape;
37+
private final Character encapsulator;
38+
private final Character commmentStart;
4339

4440
final boolean surroundingSpacesIgnored;
4541
final boolean emptyLinesIgnored;
@@ -52,9 +48,6 @@ abstract class Lexer {
5248
Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
5349
this.format = format;
5450
this.in = in;
55-
this.isEncapsulating = format.isEncapsulating();
56-
this.isEscaping = format.isEscaping();
57-
this.isCommentEnabled = format.isCommentingEnabled();
5851
this.delimiter = format.getDelimiter();
5952
this.escape = format.getEscape();
6053
this.encapsulator = format.getEncapsulator();
@@ -144,14 +137,14 @@ boolean isDelimiter(final int c) {
144137
}
145138

146139
boolean isEscape(final int c) {
147-
return isEscaping && c == escape;
140+
return escape != null && c == escape;
148141
}
149142

150143
boolean isEncapsulator(final int c) {
151-
return isEncapsulating && c == encapsulator;
144+
return encapsulator != null && c == encapsulator;
152145
}
153146

154147
boolean isCommentStart(final int c) {
155-
return isCommentEnabled && c == commmentStart;
148+
return commmentStart != null && c == commmentStart;
156149
}
157150
}

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ public void testImmutalibity() {
4646
format.withIgnoreEmptyLines(false);
4747

4848
assertEquals('!', format.getDelimiter());
49-
assertEquals('!', format.getEncapsulator());
50-
assertEquals('!', format.getCommentStart());
51-
assertEquals('!', format.getEscape());
49+
assertEquals('!', format.getEncapsulator().charValue());
50+
assertEquals('!', format.getCommentStart().charValue());
51+
assertEquals('!', format.getEscape().charValue());
5252
assertEquals(CRLF, format.getLineSeparator());
5353

5454
assertTrue(format.getIgnoreSurroundingSpaces());
@@ -60,10 +60,10 @@ public void testMutators() {
6060
final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, CRLF, null);
6161

6262
assertEquals('?', format.withDelimiter('?').getDelimiter());
63-
assertEquals('?', format.withEncapsulator('?').getEncapsulator());
64-
assertEquals('?', format.withCommentStart('?').getCommentStart());
63+
assertEquals('?', format.withEncapsulator('?').getEncapsulator().charValue());
64+
assertEquals('?', format.withCommentStart('?').getCommentStart().charValue());
6565
assertEquals("?", format.withLineSeparator("?").getLineSeparator());
66-
assertEquals('?', format.withEscape('?').getEscape());
66+
assertEquals('?', format.withEscape('?').getEscape().charValue());
6767

6868
assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces());
6969
assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines());
@@ -131,7 +131,7 @@ public void testValidation() {
131131
// expected
132132
}
133133

134-
format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
134+
format.withEncapsulator(null).withCommentStart(null).validate();
135135

136136
try {
137137
format.withEscape('!').withCommentStart('!').validate();
@@ -140,7 +140,7 @@ public void testValidation() {
140140
// expected
141141
}
142142

143-
format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
143+
format.withEscape(null).withCommentStart(null).validate();
144144

145145

146146
try {

0 commit comments

Comments
 (0)