Skip to content

Commit 38741a4

Browse files
committed
CSV-54 Confusing semantic of the ignore leading/trailing spaces parameters
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1305494 13f79535-47bb-0310-9956-ffa450edef68
1 parent 5063b16 commit 38741a4

7 files changed

Lines changed: 38 additions & 82 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 18 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ public class CSVFormat implements Serializable {
3636
private final char encapsulator;
3737
private final char commentStart;
3838
private final char escape;
39-
private final boolean leadingSpacesIgnored;
40-
private final boolean trailingSpacesIgnored;
39+
private final boolean surroundingSpacesIgnored; // Should leading/trailing spaces be ignored around values?
4140
private final boolean emptyLinesIgnored;
4241
private final String lineSeparator; // for outputs
4342
private final String[] header;
@@ -55,7 +54,7 @@ public class CSVFormat implements Serializable {
5554
* Starting format with no settings defined; used for creating other formats from scratch.
5655
*/
5756
private static CSVFormat PRISTINE =
58-
new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, false, null, null);
57+
new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, null, null);
5958

6059
/**
6160
* Standard comma separated format, as for {@link #RFC4180} but allowing blank lines.
@@ -113,8 +112,7 @@ public class CSVFormat implements Serializable {
113112
PRISTINE
114113
.withDelimiter('\t')
115114
.withEncapsulator('"')
116-
.withLeadingSpacesIgnored(true)
117-
.withTrailingSpacesIgnored(true)
115+
.withSurroundingSpacesIgnored(true)
118116
.withEmptyLinesIgnored(true)
119117
.withLineSeparator(CRLF)
120118
;
@@ -142,8 +140,7 @@ public class CSVFormat implements Serializable {
142140
* @param encapsulator the char used as value encapsulation marker
143141
* @param commentStart the char used for comment identification
144142
* @param escape the char used to escape special characters in values
145-
* @param leadingSpacesIgnored <tt>true</tt> when leading whitespaces should be ignored
146-
* @param trailingSpacesIgnored <tt>true</tt> when trailing whitespaces should be ignored
143+
* @param surroundingSpacesIgnored <tt>true</tt> when whitespaces enclosing values should be ignored
147144
* @param emptyLinesIgnored <tt>true</tt> when the parser should skip empty lines
148145
* @param lineSeparator the line separator to use for output
149146
* @param header the header
@@ -153,17 +150,15 @@ public class CSVFormat implements Serializable {
153150
char encapsulator,
154151
char commentStart,
155152
char escape,
156-
boolean leadingSpacesIgnored,
157-
boolean trailingSpacesIgnored,
153+
boolean surroundingSpacesIgnored,
158154
boolean emptyLinesIgnored,
159155
String lineSeparator,
160156
String[] header) {
161157
this.delimiter = delimiter;
162158
this.encapsulator = encapsulator;
163159
this.commentStart = commentStart;
164160
this.escape = escape;
165-
this.leadingSpacesIgnored = leadingSpacesIgnored;
166-
this.trailingSpacesIgnored = trailingSpacesIgnored;
161+
this.surroundingSpacesIgnored = surroundingSpacesIgnored;
167162
this.emptyLinesIgnored = emptyLinesIgnored;
168163
this.lineSeparator = lineSeparator;
169164
this.header = header;
@@ -226,7 +221,7 @@ public CSVFormat withDelimiter(char delimiter) {
226221
throw new IllegalArgumentException("The delimiter cannot be a line break");
227222
}
228223

229-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
224+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
230225
}
231226

232227
/**
@@ -250,7 +245,7 @@ public CSVFormat withEncapsulator(char encapsulator) {
250245
throw new IllegalArgumentException("The encapsulator cannot be a line break");
251246
}
252247

253-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
248+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
254249
}
255250

256251
boolean isEncapsulating() {
@@ -278,7 +273,7 @@ public CSVFormat withCommentStart(char commentStart) {
278273
throw new IllegalArgumentException("The comment start character cannot be a line break");
279274
}
280275

281-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
276+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
282277
}
283278

284279
/**
@@ -311,51 +306,20 @@ public CSVFormat withEscape(char escape) {
311306
throw new IllegalArgumentException("The escape character cannot be a line break");
312307
}
313308

314-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
309+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
315310
}
316311

317312
boolean isEscaping() {
318313
return this.escape != DISABLED;
319314
}
320315

321316
/**
322-
* Tells if the spaces characters at the beginning of the values are ignored when parsing a file.
317+
* Specifies whether spaces around values are ignored when parsing input.
323318
*
324-
* @return <tt>true</tt> if leading spaces are removed, <tt>false</tt> if they are preserved.
319+
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the value.
325320
*/
326-
public boolean isLeadingSpacesIgnored() {
327-
return leadingSpacesIgnored;
328-
}
329-
330-
/**
331-
* Returns a copy of this format with the specified left trimming behavior.
332-
*
333-
* @param leadingSpacesIgnored the left trimming behavior, <tt>true</tt> to remove the leading spaces,
334-
* <tt>false</tt> to leave the spaces as is.
335-
* @return A copy of this format with the specified left trimming behavior.
336-
*/
337-
public CSVFormat withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
338-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
339-
}
340-
341-
/**
342-
* Tells if the spaces characters at the end of the values are ignored when parsing a file.
343-
*
344-
* @return <tt>true</tt> if trailing spaces are removed, <tt>false</tt> if they are preserved.
345-
*/
346-
public boolean isTrailingSpacesIgnored() {
347-
return trailingSpacesIgnored;
348-
}
349-
350-
/**
351-
* Returns a copy of this format with the specified right trimming behavior.
352-
*
353-
* @param trailingSpacesIgnored the right trimming behavior, <tt>true</tt> to remove the trailing spaces,
354-
* <tt>false</tt> to leave the spaces as is.
355-
* @return A copy of this format with the specified right trimming behavior.
356-
*/
357-
public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
358-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
321+
public boolean isSurroundingSpacesIgnored() {
322+
return surroundingSpacesIgnored;
359323
}
360324

361325
/**
@@ -366,7 +330,7 @@ public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
366330
* @return A copy of this format with the specified trimming behavior.
367331
*/
368332
public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
369-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
333+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
370334
}
371335

372336
/**
@@ -386,7 +350,7 @@ public boolean isEmptyLinesIgnored() {
386350
* @return A copy of this format with the specified empty line skipping behavior.
387351
*/
388352
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
389-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
353+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
390354
}
391355

392356
/**
@@ -406,7 +370,7 @@ public String getLineSeparator() {
406370
* @return A copy of this format using the specified output line separator
407371
*/
408372
public CSVFormat withLineSeparator(String lineSeparator) {
409-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
373+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
410374
}
411375

412376
String[] getHeader() {
@@ -428,7 +392,7 @@ String[] getHeader() {
428392
* @return A copy of this format using the specified header
429393
*/
430394
public CSVFormat withHeader(String... header) {
431-
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
395+
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
432396
}
433397

434398
/**

src/main/java/org/apache/commons/csv/CSVLexer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ Token nextToken(Token tkn) throws IOException {
8181
// important: make sure a new char gets consumed in each iteration
8282
while (tkn.type == INVALID) {
8383
// ignore whitespaces at beginning of a token
84-
if (leadingSpacesIgnored) {
84+
if (surroundingSpacesIgnored) {
8585
while (isWhitespace(c) && !eol) {
8686
c = in.read();
8787
eol = isEndOfLine(c);
@@ -158,7 +158,7 @@ private Token simpleTokenLexer(Token tkn, int c) throws IOException {
158158
c = in.read();
159159
}
160160

161-
if (trailingSpacesIgnored) {
161+
if (surroundingSpacesIgnored) {
162162
trimTrailingSpaces(tkn.content);
163163
}
164164

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ abstract class Lexer {
3434
private final char encapsulator;
3535
private final char commmentStart;
3636

37-
final boolean leadingSpacesIgnored;
38-
final boolean trailingSpacesIgnored;
37+
final boolean surroundingSpacesIgnored;
3938
final boolean emptyLinesIgnored;
4039

4140
final CSVFormat format;
@@ -53,8 +52,7 @@ abstract class Lexer {
5352
this.escape = format.getEscape();
5453
this.encapsulator = format.getEncapsulator();
5554
this.commmentStart = format.getCommentStart();
56-
this.leadingSpacesIgnored = format.isLeadingSpacesIgnored();
57-
this.trailingSpacesIgnored = format.isTrailingSpacesIgnored();
55+
this.surroundingSpacesIgnored = format.isSurroundingSpacesIgnored();
5856
this.emptyLinesIgnored = format.isEmptyLinesIgnored();
5957
}
6058

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,14 @@ public class CSVFormatTest {
3030

3131
@Test
3232
public void testImmutalibity() {
33-
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
33+
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, "\r\n", null);
3434

3535
format.withDelimiter('?');
3636
format.withEncapsulator('?');
3737
format.withCommentStart('?');
3838
format.withLineSeparator("?");
3939
format.withEscape('?');
40-
format.withLeadingSpacesIgnored(false);
41-
format.withTrailingSpacesIgnored(false);
40+
format.withSurroundingSpacesIgnored(false);
4241
format.withEmptyLinesIgnored(false);
4342

4443
assertEquals('!', format.getDelimiter());
@@ -47,25 +46,21 @@ public void testImmutalibity() {
4746
assertEquals('!', format.getEscape());
4847
assertEquals("\r\n", format.getLineSeparator());
4948

50-
assertTrue(format.isLeadingSpacesIgnored());
51-
assertTrue(format.isTrailingSpacesIgnored());
49+
assertTrue(format.isSurroundingSpacesIgnored());
5250
assertTrue(format.isEmptyLinesIgnored());
5351
}
5452

5553
@Test
5654
public void testMutators() {
57-
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
55+
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, "\r\n", null);
5856

5957
assertEquals('?', format.withDelimiter('?').getDelimiter());
6058
assertEquals('?', format.withEncapsulator('?').getEncapsulator());
6159
assertEquals('?', format.withCommentStart('?').getCommentStart());
6260
assertEquals("?", format.withLineSeparator("?").getLineSeparator());
6361
assertEquals('?', format.withEscape('?').getEscape());
6462

65-
assertFalse(format.withLeadingSpacesIgnored(false).isLeadingSpacesIgnored());
66-
assertFalse(format.withTrailingSpacesIgnored(false).isTrailingSpacesIgnored());
67-
assertFalse(format.withSurroundingSpacesIgnored(false).isLeadingSpacesIgnored());
68-
assertFalse(format.withSurroundingSpacesIgnored(false).isTrailingSpacesIgnored());
63+
assertFalse(format.withSurroundingSpacesIgnored(false).isSurroundingSpacesIgnored());
6964
assertFalse(format.withEmptyLinesIgnored(false).isEmptyLinesIgnored());
7065
}
7166

@@ -170,8 +165,7 @@ public void testSerialization() throws Exception {
170165
assertEquals("comment start", CSVFormat.DEFAULT.getCommentStart(), format.getCommentStart());
171166
assertEquals("line separator", CSVFormat.DEFAULT.getLineSeparator(), format.getLineSeparator());
172167
assertEquals("escape", CSVFormat.DEFAULT.getEscape(), format.getEscape());
173-
assertEquals("trim left", CSVFormat.DEFAULT.isLeadingSpacesIgnored(), format.isLeadingSpacesIgnored());
174-
assertEquals("trim right", CSVFormat.DEFAULT.isTrailingSpacesIgnored(), format.isTrailingSpacesIgnored());
168+
assertEquals("trim", CSVFormat.DEFAULT.isSurroundingSpacesIgnored(), format.isSurroundingSpacesIgnored());
175169
assertEquals("empty lines", CSVFormat.DEFAULT.isEmptyLinesIgnored(), format.isEmptyLinesIgnored());
176170
}
177171
}

src/test/java/org/apache/commons/csv/CSVLexer1.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ Token nextToken(Token tkn) throws IOException {
8282
// important: make sure a new char gets consumed in each iteration
8383
while (!tkn.isReady && tkn.type != EOF) {
8484
// ignore whitespaces at beginning of a token
85-
if (format.isLeadingSpacesIgnored()) {
85+
if (format.isSurroundingSpacesIgnored()) {
8686
while (isWhitespace(c) && !eol) {
8787
wsBuf.append((char) c);
8888
c = in.read();
@@ -115,7 +115,7 @@ Token nextToken(Token tkn) throws IOException {
115115
} else {
116116
// next token must be a simple token
117117
// add removed blanks when not ignoring whitespace chars...
118-
if (!format.isLeadingSpacesIgnored()) {
118+
if (!format.isSurroundingSpacesIgnored()) {
119119
tkn.content.append(wsBuf);
120120
}
121121
simpleTokenLexer(tkn, c);
@@ -167,7 +167,7 @@ private Token simpleTokenLexer(Token tkn, int c) throws IOException {
167167
c = in.read();
168168
}
169169

170-
if (format.isTrailingSpacesIgnored()) {
170+
if (format.isSurroundingSpacesIgnored()) {
171171
trimTrailingSpaces(tkn.content);
172172
}
173173

src/test/java/org/apache/commons/csv/CSVLexerTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ private void assertTokenEquals(Token.Type expectedType, String expectedContent,
4040
@Test
4141
public void testNextToken1() throws IOException {
4242
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
43-
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);
43+
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
4444
assertTokenEquals(TOKEN, "abc", parser.nextToken(new Token()));
4545
assertTokenEquals(TOKEN, "def", parser.nextToken(new Token()));
4646
assertTokenEquals(TOKEN, "hijk", parser.nextToken(new Token()));
@@ -115,7 +115,7 @@ public void testNextToken4() throws IOException {
115115
* a, " foo " ,b
116116
*/
117117
String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
118-
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);
118+
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
119119
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
120120
assertTokenEquals(TOKEN, "foo", parser.nextToken(new Token()));
121121
assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,17 @@ public class CSVParserTest {
5656

5757
@Test
5858
public void testGetLine() throws IOException {
59-
CSVParser parser = new CSVParser(new StringReader(code));
59+
CSVParser parser = new CSVParser(new StringReader(code), CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
6060
for (String[] re : res) {
61-
assertTrue(Arrays.equals(re, parser.getRecord().values()));
61+
assertTrue("Failed to match: "+Arrays.toString(re), Arrays.equals(re, parser.getRecord().values()));
6262
}
6363

6464
assertNull(parser.getRecord());
6565
}
6666

6767
@Test
6868
public void testGetRecords() throws IOException {
69-
CSVParser parser = new CSVParser(new StringReader(code));
69+
CSVParser parser = new CSVParser(new StringReader(code), CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
7070
List<CSVRecord> records = parser.getRecords();
7171
assertEquals(res.length, records.size());
7272
assertTrue(records.size() > 0);
@@ -283,7 +283,7 @@ public void testBackslashEscaping() throws IOException {
283283
};
284284

285285

286-
CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
286+
CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, true, "\r\n", null);
287287

288288
CSVParser parser = new CSVParser(code, format);
289289
List<CSVRecord> records = parser.getRecords();
@@ -312,7 +312,7 @@ public void testBackslashEscaping2() throws IOException {
312312
};
313313

314314

315-
CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
315+
CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, true, "\r\n", null);
316316

317317
CSVParser parser = new CSVParser(code, format);
318318
List<CSVRecord> records = parser.getRecords();

0 commit comments

Comments
 (0)