Skip to content

Commit 5a0894f

Browse files
committed
[CSV-93] Allow the handling of NULL values. Use a single property 'nullString' for both input and output processing. No substitutions occur if null. For reading, nullString is used to convert field values to null. For writing, nullString is used to output the given string instead of the empty string.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1495911 13f79535-47bb-0310-9956-ffa450edef68
1 parent 530b038 commit 5a0894f

4 files changed

Lines changed: 92 additions & 40 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public static class CSVFormatBuilder {
6363
private boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
6464
private boolean ignoreEmptyLines;
6565
private String recordSeparator; // for outputs
66-
private String nullToString; // for outputs
66+
private String nullString;
6767
private String[] header;
6868

6969
/**
@@ -75,7 +75,7 @@ public static class CSVFormatBuilder {
7575
*/
7676
// package protected to give access without needing a synthetic accessor
7777
CSVFormatBuilder(final char delimiter){
78-
this(delimiter, null, null, null, null, false, false, null, Constants.EMPTY, null);
78+
this(delimiter, null, null, null, null, false, false, null, null, null);
7979
}
8080

8181
/**
@@ -95,19 +95,20 @@ public static class CSVFormatBuilder {
9595
* <tt>true</tt> when whitespaces enclosing values should be ignored
9696
* @param ignoreEmptyLines
9797
* <tt>true</tt> when the parser should skip empty lines
98-
* @param nullToString TODO
99-
* @param header
100-
* the header
10198
* @param recordSeparator
10299
* the record separator to use for output
100+
* @param nullString
101+
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
102+
* @param header
103+
* the header
103104
* @throws IllegalArgumentException if the delimiter is a line break character
104105
*/
105106
// package protected for use by test code
106107
CSVFormatBuilder(final char delimiter, final Character quoteChar,
107108
final Quote quotePolicy, final Character commentStart,
108109
final Character escape, final boolean ignoreSurroundingSpaces,
109110
final boolean ignoreEmptyLines, final String recordSeparator,
110-
final String nullToString, final String[] header) {
111+
String nullString, final String[] header) {
111112
if (isLineBreak(delimiter)) {
112113
throw new IllegalArgumentException("The delimiter cannot be a line break");
113114
}
@@ -119,7 +120,7 @@ public static class CSVFormatBuilder {
119120
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
120121
this.ignoreEmptyLines = ignoreEmptyLines;
121122
this.recordSeparator = recordSeparator;
122-
this.nullToString = nullToString;
123+
this.nullString = nullString;
123124
this.header = header;
124125
}
125126

@@ -135,7 +136,7 @@ public static class CSVFormatBuilder {
135136
this(format.delimiter, format.quoteChar, format.quotePolicy,
136137
format.commentStart, format.escape,
137138
format.ignoreSurroundingSpaces, format.ignoreEmptyLines,
138-
format.recordSeparator, format.nullToString, format.header);
139+
format.recordSeparator, format.nullString, format.header);
139140
}
140141

141142
/**
@@ -146,7 +147,8 @@ public static class CSVFormatBuilder {
146147
public CSVFormat build() {
147148
validate();
148149
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
149-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullToString, header);
150+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString,
151+
header);
150152
}
151153

152154
/**
@@ -331,15 +333,22 @@ public CSVFormatBuilder withIgnoreSurroundingSpaces(final boolean ignoreSurround
331333
}
332334

333335
/**
334-
* Sets the String to use for null values for output.
335-
*
336-
* @param nullToString
337-
* the String to use for null values for output.
338-
*
339-
* @return This builder with the the specified output record separator
336+
* Performs conversions to and from null for strings on input and output.
337+
* <ul>
338+
* <li>
339+
* <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
340+
* records.</li>
341+
* <li>
342+
* <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
343+
* </ul>
344+
*
345+
* @param nullString
346+
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
347+
*
348+
* @return This builder with the specified null conversion string.
340349
*/
341-
public CSVFormatBuilder withNullToString(final String nullToString) {
342-
this.nullToString = nullToString;
350+
public CSVFormatBuilder withNullString(final String nullString) {
351+
this.nullString = nullString;
343352
return this;
344353
}
345354

@@ -439,21 +448,18 @@ static boolean isLineBreak(final Character c) {
439448
* @return a standard comma separated format builder, as for {@link #RFC4180} but allowing empty lines.
440449
*/
441450
public static CSVFormatBuilder newBuilder() {
442-
return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, Constants.EMPTY,
443-
null);
451+
return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null);
444452
}
453+
445454
private final char delimiter;
446455
private final Character quoteChar;
447456
private final Quote quotePolicy;
448457
private final Character commentStart;
449458
private final Character escape;
450459
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
451460
private final boolean ignoreEmptyLines;
452-
453461
private final String recordSeparator; // for outputs
454-
455-
private final String nullToString; // for outputs
456-
462+
private final String nullString;
457463
private final String[] header;
458464

459465
/**
@@ -588,8 +594,8 @@ public static CSVFormatBuilder newBuilder(final CSVFormat format) {
588594
* <tt>true</tt> when the parser should skip empty lines
589595
* @param recordSeparator
590596
* the line separator to use for output
591-
* @param nullToString
592-
* the String to use to write <code>null</code> values.
597+
* @param nullString
598+
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
593599
* @param header
594600
* the header
595601
* @throws IllegalArgumentException if the delimiter is a line break character
@@ -599,7 +605,7 @@ public static CSVFormatBuilder newBuilder(final CSVFormat format) {
599605
final Quote quotePolicy, final Character commentStart,
600606
final Character escape, final boolean ignoreSurroundingSpaces,
601607
final boolean ignoreEmptyLines, final String recordSeparator,
602-
final String nullToString, final String[] header) {
608+
final String nullString, final String[] header) {
603609
if (isLineBreak(delimiter)) {
604610
throw new IllegalArgumentException("The delimiter cannot be a line break");
605611
}
@@ -611,7 +617,7 @@ public static CSVFormatBuilder newBuilder(final CSVFormat format) {
611617
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
612618
this.ignoreEmptyLines = ignoreEmptyLines;
613619
this.recordSeparator = recordSeparator;
614-
this.nullToString = nullToString;
620+
this.nullString = nullString;
615621
this.header = header == null ? null : header.clone();
616622
}
617623

@@ -744,12 +750,20 @@ public boolean getIgnoreSurroundingSpaces() {
744750
}
745751

746752
/**
747-
* Returns the value to use for writing null values.
748-
*
749-
* @return the value to use for writing null values.
753+
* Gets the String to convert to and from {@code null}.
754+
* <ul>
755+
* <li>
756+
* <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
757+
* records.
758+
* </li>
759+
* <li>
760+
* <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
761+
* </ul>
762+
*
763+
* @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
750764
*/
751-
public String getNullToString() {
752-
return nullToString;
765+
public String getNullString() {
766+
return nullString;
753767
}
754768

755769
/**

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ public class CSVParser implements Iterable<CSVRecord> {
8585
private final Lexer lexer;
8686
private final Map<String, Integer> headerMap;
8787
private long recordNumber;
88+
private final CSVFormat format;
8889

8990
// the following objects are shared to reduce garbage
9091

@@ -120,7 +121,8 @@ public CSVParser(final Reader input) throws IOException {
120121
*/
121122
public CSVParser(final Reader input, final CSVFormat format) throws IOException {
122123
this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));
123-
this.headerMap = initializeHeader(format);
124+
this.format = format;
125+
this.headerMap = initializeHeader();
124126
}
125127

126128
/**
@@ -189,14 +191,14 @@ CSVRecord nextRecord() throws IOException {
189191
lexer.nextToken(reusableToken);
190192
switch (reusableToken.type) {
191193
case TOKEN:
192-
record.add(reusableToken.content.toString());
194+
this.addRecordValue();
193195
break;
194196
case EORECORD:
195-
record.add(reusableToken.content.toString());
197+
this.addRecordValue();
196198
break;
197199
case EOF:
198200
if (reusableToken.isReady) {
199-
record.add(reusableToken.content.toString());
201+
this.addRecordValue();
200202
}
201203
break;
202204
case INVALID:
@@ -221,6 +223,15 @@ CSVRecord nextRecord() throws IOException {
221223
return result;
222224
}
223225

226+
private void addRecordValue() { // appends the current token's text to the record, applying the format's nullString conversion
227+
final String input = reusableToken.content.toString();
228+
final String nullString = this.format.getNullString();
229+
if (nullString == null) { // no nullString configured: add the value unchanged
230+
record.add(input);
231+
} else {
232+
record.add(input.equalsIgnoreCase(nullString) ? null : input); // NOTE(review): match is case-insensitive, but the nullString Javadoc says "strings equal to" - confirm which is intended
233+
}}
234+
224235
/**
225236
* Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord}
226237
* entries.
@@ -243,7 +254,7 @@ public List<CSVRecord> getRecords() throws IOException {
243254
/**
244255
* Initializes the name to index mapping if the format defines a header.
245256
*/
246-
private Map<String, Integer> initializeHeader(final CSVFormat format) throws IOException {
257+
private Map<String, Integer> initializeHeader() throws IOException {
247258
Map<String, Integer> hdrMap = null;
248259
if (format.getHeader() != null) {
249260
hdrMap = new LinkedHashMap<String, Integer>();

src/main/java/org/apache/commons/csv/CSVPrinter.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -337,8 +337,14 @@ private void printAndQuote(final Object object, final CharSequence value,
337337
*/
338338
public void print(final Object value) throws IOException {
339339
// null values are written as the format's nullString, or as the empty string when none is set
340-
final String strValue = value == null ? format.getNullToString() : value.toString();
341-
print(value, strValue, 0, strValue.length());
340+
String strValue;
341+
if (value == null) {
342+
final String nullString = format.getNullString();
343+
strValue = nullString == null ? Constants.EMPTY : nullString;
344+
} else {
345+
strValue = value.toString();
346+
}
347+
this.print(value, strValue, 0, strValue.length());
342348
}
343349

344350
/**

src/test/java/org/apache/commons/csv/CSVPrinterTest.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,17 @@
1818
package org.apache.commons.csv;
1919

2020
import static org.junit.Assert.assertEquals;
21+
import static org.junit.Assert.assertFalse;
2122

2223
import java.io.IOException;
24+
import java.io.StringReader;
2325
import java.io.StringWriter;
2426
import java.sql.Connection;
2527
import java.sql.DriverManager;
2628
import java.sql.SQLException;
2729
import java.sql.Statement;
2830
import java.util.Arrays;
31+
import java.util.Iterator;
2932
import java.util.List;
3033
import java.util.Random;
3134

@@ -310,12 +313,30 @@ public void testPrintNullValues() throws IOException {
310313
@Test
311314
public void testPrintCustomNullValues() throws IOException {
312315
final StringWriter sw = new StringWriter();
313-
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullToString("NULL").build());
316+
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build());
314317
printer.printRecord("a", null, "b");
315318
assertEquals("a,NULL,b" + recordSeparator, sw.toString());
316319
printer.close();
317320
}
318321

322+
@Test
323+
public void testParseCustomNullValues() throws IOException { // round trip: print a null field with nullString "NULL", then parse it back
324+
final StringWriter sw = new StringWriter();
325+
final CSVFormat format = CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build();
326+
final CSVPrinter printer = new CSVPrinter(sw, format);
327+
printer.printRecord("a", null, "b");
328+
printer.close();
329+
String csvString = sw.toString();
330+
assertEquals("a,NULL,b" + recordSeparator, csvString); // the null field is printed as the nullString
331+
final Iterable<CSVRecord> iterable = format.parse(new StringReader(csvString)); // parse the printed text with the same format
332+
final Iterator<CSVRecord> iterator = iterable.iterator();
333+
final CSVRecord record = iterator.next();
334+
assertEquals("a", record.get(0));
335+
assertEquals(null, record.get(1)); // the "NULL" field is read back as null
336+
assertEquals("b", record.get(2));
337+
assertFalse(iterator.hasNext()); // exactly one record was produced
338+
}
339+
319340
@Test
320341
public void testQuoteAll() throws IOException {
321342
final StringWriter sw = new StringWriter();

0 commit comments

Comments
 (0)