Skip to content
This repository was archived by the owner on Jun 3, 2026. It is now read-only.

Commit ac280e7

Browse files
committed
[CSV-93] Allow the handling of NULL values.
- [CSV-253] Handle absent values in input (null). - Cleaned up version of PR 77 from dota17 where: - Don't duplicate two state items from the format. - Use try-with-resources. - Remove useless parens. - Update Javadoc. - Sort members in the new tests. - Use builder.
1 parent 4083b7a commit ac280e7

7 files changed

Lines changed: 243 additions & 6 deletions

File tree

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -446,8 +446,7 @@ private void addRecordValue(final boolean lastRecord) {
446446
if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) {
447447
return;
448448
}
449-
final String nullString = this.format.getNullString();
450-
this.recordList.add(inputClean.equals(nullString) ? null : inputClean);
449+
this.recordList.add(handleNull(inputClean));
451450
}
452451

453452
/**
@@ -636,7 +635,26 @@ public List<CSVRecord> getRecords() throws IOException {
636635
}
637636

638637
/**
639-
* Gets whether this parser is closed.
638+
* Determines whether the input is parsed as null.
639+
*
640+
* @param input
641+
* the cell data to be further processed
642+
* @return null if input is parsed as null, or input itself if input isn't parsed as null
643+
*/
644+
private String handleNull(final String input) {
645+
final boolean isQuoted = this.reusableToken.isQuoted;
646+
final String nullString = format.getNullString();
647+
final boolean strictQuoteMode = isStrictQuoteMode();
648+
if (input.equals(nullString)) {
649+
// The input matches the configured null string: in ALL_NON_NULL or NON_NUMERIC quote mode a quoted value ("NULL") is kept as a string while an unquoted one (NULL) is returned as null; in any other quote mode the result is always null.
650+
return strictQuoteMode && isQuoted ? input : null;
651+
}
652+
// The input does not match the null string. When no null string is configured, ALL_NON_NULL or NON_NUMERIC quote mode treats an unquoted empty value (,,) as absent and returns null, while a quoted empty string ("") is kept; every other input is returned unchanged.
653+
return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
654+
}
655+
656+
/**
657+
* Tests whether this parser is closed.
640658
*
641659
* @return whether this parser is closed.
642660
*/
@@ -645,7 +663,18 @@ public boolean isClosed() {
645663
}
646664

647665
/**
648-
* Returns an iterator on the records.
666+
* Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
667+
*
668+
* @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
669+
* {@link QuoteMode#NON_NUMERIC}.
670+
*/
671+
private boolean isStrictQuoteMode() {
// handleNull uses this result to decide whether a token's quoting affects its null handling.
672+
return this.format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
673+
this.format.getQuoteMode() == QuoteMode.NON_NUMERIC;
674+
}
675+
676+
/**
677+
* Returns the record iterator.
649678
*
650679
* <p>
651680
* An {@link IOException} caught during the iteration are re-thrown as an

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ Token nextToken(final Token token) throws IOException {
324324
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
325325
*/
326326
private Token parseEncapsulatedToken(final Token token) throws IOException {
327+
token.isQuoted = true;
327328
// save current line number in case needed for IOE
328329
final long startLineNumber = getCurrentLineNumber();
329330
int c;

src/main/java/org/apache/commons/csv/QuoteMode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
package org.apache.commons.csv;
1818

1919
/**
20-
* Defines quoting behavior when printing.
20+
* Defines quoting behavior.
2121
*/
2222
public enum QuoteMode {
2323

src/main/java/org/apache/commons/csv/Token.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,13 @@ enum Type {
5555
/** Token ready flag: indicates a valid token with content (ready for the parser). */
5656
boolean isReady;
5757

58+
/** Token quoted flag: set to true by the lexer when the token is parsed as an encapsulated (quoted) value. */
boolean isQuoted;
59+
5860
/** Resets this token: empties the content buffer, sets the type to INVALID, and clears the isReady and isQuoted flags. */
void reset() {
5961
content.setLength(0);
5962
type = INVALID;
5963
isReady = false;
64+
isQuoted = false;
6265
}
6366

6467
/**

src/test/java/org/apache/commons/csv/CSVPrinterTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ public void testMySqlNullOutput() throws IOException {
873873
String expected = "\"NULL\"\tNULL\n";
874874
assertEquals(expected, writer.toString());
875875
String[] record0 = toFirstRecordValues(expected, format);
876-
assertArrayEquals(new Object[2], record0);
876+
assertArrayEquals(s, record0);
877877

878878
s = new String[] { "\\N", null };
879879
format = CSVFormat.MYSQL.withNullString("\\N");
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.commons.csv.issues;
18+
19+
import static org.junit.jupiter.api.Assertions.assertEquals;
20+
21+
import java.io.IOException;
22+
import java.io.StringReader;
23+
import java.util.Iterator;
24+
25+
import org.apache.commons.csv.CSVFormat;
26+
import org.apache.commons.csv.CSVParser;
27+
import org.apache.commons.csv.CSVRecord;
28+
import org.apache.commons.csv.QuoteMode;
29+
import org.junit.jupiter.api.Test;
30+
31+
/**
32+
* Setting QuoteMode:ALL_NON_NULL or NON_NUMERIC can distinguish between empty string columns and absent value columns.
33+
*/
34+
public class JiraCsv253Test {
35+
36+
// Compares each expected value with the record value at the same index.
// NOTE(review): only the first expected.length values are compared; the record's total size is not verified.
private void assertArrayEqual(final String[] expected, final CSVRecord actual) {
37+
for (int i = 0; i < expected.length; i++) {
38+
assertEquals(expected[i], actual.get(i));
39+
}
40+
}
41+
42+
@Test
43+
public void testHandleAbsentValues() throws IOException {
44+
// Four records mixing quoted values, unquoted values, and absent (unquoted, empty) values.
final String source = "\"John\",,\"Doe\"\n" + ",\"AA\",123\n" + "\"John\",90,\n" + "\"\",,90";
45+
final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build();
46+
try (final CSVParser parser = csvFormat.parse(new StringReader(source))) {
47+
final Iterator<CSVRecord> csvRecords = parser.iterator();
48+
// Absent values are expected to be parsed as null; the quoted empty string in the last record stays "".
assertArrayEqual(new String[] {"John", null, "Doe"}, csvRecords.next());
49+
assertArrayEqual(new String[] {null, "AA", "123"}, csvRecords.next());
50+
assertArrayEqual(new String[] {"John", "90", null}, csvRecords.next());
51+
assertArrayEqual(new String[] {"", null, "90"}, csvRecords.next());
52+
}
53+
}
54+
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.commons.csv.issues;
18+
19+
import static org.junit.jupiter.api.Assertions.assertEquals;
20+
21+
import java.io.IOException;
22+
import java.io.StringReader;
23+
24+
import org.apache.commons.csv.CSVFormat;
25+
import org.apache.commons.csv.CSVParser;
26+
import org.apache.commons.csv.CSVRecord;
27+
import org.apache.commons.csv.QuoteMode;
28+
import org.junit.jupiter.api.Test;
29+
30+
/**
31+
* Add more tests about null value.
32+
* <p>
33+
* QuoteMode:ALL_NON_NULL (Quotes all non-null fields; null will not be quoted but non-null values will be quoted). When
34+
* withNullString("NULL") is set, the NULL String value ("NULL") and the null value (null) will be formatted as '"NULL",NULL'. So it
35+
* should also be parsed as the NULL String value and the null value (["NULL", null]); the two should be distinguished in parsing. And
36+
* when nullString is not set in CSVFormat, the String '"",' should be parsed as "" and null (["", null]), because null
37+
* will not be quoted but non-null values will be quoted. QuoteMode:NON_NUMERIC behaves the same as ALL_NON_NULL.
38+
* </p>
39+
* <p>
40+
* This can solve, to a certain extent, the problem of distinguishing between empty string columns and absent value columns, as in
41+
* Jira CSV-253.
42+
* </p>
43+
*/
44+
public class JiraCsv93Test {
45+
// Fixture without a "NULL" string value: an empty string, a null, a value containing the delimiter, and a number.
private static final Object[] objects1 = {"abc", "", null, "a,b,c", 123};
46+
47+
// Same fixture, but with the string "NULL" in place of the empty string.
private static final Object[] objects2 = {"abc", "NULL", null, "a,b,c", 123};
48+
49+
/**
* Formats {@code objects} with {@code csvFormat}, checks the output against {@code format}, then parses that
* output back with the same format and checks the first record's values against {@code data}.
*
* @param csvFormat the format used for both formatting and parsing
* @param objects the values to format
* @param format the expected formatted line
* @param data the expected parsed values, compared index by index
* @throws IOException if an I/O error occurs
*/
private void every(final CSVFormat csvFormat, final Object[] objects, final String format, final String[] data)
50+
throws IOException {
51+
final String source = csvFormat.format(objects);
52+
// Reuse the already formatted line instead of formatting a second time.
assertEquals(format, source);
53+
try (final CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
54+
final CSVRecord csvRecord = csvParser.iterator().next();
55+
for (int i = 0; i < data.length; i++) {
56+
// JUnit's contract is assertEquals(expected, actual); keep that order so failure messages are accurate.
assertEquals(data[i], csvRecord.get(i));
57+
}
58+
}
59+
}
60+
61+
@Test
62+
public void testWithNotSetNullString() throws IOException {
63+
// @formatter:off
64+
every(CSVFormat.DEFAULT,
65+
objects1,
66+
"abc,,,\"a,b,c\",123",
67+
new String[]{"abc", "", "", "a,b,c", "123"});
68+
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL).build(),
69+
objects1,
70+
"\"abc\",\"\",,\"a,b,c\",\"123\"",
71+
new String[]{"abc", "", "", "a,b,c", "123"});
72+
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
73+
objects1,
74+
"\"abc\",\"\",,\"a,b,c\",\"123\"",
75+
new String[]{"abc", "", null, "a,b,c", "123"});
76+
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.MINIMAL).build(),
77+
objects1,
78+
"abc,,,\"a,b,c\",123",
79+
new String[]{"abc", "", "", "a,b,c", "123"});
80+
every(CSVFormat.DEFAULT.builder().setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
81+
objects1,
82+
"abc,,,a?,b?,c,123",
83+
new String[]{"abc", "", "", "a,b,c", "123"});
84+
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build(),
85+
objects1,
86+
"\"abc\",\"\",,\"a,b,c\",123",
87+
new String[]{"abc", "", null, "a,b,c", "123"});
88+
// @formatter:on
89+
}
90+
91+
@Test
92+
public void testWithSetNullStringEmptyString() throws IOException {
93+
// @formatter:off
94+
every(CSVFormat.DEFAULT.builder().setNullString("").build(),
95+
objects1,
96+
"abc,,,\"a,b,c\",123",
97+
new String[]{"abc", null, null, "a,b,c", "123"});
98+
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL).build(),
99+
objects1,
100+
"\"abc\",\"\",\"\",\"a,b,c\",\"123\"",
101+
new String[]{"abc", null, null, "a,b,c", "123"});
102+
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
103+
objects1,
104+
"\"abc\",\"\",,\"a,b,c\",\"123\"",
105+
new String[]{"abc", "", null, "a,b,c", "123"});
106+
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.MINIMAL).build(),
107+
objects1,
108+
"abc,,,\"a,b,c\",123",
109+
new String[]{"abc", null, null, "a,b,c", "123"});
110+
every(CSVFormat.DEFAULT.builder().setNullString("").setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
111+
objects1,
112+
"abc,,,a?,b?,c,123",
113+
new String[]{"abc", null, null, "a,b,c", "123"});
114+
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.NON_NUMERIC).build(),
115+
objects1,
116+
"\"abc\",\"\",,\"a,b,c\",123",
117+
new String[]{"abc", "", null, "a,b,c", "123"});
118+
// @formatter:on
119+
}
120+
121+
@Test
122+
public void testWithSetNullStringNULL() throws IOException {
123+
// @formatter:off
124+
every(CSVFormat.DEFAULT.builder().setNullString("NULL").build(),
125+
objects2,
126+
"abc,NULL,NULL,\"a,b,c\",123",
127+
new String[]{"abc", null, null, "a,b,c", "123"});
128+
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL).build(),
129+
objects2,
130+
"\"abc\",\"NULL\",\"NULL\",\"a,b,c\",\"123\"",
131+
new String[]{"abc", null, null, "a,b,c", "123"});
132+
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
133+
objects2,
134+
"\"abc\",\"NULL\",NULL,\"a,b,c\",\"123\"",
135+
new String[]{"abc", "NULL", null, "a,b,c", "123"});
136+
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.MINIMAL).build(),
137+
objects2,
138+
"abc,NULL,NULL,\"a,b,c\",123",
139+
new String[]{"abc", null, null, "a,b,c", "123"});
140+
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
141+
objects2,
142+
"abc,NULL,NULL,a?,b?,c,123",
143+
new String[]{"abc", null, null, "a,b,c", "123"});
144+
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.NON_NUMERIC).build(),
145+
objects2,
146+
"\"abc\",\"NULL\",NULL,\"a,b,c\",123",
147+
new String[]{"abc", "NULL", null, "a,b,c", "123"});
148+
// @formatter:on
149+
}
150+
}

0 commit comments

Comments
 (0)