Skip to content

Commit bd48a76

Browse files
committed
CSVFormat: Sanitise empty headers to the empty string ""
Add more tests for duplicate headers including null header names.
1 parent 481d8b1 commit bd48a76

2 files changed

Lines changed: 145 additions & 43 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2304,14 +2304,16 @@ private void validate() throws IllegalArgumentException {
23042304
// Validate headers
23052305
if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
23062306
final Set<String> dupCheckSet = new HashSet<>(headers.length);
2307-
final boolean rejectEmpty = duplicateHeaderMode != DuplicateHeaderMode.ALLOW_EMPTY;
2308-
for (final String header : headers) {
2307+
final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY;
2308+
for (String header : headers) {
23092309
final boolean blank = isBlank(header);
2310-
if (rejectEmpty && blank) {
2311-
throw new IllegalArgumentException("Header is empty");
2312-
}
2313-
if (!blank && !dupCheckSet.add(header)) {
2314-
throw new IllegalArgumentException(String.format("Header '%s' is a duplicate in %s", header, Arrays.toString(headers)));
2310+
// Sanitise all empty headers to the empty string "" when checking duplicates
2311+
final boolean containsHeader = !dupCheckSet.add(blank ? "" : header);
2312+
if (containsHeader && !(blank && emptyDuplicatesAllowed)) {
2313+
throw new IllegalArgumentException(
2314+
String.format(
2315+
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
2316+
header, Arrays.toString(headers)));
23152317
}
23162318
}
23172319
}

src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java

Lines changed: 136 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.io.IOException;
2121
import java.util.Arrays;
22+
import java.util.List;
2223
import java.util.stream.Collectors;
2324
import java.util.stream.Stream;
2425
import org.junit.jupiter.api.Assertions;
@@ -33,22 +34,20 @@
3334
public class CSVDuplicateHeaderTest {
3435

3536
/**
36-
* Return test cases for duplicate header data. Uses the order:
37+
* Return test cases for duplicate header data for use in parsing (CSVParser). Uses the order:
3738
* <pre>
3839
* DuplicateHeaderMode duplicateHeaderMode
3940
* boolean allowMissingColumnNames
4041
* String[] headers
4142
* boolean valid
4243
* </pre>
43-
* <p>
44-
* TODO: Reinstate cases failed by CSVFormat.
45-
* </p>
4644
*
4745
* @return the stream of arguments
4846
*/
4947
static Stream<Arguments> duplicateHeaderData() {
5048
return Stream.of(
51-
// Commented out data here are for cases that are only supported for parsing.
49+
// TODO: Fix CSVParser which does not sanitise 'empty' names or
50+
// equate null names with empty as duplications
5251

5352
// Any combination with a valid header
5453
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "B"}, true),
@@ -58,6 +57,30 @@ static Stream<Arguments> duplicateHeaderData() {
5857
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "B"}, true),
5958
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "B"}, true),
6059

60+
// Any combination with a valid header including empty
61+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", ""}, false),
62+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", ""}, false),
63+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", ""}, false),
64+
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", ""}, true),
65+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", ""}, true),
66+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", ""}, true),
67+
68+
// Any combination with a valid header including blank (1 space)
69+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", " "}, false),
70+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", " "}, false),
71+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", " "}, false),
72+
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", " "}, true),
73+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", " "}, true),
74+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", " "}, true),
75+
76+
// Any combination with a valid header including null
77+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", null}, false),
78+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", null}, false),
79+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", null}, false),
80+
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", null}, true),
81+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", null}, true),
82+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", null}, true),
83+
6184
// Duplicate non-empty names
6285
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A"}, false),
6386
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A"}, false),
@@ -68,57 +91,124 @@ static Stream<Arguments> duplicateHeaderData() {
6891

6992
// Duplicate empty names
7093
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"", ""}, false),
94+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"", ""}, false),
95+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"", ""}, false),
7196
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"", ""}, false),
7297
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"", ""}, true),
7398
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"", ""}, true),
7499

75100
// Duplicate blank names (1 space)
76101
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false),
102+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", " "}, false),
103+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", " "}, false),
77104
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false),
78105
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true),
79106
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true),
80107

81108
// Duplicate blank names (3 spaces)
82109
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false),
110+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", " "}, false),
111+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", " "}, false),
83112
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false),
84113
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true),
85114
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true),
86115

116+
// Duplicate null names
117+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {null, null}, false),
118+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {null, null}, false),
119+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {null, null}, false),
120+
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {null, null}, false),
121+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {null, null}, true),
122+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {null, null}, true),
123+
124+
// Duplicate blank names (1+3 spaces)
125+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false),
126+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", " "}, false),
127+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", " "}, false),
128+
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false),
129+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true),
130+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true),
131+
132+
// Duplicate blank names and null names
133+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", null}, false),
134+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", null}, false),
135+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", null}, false),
136+
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", null}, false),
137+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", null}, true),
138+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", null}, true),
139+
87140
// Duplicate non-empty and empty names
88141
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", "", ""}, false),
89142
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", "", ""}, false),
143+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "A", "", ""}, false),
90144
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", "", ""}, false),
91145
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", "", ""}, false),
92146
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", "", ""}, true),
93147

94148
// Duplicate non-empty and blank names
95149
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", " ", " "}, false),
96150
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", " ", " "}, false),
151+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "A", " ", " "}, false),
97152
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", " ", " "}, false),
98153
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", " ", " "}, false),
99-
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", " ", " "}, true)
100-
);
101-
}
154+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", " ", " "}, true),
102155

103-
static Stream<Arguments> duplicateHeaderParseOnlyData() {
104-
return Stream.of(
105-
// Duplicate empty names
106-
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] { "", "" }, false),
107-
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { "", "" }, false),
156+
// Duplicate non-empty and null names
157+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", null, null}, false),
158+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", null, null}, false),
159+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "A", null, null}, false),
160+
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", null, null}, false),
161+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", null, null}, false),
162+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", null, null}, true),
108163

109-
// Duplicate blank names (1 space)
110-
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { " ", " " }, false),
111-
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] { " ", " " }, true),
164+
// Non-empty name with duplicate empty names
165+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "", ""}, false),
166+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "", ""}, false),
167+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "", ""}, false),
168+
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "", ""}, false),
169+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "", ""}, true),
170+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "", ""}, true),
112171

113-
// Duplicate blank names (3 spaces)
114-
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { " ", " " }, false),
115-
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] { " ", " " }, true),
172+
// Duplicate null names
173+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", null, null}, false),
174+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", null, null}, false),
175+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", null, null}, false),
176+
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", null, null}, false),
177+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", null, null}, true),
178+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", null, null}, true),
116179

117-
// Duplicate non-empty and empty names
118-
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { "A", "A", "", "" }, false),
180+
// Duplicate blank names (1+3 spaces)
181+
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", " ", " "}, false),
182+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", " ", " "}, false),
183+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", " ", " "}, false),
184+
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", " ", " "}, false),
185+
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", " ", " "}, true),
186+
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", " ", " "}, true)
187+
);
188+
}
119189

120-
// Duplicate non-empty and blank names
121-
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { "A", "A", " ", " " }, false));
190+
/**
191+
* Return test cases for duplicate header data for use in CSVFormat.
192+
* <p>
193+
* This filters the parsing test data to all cases where the allow missing column
194+
* names flag is true. The allow missing column names flag applies exclusively to parsing.
195+
* CSVFormat validation applies to both parsing and writing and thus validation
196+
* is less strict and behaves as if the missing column names constraint is absent.
197+
* The filtered data is then returned with the missing column names flag set to both
198+
* true and false for each test case.
199+
* </p>
200+
*
201+
* @return the stream of arguments
202+
*/
203+
static Stream<Arguments> duplicateHeaderAllowsMissingColumnsNamesData() {
204+
return duplicateHeaderData()
205+
.filter(arg -> Boolean.TRUE.equals(arg.get()[1]))
206+
.flatMap(arg -> {
207+
// Return test case with flag as both true and false
208+
final Object[] data = arg.get().clone();
209+
data[1] = Boolean.FALSE;
210+
return Stream.of(arg, Arguments.of(data));
211+
});
122212
}
123213

124214
/**
@@ -130,15 +220,16 @@ static Stream<Arguments> duplicateHeaderParseOnlyData() {
130220
* @param valid true if the settings are expected to be valid, otherwise expect an IllegalArgumentException
131221
*/
132222
@ParameterizedTest
133-
@MethodSource(value = {"duplicateHeaderData"})
223+
@MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"})
134224
public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode,
135225
final boolean allowMissingColumnNames,
136226
final String[] headers,
137227
final boolean valid) {
138-
final CSVFormat.Builder builder = CSVFormat.DEFAULT.builder()
139-
.setDuplicateHeaderMode(duplicateHeaderMode)
140-
.setAllowMissingColumnNames(allowMissingColumnNames)
141-
.setHeader(headers);
228+
final CSVFormat.Builder builder =
229+
CSVFormat.DEFAULT.builder()
230+
.setDuplicateHeaderMode(duplicateHeaderMode)
231+
.setAllowMissingColumnNames(allowMissingColumnNames)
232+
.setHeader(headers);
142233
if (valid) {
143234
final CSVFormat format = builder.build();
144235
Assertions.assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode");
@@ -159,20 +250,29 @@ public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode,
159250
* @throws IOException Signals that an I/O exception has occurred.
160251
*/
161252
@ParameterizedTest
162-
@MethodSource(value = {"duplicateHeaderData", "duplicateHeaderParseOnlyData"})
253+
@MethodSource(value = {"duplicateHeaderData"})
163254
public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode,
164255
final boolean allowMissingColumnNames,
165256
final String[] headers,
166257
final boolean valid) throws IOException {
167-
final CSVFormat format = CSVFormat.DEFAULT.builder()
168-
.setDuplicateHeaderMode(duplicateHeaderMode)
169-
.setAllowMissingColumnNames(allowMissingColumnNames)
170-
.setHeader()
171-
.build();
172-
final String input = Arrays.stream(headers).collect(Collectors.joining(format.getDelimiterString()));
258+
final CSVFormat format =
259+
CSVFormat.DEFAULT.builder()
260+
.setDuplicateHeaderMode(duplicateHeaderMode)
261+
.setAllowMissingColumnNames(allowMissingColumnNames)
262+
.setNullString("NULL")
263+
.setHeader()
264+
.build();
265+
final String input = Arrays.stream(headers)
266+
.map(s -> s == null ? "NULL" : s)
267+
.collect(Collectors.joining(format.getDelimiterString()));
173268
if (valid) {
174269
try(CSVParser parser = CSVParser.parse(input, format)) {
175-
Assertions.assertEquals(Arrays.asList(headers), parser.getHeaderNames());
270+
// Parser ignores null headers
271+
final List<String> expected =
272+
Arrays.stream(headers)
273+
.filter(s -> s != null)
274+
.collect(Collectors.toList());
275+
Assertions.assertEquals(expected, parser.getHeaderNames(), "HeaderNames");
176276
}
177277
} else {
178278
Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format));

0 commit comments

Comments
 (0)