Skip to content

Commit ef1529a

Browse files
committed
CSV-264: Added DuplicateHeaderMode for flexibility with header strictness
1 parent ca5eb7c commit ef1529a

6 files changed

Lines changed: 222 additions & 37 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 58 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Constants.BACKSLASH;
21-
import static org.apache.commons.csv.Constants.COMMA;
22-
import static org.apache.commons.csv.Constants.COMMENT;
23-
import static org.apache.commons.csv.Constants.CR;
24-
import static org.apache.commons.csv.Constants.CRLF;
25-
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
26-
import static org.apache.commons.csv.Constants.EMPTY;
27-
import static org.apache.commons.csv.Constants.LF;
28-
import static org.apache.commons.csv.Constants.PIPE;
29-
import static org.apache.commons.csv.Constants.SP;
30-
import static org.apache.commons.csv.Constants.TAB;
31-
3220
import java.io.File;
3321
import java.io.FileOutputStream;
3422
import java.io.IOException;
@@ -48,6 +36,18 @@
4836
import java.util.Objects;
4937
import java.util.Set;
5038

39+
import static org.apache.commons.csv.Constants.BACKSLASH;
40+
import static org.apache.commons.csv.Constants.COMMA;
41+
import static org.apache.commons.csv.Constants.COMMENT;
42+
import static org.apache.commons.csv.Constants.CR;
43+
import static org.apache.commons.csv.Constants.CRLF;
44+
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
45+
import static org.apache.commons.csv.Constants.EMPTY;
46+
import static org.apache.commons.csv.Constants.LF;
47+
import static org.apache.commons.csv.Constants.PIPE;
48+
import static org.apache.commons.csv.Constants.SP;
49+
import static org.apache.commons.csv.Constants.TAB;
50+
5151
/**
5252
* Specifies the format of a CSV file and parses input.
5353
*
@@ -188,8 +188,6 @@ public static Builder create(final CSVFormat csvFormat) {
188188
return new Builder(csvFormat);
189189
}
190190

191-
private boolean allowDuplicateHeaderNames;
192-
193191
private boolean allowMissingColumnNames;
194192

195193
private boolean autoFlush;
@@ -198,6 +196,8 @@ public static Builder create(final CSVFormat csvFormat) {
198196

199197
private String delimiter;
200198

199+
private DuplicateHeaderMode duplicateHeaderMode;
200+
201201
private Character escapeCharacter;
202202

203203
private String[] headerComments;
@@ -245,7 +245,7 @@ private Builder(final CSVFormat csvFormat) {
245245
this.trim = csvFormat.trim;
246246
this.autoFlush = csvFormat.autoFlush;
247247
this.quotedNullString = csvFormat.quotedNullString;
248-
this.allowDuplicateHeaderNames = csvFormat.allowDuplicateHeaderNames;
248+
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
249249
}
250250

251251
/**
@@ -262,12 +262,26 @@ public CSVFormat build() {
262262
*
263263
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
264264
* @return This instance.
265+
* @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}.
265266
*/
267+
@Deprecated
266268
public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
267-
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
269+
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
270+
setDuplicateHeaderMode(mode);
268271
return this;
269272
}
270273

274+
/**
275+
* Sets the duplicate header names behavior.
276+
*
277+
* @param duplicateHeaderMode the duplicate header names behavior
278+
* @return This instance.
279+
*/
280+
public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
281+
this.duplicateHeaderMode = duplicateHeaderMode;
282+
return this;
283+
}
284+
271285
/**
272286
* Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
273287
* {@link IllegalArgumentException} to be thrown.
@@ -760,7 +774,8 @@ public CSVFormat getFormat() {
760774
}
761775

762776
/**
763-
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
777+
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
778+
* empty lines.
764779
*
765780
* <p>
766781
* The {@link Builder} settings are:
@@ -770,13 +785,13 @@ public CSVFormat getFormat() {
770785
* <li>{@code setQuote('"')}</li>
771786
* <li>{@code setRecordSeparator("\r\n")}</li>
772787
* <li>{@code setIgnoreEmptyLines(true)}</li>
773-
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
788+
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
774789
* </ul>
775790
*
776791
* @see Predefined#Default
777792
*/
778793
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
779-
false, false, false, true);
794+
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
780795

781796
/**
782797
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
@@ -799,7 +814,7 @@ public CSVFormat getFormat() {
799814
* <li>{@code setRecordSeparator("\r\n")}</li>
800815
* <li>{@code setIgnoreEmptyLines(false)}</li>
801816
* <li>{@code setAllowMissingColumnNames(true)}</li>
802-
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
817+
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
803818
* </ul>
804819
* <p>
805820
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
@@ -1219,7 +1234,7 @@ private static boolean isLineBreak(final Character c) {
12191234
*/
12201235
public static CSVFormat newFormat(final char delimiter) {
12211236
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
1222-
true);
1237+
DuplicateHeaderMode.ALLOW_ALL);
12231238
}
12241239

12251240
static String[] toStringArray(final Object[] values) {
@@ -1261,7 +1276,7 @@ public static CSVFormat valueOf(final String format) {
12611276
return CSVFormat.Predefined.valueOf(format).getFormat();
12621277
}
12631278

1264-
private final boolean allowDuplicateHeaderNames;
1279+
private final DuplicateHeaderMode duplicateHeaderMode;
12651280

12661281
private final boolean allowMissingColumnNames;
12671282

@@ -1318,7 +1333,7 @@ private CSVFormat(final Builder builder) {
13181333
this.trim = builder.trim;
13191334
this.autoFlush = builder.autoFlush;
13201335
this.quotedNullString = builder.quotedNullString;
1321-
this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames;
1336+
this.duplicateHeaderMode = builder.duplicateHeaderMode;
13221337
validate();
13231338
}
13241339

@@ -1342,13 +1357,14 @@ private CSVFormat(final Builder builder) {
13421357
* @param trim TODO Doc me.
13431358
* @param trailingDelimiter TODO Doc me.
13441359
* @param autoFlush TODO Doc me.
1360+
* @param duplicateHeaderMode the behavior when handling duplicate headers
13451361
* @throws IllegalArgumentException if the delimiter is a line break character.
13461362
*/
13471363
private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
13481364
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
13491365
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
13501366
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
1351-
final boolean allowDuplicateHeaderNames) {
1367+
final DuplicateHeaderMode duplicateHeaderMode) {
13521368
this.delimiter = delimiter;
13531369
this.quoteCharacter = quoteChar;
13541370
this.quoteMode = quoteMode;
@@ -1367,7 +1383,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
13671383
this.trim = trim;
13681384
this.autoFlush = autoFlush;
13691385
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
1370-
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
1386+
this.duplicateHeaderMode = duplicateHeaderMode;
13711387
validate();
13721388
}
13731389

@@ -1405,7 +1421,7 @@ public boolean equals(final Object obj) {
14051421
return false;
14061422
}
14071423
final CSVFormat other = (CSVFormat) obj;
1408-
return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames &&
1424+
return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames &&
14091425
autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
14101426
Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) &&
14111427
Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines &&
@@ -1441,7 +1457,17 @@ public String format(final Object... values) {
14411457
* @since 1.7
14421458
*/
14431459
public boolean getAllowDuplicateHeaderNames() {
1444-
return allowDuplicateHeaderNames;
1460+
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
1461+
}
1462+
1463+
/**
1464+
* Returns how duplicate headers are handled.
1465+
*
1466+
* @return the mode stating whether duplicate header values are allowed, allowed only when empty, or disallowed.
1467+
* @since 1.9
1468+
*/
1469+
public DuplicateHeaderMode getDuplicateHeaderMode() {
1470+
return duplicateHeaderMode;
14451471
}
14461472

14471473
/**
@@ -1622,7 +1648,7 @@ public int hashCode() {
16221648
int result = 1;
16231649
result = prime * result + Arrays.hashCode(header);
16241650
result = prime * result + Arrays.hashCode(headerComments);
1625-
return prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
1651+
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
16261652
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
16271653
skipHeaderRecord, trailingDelimiter, trim);
16281654
}
@@ -2222,7 +2248,7 @@ private void validate() throws IllegalArgumentException {
22222248
}
22232249

22242250
// validate header
2225-
if (header != null && !allowDuplicateHeaderNames) {
2251+
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
22262252
final Set<String> dupCheck = new HashSet<>();
22272253
for (final String hdr : header) {
22282254
if (!dupCheck.add(hdr)) {
@@ -2241,7 +2267,7 @@ private void validate() throws IllegalArgumentException {
22412267
*/
22422268
@Deprecated
22432269
public CSVFormat withAllowDuplicateHeaderNames() {
2244-
return builder().setAllowDuplicateHeaderNames(true).build();
2270+
return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
22452271
}
22462272

22472273
/**
@@ -2254,7 +2280,8 @@ public CSVFormat withAllowDuplicateHeaderNames() {
22542280
*/
22552281
@Deprecated
22562282
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
2257-
return builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build();
2283+
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
2284+
return builder().setDuplicateHeaderMode(mode).build();
22582285
}
22592286

22602287
/**

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Token.Type.TOKEN;
21-
2220
import java.io.Closeable;
2321
import java.io.File;
2422
import java.io.IOException;
@@ -45,6 +43,8 @@
4543
import java.util.stream.Stream;
4644
import java.util.stream.StreamSupport;
4745

46+
import static org.apache.commons.csv.Token.Type.TOKEN;
47+
4848
/**
4949
* Parses CSV files according to the specified format.
5050
*
@@ -503,12 +503,16 @@ private Headers createHeaders() throws IOException {
503503
throw new IllegalArgumentException(
504504
"A header name is missing in " + Arrays.toString(headerRecord));
505505
}
506-
// Note: This will always allow a duplicate header if the header is empty
506+
507507
final boolean containsHeader = header != null && hdrMap.containsKey(header);
508-
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
508+
final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
509+
final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
510+
final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
511+
512+
if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) {
509513
throw new IllegalArgumentException(
510514
String.format(
511-
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
515+
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
512516
header, Arrays.toString(headerRecord)));
513517
}
514518
if (header != null) {
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv;
19+
20+
/**
21+
* Determines how duplicate header fields should be handled
22+
* when the header set via {@link CSVFormat#withHeader(String...)} is not {@code null}.
23+
*
24+
* @since 1.9
25+
*/
26+
public enum DuplicateHeaderMode {
27+
28+
/**
29+
* Allows all duplicate headings.
30+
*/
31+
ALLOW_ALL,
32+
33+
/**
34+
* Allows duplicate headings only if they're empty
35+
* strings or null.
36+
*/
37+
ALLOW_EMPTY,
38+
39+
/**
40+
* Disallows duplicate headings entirely.
41+
*/
42+
DISALLOW
43+
}

src/site/resources/checkstyle/checkstyle-suppressions.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@
1919
"-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
2020
"https://checkstyle.org/dtds/suppressions_1_2.dtd">
2121
<suppressions>
22-
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="511"/>
22+
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="515"/>
2323
</suppressions>

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@ public void testEqualsHash() throws Exception {
260260
final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL);
261261
final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL);
262262
assertNotEquals(name, type, a, b);
263+
} else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) {
264+
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.ALLOW_ALL});
265+
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.DISALLOW});
266+
assertNotEquals(name, type, a, b);
263267
} else if ("java.lang.Object[]".equals(type)){
264268
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}});
265269
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}});
@@ -1295,6 +1299,15 @@ public void testWithEscape() {
12951299
}
12961300

12971301

1302+
@Test
1303+
public void testWithEmptyDuplicates() {
1304+
final CSVFormat formatWithEmptyDuplicates =
1305+
CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build();
1306+
1307+
assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode());
1308+
assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames());
1309+
}
1310+
12981311
@Test
12991312
public void testWithEscapeCRThrowsExceptions() {
13001313
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR));

0 commit comments

Comments
 (0)