Skip to content

Commit 4b62b30

Browse files
committed
CSV-264: Added DuplicateHeaderMode for flexibility with header strictness.
1 parent d467e41 commit 4b62b30

6 files changed

Lines changed: 209 additions & 55 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 48 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Constants.BACKSLASH;
21-
import static org.apache.commons.csv.Constants.COMMA;
22-
import static org.apache.commons.csv.Constants.COMMENT;
23-
import static org.apache.commons.csv.Constants.CR;
24-
import static org.apache.commons.csv.Constants.CRLF;
25-
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
26-
import static org.apache.commons.csv.Constants.EMPTY;
27-
import static org.apache.commons.csv.Constants.LF;
28-
import static org.apache.commons.csv.Constants.PIPE;
29-
import static org.apache.commons.csv.Constants.SP;
30-
import static org.apache.commons.csv.Constants.TAB;
31-
3220
import java.io.File;
3321
import java.io.FileOutputStream;
3422
import java.io.IOException;
@@ -47,6 +35,18 @@
4735
import java.util.HashSet;
4836
import java.util.Set;
4937

38+
import static org.apache.commons.csv.Constants.BACKSLASH;
39+
import static org.apache.commons.csv.Constants.COMMA;
40+
import static org.apache.commons.csv.Constants.COMMENT;
41+
import static org.apache.commons.csv.Constants.CR;
42+
import static org.apache.commons.csv.Constants.CRLF;
43+
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
44+
import static org.apache.commons.csv.Constants.EMPTY;
45+
import static org.apache.commons.csv.Constants.LF;
46+
import static org.apache.commons.csv.Constants.PIPE;
47+
import static org.apache.commons.csv.Constants.SP;
48+
import static org.apache.commons.csv.Constants.TAB;
49+
5050
/**
5151
* Specifies the format of a CSV file and parses input.
5252
*
@@ -260,13 +260,13 @@ public CSVFormat getFormat() {
260260
* <li>{@code withQuote('"')}</li>
261261
* <li>{@code withRecordSeparator("\r\n")}</li>
262262
* <li>{@code withIgnoreEmptyLines(true)}</li>
263-
* <li>{@code withAllowDuplicateHeaderNames(true)}</li>
263+
* <li>{@code withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
264264
* </ul>
265265
*
266266
* @see Predefined#Default
267267
*/
268268
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
269-
null, null, null, false, false, false, false, false, false, true);
269+
null, null, null, false, false, false, false, false, false, DuplicateHeaderMode.ALLOW_ALL);
270270

271271
/**
272272
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -289,7 +289,7 @@ public CSVFormat getFormat() {
289289
* <li>{@code withRecordSeparator("\r\n")}</li>
290290
* <li>{@code withIgnoreEmptyLines(false)}</li>
291291
* <li>{@code withAllowMissingColumnNames(true)}</li>
292-
* <li>{@code withAllowDuplicateHeaderNames(true)}</li>
292+
* <li>{@code withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
293293
* </ul>
294294
* <p>
295295
* Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
@@ -673,7 +673,7 @@ private static boolean isLineBreak(final Character c) {
673673
*/
674674
public static CSVFormat newFormat(final char delimiter) {
675675
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
676-
false, false, false, false, true);
676+
false, false, false, false, DuplicateHeaderMode.ALLOW_ALL);
677677
}
678678

679679
/**
@@ -688,7 +688,7 @@ public static CSVFormat valueOf(final String format) {
688688
return CSVFormat.Predefined.valueOf(format).getFormat();
689689
}
690690

691-
private final boolean allowDuplicateHeaderNames;
691+
private final DuplicateHeaderMode duplicateHeaderMode;
692692

693693
private final boolean allowMissingColumnNames;
694694

@@ -770,7 +770,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
770770
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
771771
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
772772
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
773-
final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) {
773+
final boolean trailingDelimiter, final boolean autoFlush, final DuplicateHeaderMode duplicateHeaderMode) {
774774
this.delimiter = delimiter;
775775
this.quoteCharacter = quoteChar;
776776
this.quoteMode = quoteMode;
@@ -789,7 +789,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
789789
this.trim = trim;
790790
this.autoFlush = autoFlush;
791791
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
792-
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
792+
this.duplicateHeaderMode = duplicateHeaderMode;
793793
validate();
794794
}
795795

@@ -821,7 +821,7 @@ public boolean equals(final Object obj) {
821821
if (allowMissingColumnNames != other.allowMissingColumnNames) {
822822
return false;
823823
}
824-
if (allowDuplicateHeaderNames != other.allowDuplicateHeaderNames) {
824+
if (duplicateHeaderMode != other.duplicateHeaderMode) {
825825
return false;
826826
}
827827
if (ignoreHeaderCase != other.ignoreHeaderCase) {
@@ -904,13 +904,13 @@ public String format(final Object... values) {
904904
}
905905

906906
/**
907-
* Returns true if and only if duplicate names are allowed in the headers.
907+
* Returns how duplicate headers are handled.
908908
*
909-
* @return whether duplicate header names are allowed
909+
* @return the mode that determines whether duplicate header names are allowed, allowed only when empty, or disallowed.
910910
* @since 1.7
911911
*/
912-
public boolean getAllowDuplicateHeaderNames() {
913-
return allowDuplicateHeaderNames;
912+
public DuplicateHeaderMode getDuplicateHeaderMode() {
913+
return duplicateHeaderMode;
914914
}
915915

916916
/**
@@ -1093,7 +1093,7 @@ public int hashCode() {
10931093
result = prime * result + (ignoreHeaderCase ? 1231 : 1237);
10941094
result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
10951095
result = prime * result + (skipHeaderRecord ? 1231 : 1237);
1096-
result = prime * result + (allowDuplicateHeaderNames ? 1231 : 1237);
1096+
result = prime * result + ((duplicateHeaderMode == null) ? 0 : duplicateHeaderMode.hashCode());
10971097
result = prime * result + (trim ? 1231 : 1237);
10981098
result = prime * result + (autoFlush ? 1231 : 1237);
10991099
result = prime * result + (trailingDelimiter ? 1231 : 1237);
@@ -1703,7 +1703,7 @@ private void validate() throws IllegalArgumentException {
17031703
}
17041704

17051705
// validate header
1706-
if (header != null && !allowDuplicateHeaderNames) {
1706+
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
17071707
final Set<String> dupCheck = new HashSet<>();
17081708
for (final String hdr : header) {
17091709
if (!dupCheck.add(hdr)) {
@@ -1721,21 +1721,22 @@ private void validate() throws IllegalArgumentException {
17211721
* @since 1.7
17221722
*/
17231723
public CSVFormat withAllowDuplicateHeaderNames() {
1724-
return withAllowDuplicateHeaderNames(true);
1724+
return withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL);
17251725
}
17261726

17271727
/**
17281728
* Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value.
17291729
*
1730-
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
1730+
* @param duplicateHeaderMode the duplicate header names behavior: allow all duplicates, allow only empty duplicates,
1731+
* or disallow duplicates.
17311732
* @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
17321733
* @since 1.7
17331734
*/
1734-
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
1735+
public CSVFormat withDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
17351736
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17361737
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17371738
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1738-
allowDuplicateHeaderNames);
1739+
duplicateHeaderMode);
17391740
}
17401741

17411742
/**
@@ -1761,7 +1762,7 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
17611762
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17621763
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17631764
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1764-
allowDuplicateHeaderNames);
1765+
duplicateHeaderMode);
17651766
}
17661767

17671768
/**
@@ -1777,7 +1778,7 @@ public CSVFormat withAutoFlush(final boolean autoFlush) {
17771778
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17781779
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17791780
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1780-
allowDuplicateHeaderNames);
1781+
duplicateHeaderMode);
17811782
}
17821783

17831784
/**
@@ -1813,7 +1814,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
18131814
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
18141815
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
18151816
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1816-
allowDuplicateHeaderNames);
1817+
duplicateHeaderMode);
18171818
}
18181819

18191820
/**
@@ -1832,7 +1833,7 @@ public CSVFormat withDelimiter(final char delimiter) {
18321833
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
18331834
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
18341835
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1835-
allowDuplicateHeaderNames);
1836+
duplicateHeaderMode);
18361837
}
18371838

18381839
/**
@@ -1864,7 +1865,7 @@ public CSVFormat withEscape(final Character escape) {
18641865
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
18651866
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
18661867
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1867-
allowDuplicateHeaderNames);
1868+
duplicateHeaderMode);
18681869
}
18691870

18701871
/**
@@ -2021,7 +2022,7 @@ public CSVFormat withHeader(final String... header) {
20212022
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20222023
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20232024
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2024-
allowDuplicateHeaderNames);
2025+
duplicateHeaderMode);
20252026
}
20262027

20272028
/**
@@ -2043,7 +2044,7 @@ public CSVFormat withHeaderComments(final Object... headerComments) {
20432044
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20442045
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20452046
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2046-
allowDuplicateHeaderNames);
2047+
duplicateHeaderMode);
20472048
}
20482049

20492050
/**
@@ -2069,7 +2070,7 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
20692070
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20702071
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20712072
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2072-
allowDuplicateHeaderNames);
2073+
duplicateHeaderMode);
20732074
}
20742075

20752076
/**
@@ -2096,7 +2097,7 @@ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
20962097
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20972098
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20982099
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2099-
allowDuplicateHeaderNames);
2100+
duplicateHeaderMode);
21002101
}
21012102

21022103
/**
@@ -2121,7 +2122,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
21212122
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21222123
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21232124
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2124-
allowDuplicateHeaderNames);
2125+
duplicateHeaderMode);
21252126
}
21262127

21272128
/**
@@ -2141,7 +2142,7 @@ public CSVFormat withNullString(final String nullString) {
21412142
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21422143
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21432144
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2144-
allowDuplicateHeaderNames);
2145+
duplicateHeaderMode);
21452146
}
21462147

21472148
/**
@@ -2173,7 +2174,7 @@ public CSVFormat withQuote(final Character quoteChar) {
21732174
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
21742175
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
21752176
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2176-
allowDuplicateHeaderNames);
2177+
duplicateHeaderMode);
21772178
}
21782179

21792180
/**
@@ -2188,7 +2189,7 @@ public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
21882189
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
21892190
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21902191
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2191-
allowDuplicateHeaderNames);
2192+
duplicateHeaderMode);
21922193
}
21932194

21942195
/**
@@ -2227,7 +2228,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) {
22272228
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22282229
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22292230
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2230-
allowDuplicateHeaderNames);
2231+
duplicateHeaderMode);
22312232
}
22322233

22332234
/**
@@ -2255,7 +2256,7 @@ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
22552256
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22562257
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22572258
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2258-
allowDuplicateHeaderNames);
2259+
duplicateHeaderMode);
22592260
}
22602261

22612262
/**
@@ -2297,7 +2298,7 @@ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
22972298
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22982299
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22992300
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2300-
allowDuplicateHeaderNames);
2301+
duplicateHeaderMode);
23012302
}
23022303

23032304
/**
@@ -2325,6 +2326,6 @@ public CSVFormat withTrim(final boolean trim) {
23252326
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
23262327
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
23272328
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2328-
allowDuplicateHeaderNames);
2329+
duplicateHeaderMode);
23292330
}
23302331
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Token.Type.TOKEN;
21-
2220
import java.io.Closeable;
2321
import java.io.File;
2422
import java.io.FileInputStream;
@@ -42,6 +40,8 @@
4240
import java.util.Objects;
4341
import java.util.TreeMap;
4442

43+
import static org.apache.commons.csv.Token.Type.TOKEN;
44+
4545
/**
4646
* Parses CSV files according to the specified format.
4747
*
@@ -503,12 +503,14 @@ private Headers createHeaders() throws IOException {
503503
throw new IllegalArgumentException(
504504
"A header name is missing in " + Arrays.toString(headerRecord));
505505
}
506-
// Note: This will always allow a duplicate header if the header is empty
506+
507507
final boolean containsHeader = header != null && hdrMap.containsKey(header);
508-
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
508+
final DuplicateHeaderMode headerRule = this.format.getDuplicateHeaderMode();
509+
510+
if (containsHeader && headerRule != DuplicateHeaderMode.ALLOW_ALL && !(emptyHeader && headerRule == DuplicateHeaderMode.ALLOW_EMPTY)) {
509511
throw new IllegalArgumentException(
510512
String.format(
511-
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
513+
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withDuplicateHeaderMode().",
512514
header, Arrays.toString(headerRecord)));
513515
}
514516
if (header != null) {

0 commit comments

Comments
 (0)