Skip to content

Commit 10a519d

Browse files
committed
CSV-264: Added DuplicateHeaderMode for flexibility with header strictness.
1 parent 2ac8398 commit 10a519d

6 files changed

Lines changed: 209 additions & 55 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 48 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Constants.BACKSLASH;
21-
import static org.apache.commons.csv.Constants.COMMA;
22-
import static org.apache.commons.csv.Constants.COMMENT;
23-
import static org.apache.commons.csv.Constants.CR;
24-
import static org.apache.commons.csv.Constants.CRLF;
25-
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
26-
import static org.apache.commons.csv.Constants.EMPTY;
27-
import static org.apache.commons.csv.Constants.LF;
28-
import static org.apache.commons.csv.Constants.PIPE;
29-
import static org.apache.commons.csv.Constants.SP;
30-
import static org.apache.commons.csv.Constants.TAB;
31-
3220
import java.io.File;
3321
import java.io.FileOutputStream;
3422
import java.io.IOException;
@@ -48,6 +36,18 @@
4836
import java.util.Objects;
4937
import java.util.Set;
5038

39+
import static org.apache.commons.csv.Constants.BACKSLASH;
40+
import static org.apache.commons.csv.Constants.COMMA;
41+
import static org.apache.commons.csv.Constants.COMMENT;
42+
import static org.apache.commons.csv.Constants.CR;
43+
import static org.apache.commons.csv.Constants.CRLF;
44+
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
45+
import static org.apache.commons.csv.Constants.EMPTY;
46+
import static org.apache.commons.csv.Constants.LF;
47+
import static org.apache.commons.csv.Constants.PIPE;
48+
import static org.apache.commons.csv.Constants.SP;
49+
import static org.apache.commons.csv.Constants.TAB;
50+
5151
/**
5252
* Specifies the format of a CSV file and parses input.
5353
*
@@ -261,13 +261,13 @@ public CSVFormat getFormat() {
261261
* <li>{@code withQuote('"')}</li>
262262
* <li>{@code withRecordSeparator("\r\n")}</li>
263263
* <li>{@code withIgnoreEmptyLines(true)}</li>
264-
* <li>{@code withAllowDuplicateHeaderNames(true)}</li>
264+
* <li>{@code withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
265265
* </ul>
266266
*
267267
* @see Predefined#Default
268268
*/
269269
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
270-
null, null, null, false, false, false, false, false, false, true);
270+
null, null, null, false, false, false, false, false, false, DuplicateHeaderMode.ALLOW_ALL);
271271

272272
/**
273273
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -290,7 +290,7 @@ public CSVFormat getFormat() {
290290
* <li>{@code withRecordSeparator("\r\n")}</li>
291291
* <li>{@code withIgnoreEmptyLines(false)}</li>
292292
* <li>{@code withAllowMissingColumnNames(true)}</li>
293-
* <li>{@code withAllowDuplicateHeaderNames(true)}</li>
293+
* <li>{@code withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
294294
* </ul>
295295
* <p>
296296
* Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
@@ -674,7 +674,7 @@ private static boolean isLineBreak(final Character c) {
674674
*/
675675
public static CSVFormat newFormat(final char delimiter) {
676676
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
677-
false, false, false, false, true);
677+
false, false, false, false, DuplicateHeaderMode.ALLOW_ALL);
678678
}
679679

680680
/**
@@ -689,7 +689,7 @@ public static CSVFormat valueOf(final String format) {
689689
return CSVFormat.Predefined.valueOf(format).getFormat();
690690
}
691691

692-
private final boolean allowDuplicateHeaderNames;
692+
private final DuplicateHeaderMode duplicateHeaderMode;
693693

694694
private final boolean allowMissingColumnNames;
695695

@@ -771,7 +771,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
771771
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
772772
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
773773
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
774-
final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) {
774+
final boolean trailingDelimiter, final boolean autoFlush, final DuplicateHeaderMode duplicateHeaderMode) {
775775
this.delimiter = delimiter;
776776
this.quoteCharacter = quoteChar;
777777
this.quoteMode = quoteMode;
@@ -790,7 +790,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
790790
this.trim = trim;
791791
this.autoFlush = autoFlush;
792792
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
793-
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
793+
this.duplicateHeaderMode = duplicateHeaderMode;
794794
validate();
795795
}
796796

@@ -822,7 +822,7 @@ public boolean equals(final Object obj) {
822822
if (allowMissingColumnNames != other.allowMissingColumnNames) {
823823
return false;
824824
}
825-
if (allowDuplicateHeaderNames != other.allowDuplicateHeaderNames) {
825+
if (duplicateHeaderMode != other.duplicateHeaderMode) {
826826
return false;
827827
}
828828
if (ignoreHeaderCase != other.ignoreHeaderCase) {
@@ -885,13 +885,13 @@ public String format(final Object... values) {
885885
}
886886

887887
/**
888-
* Returns true if and only if duplicate names are allowed in the headers.
888+
* Returns how duplicate headers are handled.
889889
*
890-
* @return whether duplicate header names are allowed
890+
* @return the mode indicating whether duplicate header names are allowed, allowed conditionally, or disallowed.
891891
* @since 1.7
892892
*/
893-
public boolean getAllowDuplicateHeaderNames() {
894-
return allowDuplicateHeaderNames;
893+
public DuplicateHeaderMode getDuplicateHeaderMode() {
894+
return duplicateHeaderMode;
895895
}
896896

897897
/**
@@ -1062,7 +1062,7 @@ public boolean getTrim() {
10621062
@Override
10631063
public int hashCode() {
10641064
return Objects.hash(delimiter, quoteMode, quoteCharacter, commentMarker, escapeCharacter, nullString,
1065-
ignoreSurroundingSpaces, ignoreHeaderCase, ignoreEmptyLines, skipHeaderRecord, allowDuplicateHeaderNames,
1065+
ignoreSurroundingSpaces, ignoreHeaderCase, ignoreEmptyLines, skipHeaderRecord, duplicateHeaderMode,
10661066
trim, autoFlush, trailingDelimiter, allowMissingColumnNames, recordSeparator, Arrays.hashCode(header),
10671067
Arrays.hashCode(headerComments));
10681068
}
@@ -1666,7 +1666,7 @@ private void validate() throws IllegalArgumentException {
16661666
}
16671667

16681668
// validate header
1669-
if (header != null && !allowDuplicateHeaderNames) {
1669+
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
16701670
final Set<String> dupCheck = new HashSet<>();
16711671
for (final String hdr : header) {
16721672
if (!dupCheck.add(hdr)) {
@@ -1684,21 +1684,22 @@ private void validate() throws IllegalArgumentException {
16841684
* @since 1.7
16851685
*/
16861686
public CSVFormat withAllowDuplicateHeaderNames() {
1687-
return withAllowDuplicateHeaderNames(true);
1687+
return withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL);
16881688
}
16891689

16901690
/**
16911691
* Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value.
16921692
*
1693-
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
1693+
* @param duplicateHeaderMode the duplicate header names behavior, to allow, allow conditionally,
1694+
* or disallow duplicates.
16941695
* @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
16951696
* @since 1.7
16961697
*/
1697-
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
1698+
public CSVFormat withDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
16981699
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
16991700
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17001701
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1701-
allowDuplicateHeaderNames);
1702+
duplicateHeaderMode);
17021703
}
17031704

17041705
/**
@@ -1724,7 +1725,7 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
17241725
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17251726
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17261727
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1727-
allowDuplicateHeaderNames);
1728+
duplicateHeaderMode);
17281729
}
17291730

17301731
/**
@@ -1740,7 +1741,7 @@ public CSVFormat withAutoFlush(final boolean autoFlush) {
17401741
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17411742
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17421743
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1743-
allowDuplicateHeaderNames);
1744+
duplicateHeaderMode);
17441745
}
17451746

17461747
/**
@@ -1776,7 +1777,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
17761777
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17771778
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17781779
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1779-
allowDuplicateHeaderNames);
1780+
duplicateHeaderMode);
17801781
}
17811782

17821783
/**
@@ -1795,7 +1796,7 @@ public CSVFormat withDelimiter(final char delimiter) {
17951796
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17961797
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17971798
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1798-
allowDuplicateHeaderNames);
1799+
duplicateHeaderMode);
17991800
}
18001801

18011802
/**
@@ -1827,7 +1828,7 @@ public CSVFormat withEscape(final Character escape) {
18271828
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
18281829
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
18291830
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1830-
allowDuplicateHeaderNames);
1831+
duplicateHeaderMode);
18311832
}
18321833

18331834
/**
@@ -1984,7 +1985,7 @@ public CSVFormat withHeader(final String... header) {
19841985
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
19851986
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
19861987
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1987-
allowDuplicateHeaderNames);
1988+
duplicateHeaderMode);
19881989
}
19891990

19901991
/**
@@ -2006,7 +2007,7 @@ public CSVFormat withHeaderComments(final Object... headerComments) {
20062007
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20072008
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20082009
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2009-
allowDuplicateHeaderNames);
2010+
duplicateHeaderMode);
20102011
}
20112012

20122013
/**
@@ -2032,7 +2033,7 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
20322033
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20332034
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20342035
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2035-
allowDuplicateHeaderNames);
2036+
duplicateHeaderMode);
20362037
}
20372038

20382039
/**
@@ -2059,7 +2060,7 @@ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
20592060
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20602061
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20612062
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2062-
allowDuplicateHeaderNames);
2063+
duplicateHeaderMode);
20632064
}
20642065

20652066
/**
@@ -2084,7 +2085,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
20842085
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20852086
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20862087
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2087-
allowDuplicateHeaderNames);
2088+
duplicateHeaderMode);
20882089
}
20892090

20902091
/**
@@ -2104,7 +2105,7 @@ public CSVFormat withNullString(final String nullString) {
21042105
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21052106
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21062107
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2107-
allowDuplicateHeaderNames);
2108+
duplicateHeaderMode);
21082109
}
21092110

21102111
/**
@@ -2136,7 +2137,7 @@ public CSVFormat withQuote(final Character quoteChar) {
21362137
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
21372138
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
21382139
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2139-
allowDuplicateHeaderNames);
2140+
duplicateHeaderMode);
21402141
}
21412142

21422143
/**
@@ -2151,7 +2152,7 @@ public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
21512152
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
21522153
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21532154
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2154-
allowDuplicateHeaderNames);
2155+
duplicateHeaderMode);
21552156
}
21562157

21572158
/**
@@ -2190,7 +2191,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) {
21902191
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21912192
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21922193
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2193-
allowDuplicateHeaderNames);
2194+
duplicateHeaderMode);
21942195
}
21952196

21962197
/**
@@ -2218,7 +2219,7 @@ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
22182219
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22192220
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22202221
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2221-
allowDuplicateHeaderNames);
2222+
duplicateHeaderMode);
22222223
}
22232224

22242225
/**
@@ -2260,7 +2261,7 @@ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
22602261
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22612262
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22622263
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2263-
allowDuplicateHeaderNames);
2264+
duplicateHeaderMode);
22642265
}
22652266

22662267
/**
@@ -2288,6 +2289,6 @@ public CSVFormat withTrim(final boolean trim) {
22882289
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22892290
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22902291
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2291-
allowDuplicateHeaderNames);
2292+
duplicateHeaderMode);
22922293
}
22932294
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Token.Type.TOKEN;
21-
2220
import java.io.Closeable;
2321
import java.io.File;
2422
import java.io.FileInputStream;
@@ -42,6 +40,8 @@
4240
import java.util.Objects;
4341
import java.util.TreeMap;
4442

43+
import static org.apache.commons.csv.Token.Type.TOKEN;
44+
4545
/**
4646
* Parses CSV files according to the specified format.
4747
*
@@ -503,12 +503,14 @@ private Headers createHeaders() throws IOException {
503503
throw new IllegalArgumentException(
504504
"A header name is missing in " + Arrays.toString(headerRecord));
505505
}
506-
// Note: This will always allow a duplicate header if the header is empty
506+
507507
final boolean containsHeader = header != null && hdrMap.containsKey(header);
508-
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
508+
final DuplicateHeaderMode headerRule = this.format.getDuplicateHeaderMode();
509+
510+
if (containsHeader && headerRule != DuplicateHeaderMode.ALLOW_ALL && !(emptyHeader && headerRule == DuplicateHeaderMode.ALLOW_EMPTY)) {
509511
throw new IllegalArgumentException(
510512
String.format(
511-
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
513+
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withDuplicateHeaderMode().",
512514
header, Arrays.toString(headerRecord)));
513515
}
514516
if (header != null) {

0 commit comments

Comments
 (0)