Skip to content

Commit 93b77d0

Browse files
committed
CSV-264: Added DuplicateHeaderMode for flexibility with header strictness.
1 parent bf2f809 commit 93b77d0

6 files changed

Lines changed: 209 additions & 55 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 48 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Constants.BACKSLASH;
21-
import static org.apache.commons.csv.Constants.COMMA;
22-
import static org.apache.commons.csv.Constants.COMMENT;
23-
import static org.apache.commons.csv.Constants.CR;
24-
import static org.apache.commons.csv.Constants.CRLF;
25-
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
26-
import static org.apache.commons.csv.Constants.EMPTY;
27-
import static org.apache.commons.csv.Constants.LF;
28-
import static org.apache.commons.csv.Constants.PIPE;
29-
import static org.apache.commons.csv.Constants.SP;
30-
import static org.apache.commons.csv.Constants.TAB;
31-
3220
import java.io.File;
3321
import java.io.FileOutputStream;
3422
import java.io.IOException;
@@ -48,6 +36,18 @@
4836
import java.util.Objects;
4937
import java.util.Set;
5038

39+
import static org.apache.commons.csv.Constants.BACKSLASH;
40+
import static org.apache.commons.csv.Constants.COMMA;
41+
import static org.apache.commons.csv.Constants.COMMENT;
42+
import static org.apache.commons.csv.Constants.CR;
43+
import static org.apache.commons.csv.Constants.CRLF;
44+
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
45+
import static org.apache.commons.csv.Constants.EMPTY;
46+
import static org.apache.commons.csv.Constants.LF;
47+
import static org.apache.commons.csv.Constants.PIPE;
48+
import static org.apache.commons.csv.Constants.SP;
49+
import static org.apache.commons.csv.Constants.TAB;
50+
5151
/**
5252
* Specifies the format of a CSV file and parses input.
5353
*
@@ -261,13 +261,13 @@ public CSVFormat getFormat() {
261261
* <li>{@code withQuote('"')}</li>
262262
* <li>{@code withRecordSeparator("\r\n")}</li>
263263
* <li>{@code withIgnoreEmptyLines(true)}</li>
264-
* <li>{@code withAllowDuplicateHeaderNames(true)}</li>
264+
* <li>{@code withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
265265
* </ul>
266266
*
267267
* @see Predefined#Default
268268
*/
269269
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
270-
null, null, null, false, false, false, false, false, false, true);
270+
null, null, null, false, false, false, false, false, false, DuplicateHeaderMode.ALLOW_ALL);
271271

272272
/**
273273
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -290,7 +290,7 @@ public CSVFormat getFormat() {
290290
* <li>{@code withRecordSeparator("\r\n")}</li>
291291
* <li>{@code withIgnoreEmptyLines(false)}</li>
292292
* <li>{@code withAllowMissingColumnNames(true)}</li>
293-
* <li>{@code withAllowDuplicateHeaderNames(true)}</li>
293+
* <li>{@code withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
294294
* </ul>
295295
* <p>
296296
* Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
@@ -674,7 +674,7 @@ private static boolean isLineBreak(final Character c) {
674674
*/
675675
public static CSVFormat newFormat(final char delimiter) {
676676
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
677-
false, false, false, false, true);
677+
false, false, false, false, DuplicateHeaderMode.ALLOW_ALL);
678678
}
679679

680680
/**
@@ -689,7 +689,7 @@ public static CSVFormat valueOf(final String format) {
689689
return CSVFormat.Predefined.valueOf(format).getFormat();
690690
}
691691

692-
private final boolean allowDuplicateHeaderNames;
692+
private final DuplicateHeaderMode duplicateHeaderMode;
693693

694694
private final boolean allowMissingColumnNames;
695695

@@ -771,7 +771,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
771771
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
772772
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
773773
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
774-
final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) {
774+
final boolean trailingDelimiter, final boolean autoFlush, final DuplicateHeaderMode duplicateHeaderMode) {
775775
this.delimiter = delimiter;
776776
this.quoteCharacter = quoteChar;
777777
this.quoteMode = quoteMode;
@@ -790,7 +790,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
790790
this.trim = trim;
791791
this.autoFlush = autoFlush;
792792
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
793-
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
793+
this.duplicateHeaderMode = duplicateHeaderMode;
794794
validate();
795795
}
796796

@@ -822,7 +822,7 @@ public boolean equals(final Object obj) {
822822
if (allowMissingColumnNames != other.allowMissingColumnNames) {
823823
return false;
824824
}
825-
if (allowDuplicateHeaderNames != other.allowDuplicateHeaderNames) {
825+
if (duplicateHeaderMode != other.duplicateHeaderMode) {
826826
return false;
827827
}
828828
if (ignoreHeaderCase != other.ignoreHeaderCase) {
@@ -885,13 +885,13 @@ public String format(final Object... values) {
885885
}
886886

887887
/**
888-
* Returns true if and only if duplicate names are allowed in the headers.
888+
* Returns how duplicate headers are handled.
889889
*
890-
* @return whether duplicate header names are allowed
890+
* @return the mode that determines whether duplicate header names are allowed, allowed conditionally, or disallowed.
891891
* @since 1.7
892892
*/
893-
public boolean getAllowDuplicateHeaderNames() {
894-
return allowDuplicateHeaderNames;
893+
public DuplicateHeaderMode getDuplicateHeaderMode() {
894+
return duplicateHeaderMode;
895895
}
896896

897897
/**
@@ -1062,7 +1062,7 @@ public boolean getTrim() {
10621062
@Override
10631063
public int hashCode() {
10641064
return Objects.hash(delimiter, quoteMode, quoteCharacter, commentMarker, escapeCharacter, nullString,
1065-
ignoreSurroundingSpaces, ignoreHeaderCase, ignoreEmptyLines, skipHeaderRecord, allowDuplicateHeaderNames,
1065+
ignoreSurroundingSpaces, ignoreHeaderCase, ignoreEmptyLines, skipHeaderRecord, duplicateHeaderMode,
10661066
trim, autoFlush, trailingDelimiter, allowMissingColumnNames, recordSeparator, Arrays.hashCode(header),
10671067
Arrays.hashCode(headerComments));
10681068
}
@@ -1665,7 +1665,7 @@ private void validate() throws IllegalArgumentException {
16651665
}
16661666

16671667
// validate header
1668-
if (header != null && !allowDuplicateHeaderNames) {
1668+
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
16691669
final Set<String> dupCheck = new HashSet<>();
16701670
for (final String hdr : header) {
16711671
if (!dupCheck.add(hdr)) {
@@ -1683,21 +1683,22 @@ private void validate() throws IllegalArgumentException {
16831683
* @since 1.7
16841684
*/
16851685
public CSVFormat withAllowDuplicateHeaderNames() {
1686-
return withAllowDuplicateHeaderNames(true);
1686+
return withDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL);
16871687
}
16881688

16891689
/**
16901690
* Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value.
16911691
*
1692-
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
1692+
* @param duplicateHeaderMode the duplicate header names behavior, to allow, allow conditionally,
1693+
* or disallow duplicates.
16931694
* @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
16941695
* @since 1.7
16951696
*/
1696-
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
1697+
public CSVFormat withDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
16971698
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
16981699
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
16991700
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1700-
allowDuplicateHeaderNames);
1701+
duplicateHeaderMode);
17011702
}
17021703

17031704
/**
@@ -1723,7 +1724,7 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
17231724
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17241725
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17251726
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1726-
allowDuplicateHeaderNames);
1727+
duplicateHeaderMode);
17271728
}
17281729

17291730
/**
@@ -1739,7 +1740,7 @@ public CSVFormat withAutoFlush(final boolean autoFlush) {
17391740
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17401741
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17411742
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1742-
allowDuplicateHeaderNames);
1743+
duplicateHeaderMode);
17431744
}
17441745

17451746
/**
@@ -1775,7 +1776,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
17751776
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17761777
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17771778
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1778-
allowDuplicateHeaderNames);
1779+
duplicateHeaderMode);
17791780
}
17801781

17811782
/**
@@ -1794,7 +1795,7 @@ public CSVFormat withDelimiter(final char delimiter) {
17941795
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17951796
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17961797
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1797-
allowDuplicateHeaderNames);
1798+
duplicateHeaderMode);
17981799
}
17991800

18001801
/**
@@ -1826,7 +1827,7 @@ public CSVFormat withEscape(final Character escape) {
18261827
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
18271828
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
18281829
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1829-
allowDuplicateHeaderNames);
1830+
duplicateHeaderMode);
18301831
}
18311832

18321833
/**
@@ -1983,7 +1984,7 @@ public CSVFormat withHeader(final String... header) {
19831984
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
19841985
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
19851986
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1986-
allowDuplicateHeaderNames);
1987+
duplicateHeaderMode);
19871988
}
19881989

19891990
/**
@@ -2005,7 +2006,7 @@ public CSVFormat withHeaderComments(final Object... headerComments) {
20052006
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20062007
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20072008
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2008-
allowDuplicateHeaderNames);
2009+
duplicateHeaderMode);
20092010
}
20102011

20112012
/**
@@ -2031,7 +2032,7 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
20312032
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20322033
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20332034
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2034-
allowDuplicateHeaderNames);
2035+
duplicateHeaderMode);
20352036
}
20362037

20372038
/**
@@ -2058,7 +2059,7 @@ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
20582059
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20592060
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20602061
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2061-
allowDuplicateHeaderNames);
2062+
duplicateHeaderMode);
20622063
}
20632064

20642065
/**
@@ -2083,7 +2084,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
20832084
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20842085
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20852086
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2086-
allowDuplicateHeaderNames);
2087+
duplicateHeaderMode);
20872088
}
20882089

20892090
/**
@@ -2103,7 +2104,7 @@ public CSVFormat withNullString(final String nullString) {
21032104
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21042105
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21052106
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2106-
allowDuplicateHeaderNames);
2107+
duplicateHeaderMode);
21072108
}
21082109

21092110
/**
@@ -2135,7 +2136,7 @@ public CSVFormat withQuote(final Character quoteChar) {
21352136
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
21362137
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
21372138
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2138-
allowDuplicateHeaderNames);
2139+
duplicateHeaderMode);
21392140
}
21402141

21412142
/**
@@ -2150,7 +2151,7 @@ public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
21502151
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
21512152
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21522153
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2153-
allowDuplicateHeaderNames);
2154+
duplicateHeaderMode);
21542155
}
21552156

21562157
/**
@@ -2189,7 +2190,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) {
21892190
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21902191
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21912192
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2192-
allowDuplicateHeaderNames);
2193+
duplicateHeaderMode);
21932194
}
21942195

21952196
/**
@@ -2217,7 +2218,7 @@ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
22172218
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22182219
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22192220
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2220-
allowDuplicateHeaderNames);
2221+
duplicateHeaderMode);
22212222
}
22222223

22232224
/**
@@ -2259,7 +2260,7 @@ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
22592260
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22602261
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22612262
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2262-
allowDuplicateHeaderNames);
2263+
duplicateHeaderMode);
22632264
}
22642265

22652266
/**
@@ -2287,6 +2288,6 @@ public CSVFormat withTrim(final boolean trim) {
22872288
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22882289
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22892290
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2290-
allowDuplicateHeaderNames);
2291+
duplicateHeaderMode);
22912292
}
22922293
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
package org.apache.commons.csv;
1919

20-
import static org.apache.commons.csv.Token.Type.TOKEN;
21-
2220
import java.io.Closeable;
2321
import java.io.File;
2422
import java.io.FileInputStream;
@@ -42,6 +40,8 @@
4240
import java.util.Objects;
4341
import java.util.TreeMap;
4442

43+
import static org.apache.commons.csv.Token.Type.TOKEN;
44+
4545
/**
4646
* Parses CSV files according to the specified format.
4747
*
@@ -503,12 +503,14 @@ private Headers createHeaders() throws IOException {
503503
throw new IllegalArgumentException(
504504
"A header name is missing in " + Arrays.toString(headerRecord));
505505
}
506-
// Note: This will always allow a duplicate header if the header is empty
506+
507507
final boolean containsHeader = header != null && hdrMap.containsKey(header);
508-
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
508+
final DuplicateHeaderMode headerRule = this.format.getDuplicateHeaderMode();
509+
510+
if (containsHeader && headerRule != DuplicateHeaderMode.ALLOW_ALL && !(emptyHeader && headerRule == DuplicateHeaderMode.ALLOW_EMPTY)) {
509511
throw new IllegalArgumentException(
510512
String.format(
511-
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
513+
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withDuplicateHeaderMode().",
512514
header, Arrays.toString(headerRecord)));
513515
}
514516
if (header != null) {

0 commit comments

Comments
 (0)