Skip to content
This repository was archived by the owner on Jun 3, 2026. It is now read-only.

Commit 030fb8e

Browse files
davidmoten authored and garydgregory committed
[CSV-239] Add CSVRecord.getHeaderNames and allow duplicate headers (apache#41)
* [CSV-239] Cannot get headers in column order from CSVRecord.
* getHeaderNames returns all headers in column order including repeats which are allowed as per RFC 4180
* add CSVFormat.withAllowDuplicateHeaderNames()
* [CSV-239] Cannot get headers in column order from CSVRecord.
* only wrap headerNames with unmodifiableList if non-empty
* fix and enhance CSVRecord.toMap javadoc
* [CSV-239] Cannot get headers in column order from CSVRecord.
* fix exception messages
* [CSV-239] Cannot get headers in column order from CSVRecord.
* fix whitespace
* [CSV-239] Cannot get headers in column order from CSVRecord.
* simplify if statement
* [CSV-239] Cannot get headers in column order from CSVRecord.
* fix indentation
* add javadoc to Headers class
* rename method to createHeaders
* use String.format to build error message
* initialize header names List with appropriate size
1 parent 4d2616b commit 030fb8e

4 files changed

Lines changed: 128 additions & 39 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 57 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -260,12 +260,13 @@ public CSVFormat getFormat() {
260260
* <li>{@code withQuote('"')}</li>
261261
* <li>{@code withRecordSeparator("\r\n")}</li>
262262
* <li>{@code withIgnoreEmptyLines(true)}</li>
263+
* <li>{@code withAllowDuplicateHeaderNames(true)}</li>
263264
* </ul>
264265
*
265266
* @see Predefined#Default
266267
*/
267268
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
268-
null, null, null, false, false, false, false, false, false);
269+
null, null, null, false, false, false, false, false, false, true);
269270

270271
/**
271272
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -288,6 +289,7 @@ public CSVFormat getFormat() {
288289
* <li>{@code {@link #withRecordSeparator(String) withRecordSeparator("\r\n")}}</li>
289290
* <li>{@code {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}}</li>
290291
* <li>{@code {@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}}</li>
292+
* <li>{@code {@link #withAllowDuplicateHeaderNames(boolean) withAllowDuplicateHeaderNames(true)}}</li>
291293
* </ul>
292294
* <p>
293295
* Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
@@ -671,7 +673,7 @@ private static boolean isLineBreak(final Character c) {
671673
*/
672674
public static CSVFormat newFormat(final char delimiter) {
673675
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
674-
false, false, false, false);
676+
false, false, false, false, true);
675677
}
676678

677679
/**
@@ -721,6 +723,8 @@ public static CSVFormat valueOf(final String format) {
721723
private final boolean trim;
722724

723725
private final boolean autoFlush;
726+
727+
private final boolean allowDuplicateHeaderNames;
724728

725729
/**
726730
* Creates a customized CSV format.
@@ -766,7 +770,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
766770
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
767771
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
768772
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
769-
final boolean trailingDelimiter, final boolean autoFlush) {
773+
final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) {
770774
this.delimiter = delimiter;
771775
this.quoteCharacter = quoteChar;
772776
this.quoteMode = quoteMode;
@@ -785,6 +789,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
785789
this.trim = trim;
786790
this.autoFlush = autoFlush;
787791
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
792+
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
788793
validate();
789794
}
790795

@@ -1686,7 +1691,8 @@ public CSVFormat withAllowMissingColumnNames() {
16861691
public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
16871692
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
16881693
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1689-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1694+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1695+
allowDuplicateHeaderNames);
16901696
}
16911697

16921698
/**
@@ -1701,7 +1707,8 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
17011707
public CSVFormat withAutoFlush(final boolean autoFlush) {
17021708
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17031709
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1704-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1710+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1711+
allowDuplicateHeaderNames);
17051712
}
17061713

17071714
/**
@@ -1736,7 +1743,8 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
17361743
}
17371744
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17381745
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1739-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1746+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1747+
allowDuplicateHeaderNames);
17401748
}
17411749

17421750
/**
@@ -1754,7 +1762,8 @@ public CSVFormat withDelimiter(final char delimiter) {
17541762
}
17551763
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17561764
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1757-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1765+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1766+
allowDuplicateHeaderNames);
17581767
}
17591768

17601769
/**
@@ -1785,7 +1794,8 @@ public CSVFormat withEscape(final Character escape) {
17851794
}
17861795
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
17871796
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1788-
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1797+
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1798+
allowDuplicateHeaderNames);
17891799
}
17901800

17911801
/**
@@ -1941,7 +1951,8 @@ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLExceptio
19411951
public CSVFormat withHeader(final String... header) {
19421952
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
19431953
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1944-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1954+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1955+
allowDuplicateHeaderNames);
19451956
}
19461957

19471958
/**
@@ -1962,7 +1973,8 @@ public CSVFormat withHeader(final String... header) {
19621973
public CSVFormat withHeaderComments(final Object... headerComments) {
19631974
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
19641975
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1965-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1976+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1977+
allowDuplicateHeaderNames);
19661978
}
19671979

19681980
/**
@@ -1987,7 +1999,8 @@ public CSVFormat withIgnoreEmptyLines() {
19871999
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
19882000
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
19892001
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1990-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2002+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2003+
allowDuplicateHeaderNames);
19912004
}
19922005

19932006
/**
@@ -2013,7 +2026,8 @@ public CSVFormat withIgnoreHeaderCase() {
20132026
public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
20142027
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20152028
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2016-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2029+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2030+
allowDuplicateHeaderNames);
20172031
}
20182032

20192033
/**
@@ -2038,7 +2052,8 @@ public CSVFormat withIgnoreSurroundingSpaces() {
20382052
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
20392053
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20402054
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2041-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2055+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2056+
allowDuplicateHeaderNames);
20422057
}
20432058

20442059
/**
@@ -2057,7 +2072,8 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
20572072
public CSVFormat withNullString(final String nullString) {
20582073
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20592074
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2060-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2075+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2076+
allowDuplicateHeaderNames);
20612077
}
20622078

20632079
/**
@@ -2088,7 +2104,8 @@ public CSVFormat withQuote(final Character quoteChar) {
20882104
}
20892105
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
20902106
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
2091-
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2107+
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2108+
allowDuplicateHeaderNames);
20922109
}
20932110

20942111
/**
@@ -2102,7 +2119,8 @@ public CSVFormat withQuote(final Character quoteChar) {
21022119
public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
21032120
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
21042121
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2105-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2122+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2123+
allowDuplicateHeaderNames);
21062124
}
21072125

21082126
/**
@@ -2140,7 +2158,8 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
21402158
public CSVFormat withRecordSeparator(final String recordSeparator) {
21412159
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21422160
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2143-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2161+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2162+
allowDuplicateHeaderNames);
21442163
}
21452164

21462165
/**
@@ -2167,7 +2186,8 @@ public CSVFormat withSkipHeaderRecord() {
21672186
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
21682187
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21692188
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2170-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2189+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2190+
allowDuplicateHeaderNames);
21712191
}
21722192

21732193
/**
@@ -2208,7 +2228,8 @@ public CSVFormat withTrailingDelimiter() {
22082228
public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
22092229
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22102230
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2211-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2231+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2232+
allowDuplicateHeaderNames);
22122233
}
22132234

22142235
/**
@@ -2233,6 +2254,22 @@ public CSVFormat withTrim() {
22332254
public CSVFormat withTrim(final boolean trim) {
22342255
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22352256
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2236-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
2257+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2258+
allowDuplicateHeaderNames);
22372259
}
2260+
2261+
public CSVFormat withAllowDuplicateHeaderNames(boolean allowDuplicateHeaderNames) {
2262+
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
2263+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2264+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2265+
allowDuplicateHeaderNames);
2266+
}
2267+
2268+
public CSVFormat withAllowDuplicateHeaderNames() {
2269+
return withAllowDuplicateHeaderNames(true);
2270+
}
2271+
2272+
public boolean getAllowDuplicateHeaderNames() {
2273+
return allowDuplicateHeaderNames;
2274+
}
22382275
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 46 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -410,8 +410,9 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
410410
this.format = format;
411411
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
412412
this.csvRecordIterator = new CSVRecordIterator();
413-
this.headerMap = createHeaderMap(); // 1st
414-
this.headerNames = createHeaderNames(this.headerMap); // 2nd
413+
Headers headers = createHeaders();
414+
this.headerMap = headers.headerMap;
415+
this.headerNames = headers.headerNames;
415416
this.characterOffset = characterOffset;
416417
this.recordNumber = recordNumber - 1;
417418
}
@@ -445,14 +446,35 @@ private Map<String, Integer> createEmptyHeaderMap() {
445446
new LinkedHashMap<>();
446447
}
447448

449+
/**
450+
* Header information based on name and position.
451+
*/
452+
private static final class Headers {
453+
/**
454+
* Header column positions (0-based)
455+
*/
456+
final Map<String, Integer> headerMap;
457+
458+
/**
459+
* Header names in column order
460+
*/
461+
final List<String> headerNames;
462+
463+
Headers(Map<String, Integer> headerMap, List<String> headerNames) {
464+
this.headerMap = headerMap;
465+
this.headerNames = headerNames;
466+
}
467+
}
468+
448469
/**
449470
* Creates the name to index mapping if the format defines a header.
450471
*
451472
* @return null if the format has no header.
452473
* @throws IOException if there is a problem reading the header or skipping the first record
453474
*/
454-
private Map<String, Integer> createHeaderMap() throws IOException {
475+
private Headers createHeaders() throws IOException {
455476
Map<String, Integer> hdrMap = null;
477+
List<String> headerNames = null;
456478
final String[] formatHeader = this.format.getHeader();
457479
if (formatHeader != null) {
458480
hdrMap = createEmptyHeaderMap();
@@ -476,27 +498,34 @@ private Map<String, Integer> createHeaderMap() throws IOException {
476498
final String header = headerRecord[i];
477499
final boolean containsHeader = header == null ? false : hdrMap.containsKey(header);
478500
final boolean emptyHeader = header == null || header.trim().isEmpty();
479-
if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) {
480-
throw new IllegalArgumentException("The header contains a duplicate name: \"" + header
481-
+ "\" in " + Arrays.toString(headerRecord));
501+
if (containsHeader) {
502+
if (!emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
503+
throw new IllegalArgumentException(
504+
String.format("The header contains a duplicate name: \"%s\" in %s."
505+
+ " If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
506+
header, Arrays.toString(headerRecord)));
507+
}
508+
if (emptyHeader && !this.format.getAllowMissingColumnNames()) {
509+
throw new IllegalArgumentException(
510+
"A header name is missing in " + Arrays.toString(headerRecord));
511+
}
482512
}
483513
if (header != null) {
484514
hdrMap.put(header, Integer.valueOf(i));
515+
if (headerNames == null) {
516+
headerNames = new ArrayList<>(headerRecord.length);
517+
}
518+
headerNames.add(header);
485519
}
486520
}
487521
}
522+
}
523+
if (headerNames == null) {
524+
headerNames = Collections.emptyList(); //immutable
525+
} else {
526+
headerNames = Collections.unmodifiableList(headerNames);
488527
}
489-
return hdrMap;
490-
}
491-
492-
private List<String> createHeaderNames(final Map<String, Integer> headerMap) {
493-
// @formatter:off
494-
return headerMap == null ? null
495-
: headerMap.entrySet().stream()
496-
.sorted(Map.Entry.comparingByValue())
497-
.map(Map.Entry::getKey)
498-
.collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
499-
// @formatter:on
528+
return new Headers(hdrMap, headerNames);
500529
}
501530

502531
/**

src/main/java/org/apache/commons/csv/CSVRecord.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,7 @@ private List<String> toList() {
265265
}
266266

267267
/**
268-
* Copies this record into a new Map. The new map is not connect
268+
* Copies this record into a new Map of header name to record value.
269269
*
270270
* @return A new Map. The map is empty if the record has no headers.
271271
*/

0 commit comments

Comments
 (0)