Skip to content

Commit 99c3819

Browse files
committed
[CSV-121] Exception that the header contains duplicate names when the column names are empty. Added the setting ignoreEmptyHeaders, defaults to false to keep the IAE as the default behavior.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1602206 13f79535-47bb-0310-9956-ffa450edef68
1 parent f9871c5 commit 99c3819

4 files changed

Lines changed: 73 additions & 24 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
<body>
4141

4242
<release version="1.0" date="TBD" description="First release">
43+
<action issue="CSV-121" type="add" dev="ggregory" due-to="Sebastian Hardt">IllegalArgumentException thrown when the header contains duplicate names when the column names are empty.</action>
4344
<action issue="CSV-120" type="add" dev="ggregory" due-to="Sergei Lebedev">CSVFormat#withHeader doesn't work with CSVPrinter</action>
4445
<action issue="CSV-119" type="add" dev="ggregory" due-to="Sergei Lebedev">CSVFormat is missing a print(...) method</action>
4546
<action issue="CSV-118" type="fix" dev="ggregory" due-to="Enrique Lara">CSVRecord.toMap() throws NPE on formats with no

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ public final class CSVFormat implements Serializable {
152152
private final Character commentStart; // null if commenting is disabled
153153
private final Character escape; // null if escaping is disabled
154154
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
155+
private final boolean ignoreEmptyHeaders; // Should repeated empty header names be tolerated when parsing the header line?
155156
private final boolean ignoreEmptyLines;
156157
private final String recordSeparator; // for outputs
157158
private final String nullString; // the string to be used for null values
@@ -172,7 +173,7 @@ public final class CSVFormat implements Serializable {
172173
* </ul>
173174
*/
174175
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null,
175-
false, true, CRLF, null, null, false);
176+
false, true, CRLF, null, null, false, false);
176177

177178
/**
178179
* Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
@@ -264,7 +265,7 @@ private static boolean isLineBreak(final Character c) {
264265
* @throws IllegalArgumentException if the delimiter is a line break character
265266
*/
266267
public static CSVFormat newFormat(final char delimiter) {
267-
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, false);
268+
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, false, false);
268269
}
269270

270271
/**
@@ -291,13 +292,15 @@ public static CSVFormat newFormat(final char delimiter) {
291292
* @param header
292293
* the header
293294
* @param skipHeaderRecord whether the parser skips the first record of the input when the header is set explicitly
295+
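* @param ignoreEmptyHeaders whether repeated empty header names are ignored when parsing the header line instead of causing an IllegalArgumentException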
294296
* @throws IllegalArgumentException if the delimiter is a line break character
295297
*/
296298
private CSVFormat(final char delimiter, final Character quoteChar,
297299
final Quote quotePolicy, final Character commentStart,
298300
final Character escape, final boolean ignoreSurroundingSpaces,
299301
final boolean ignoreEmptyLines, final String recordSeparator,
300-
final String nullString, final String[] header, final boolean skipHeaderRecord) {
302+
final String nullString, final String[] header, final boolean skipHeaderRecord,
303+
final boolean ignoreEmptyHeaders) {
301304
if (isLineBreak(delimiter)) {
302305
throw new IllegalArgumentException("The delimiter cannot be a line break");
303306
}
@@ -307,6 +310,7 @@ private CSVFormat(final char delimiter, final Character quoteChar,
307310
this.commentStart = commentStart;
308311
this.escape = escape;
309312
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
313+
this.ignoreEmptyHeaders = ignoreEmptyHeaders;
310314
this.ignoreEmptyLines = ignoreEmptyLines;
311315
this.recordSeparator = recordSeparator;
312316
this.nullString = nullString;
@@ -447,6 +451,16 @@ public String[] getHeader() {
447451
return header != null ? header.clone() : null;
448452
}
449453

454+
/**
455+
* Specifies whether empty headers are ignored when parsing the header line.
456+
*
457+
* @return <tt>true</tt> if headers are ignored when parsing the header line, <tt>false</tt> to throw an
458+
* {@link IllegalArgumentException}..
459+
*/
460+
public boolean getIgnoreEmptyHeaders() {
461+
return ignoreEmptyHeaders;
462+
}
463+
450464
/**
451465
* Specifies whether empty lines between records are ignored when parsing input.
452466
*
@@ -718,7 +732,8 @@ public CSVFormat withCommentStart(final Character commentStart) {
718732
throw new IllegalArgumentException("The comment start character cannot be a line break");
719733
}
720734
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
721-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
735+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
736+
ignoreEmptyHeaders);
722737
}
723738

724739
/**
@@ -735,7 +750,8 @@ public CSVFormat withDelimiter(final char delimiter) {
735750
throw new IllegalArgumentException("The delimiter cannot be a line break");
736751
}
737752
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
738-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
753+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
754+
ignoreEmptyHeaders);
739755
}
740756

741757
/**
@@ -765,7 +781,8 @@ public CSVFormat withEscape(final Character escape) {
765781
throw new IllegalArgumentException("The escape character cannot be a line break");
766782
}
767783
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
768-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
784+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
785+
ignoreEmptyHeaders);
769786
}
770787

771788
/**
@@ -787,7 +804,22 @@ public CSVFormat withEscape(final Character escape) {
787804
*/
788805
public CSVFormat withHeader(final String... header) {
789806
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
790-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
807+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
808+
ignoreEmptyHeaders);
809+
}
810+
811+
/**
812+
* Sets the empty header behavior of the format.
813+
*
814+
* @param ignoreEmptyHeaders
815+
* the empty header behavior, <tt>true</tt> to ignore empty headers in the header line,
816+
* <tt>false</tt> to cause an {@link IllegalArgumentException} to be thrown.
817+
* @return A new CSVFormat that is equal to this but with the specified empty header behavior.
818+
*/
819+
public CSVFormat withIgnoreEmptyHeaders(final boolean ignoreEmptyHeaders) {
820+
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
821+
ignoreSurroundingSpaces, ignoreEmptyHeaders, recordSeparator, nullString, header, skipHeaderRecord,
822+
ignoreEmptyHeaders);
791823
}
792824

793825
/**
@@ -800,7 +832,8 @@ public CSVFormat withHeader(final String... header) {
800832
*/
801833
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
802834
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
803-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
835+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
836+
ignoreEmptyHeaders);
804837
}
805838

806839
/**
@@ -813,7 +846,8 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
813846
*/
814847
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
815848
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
816-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
849+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
850+
ignoreEmptyHeaders);
817851
}
818852

819853
/**
@@ -833,7 +867,8 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
833867
*/
834868
public CSVFormat withNullString(final String nullString) {
835869
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
836-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
870+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
871+
ignoreEmptyHeaders);
837872
}
838873

839874
/**
@@ -863,7 +898,8 @@ public CSVFormat withQuoteChar(final Character quoteChar) {
863898
throw new IllegalArgumentException("The quoteChar cannot be a line break");
864899
}
865900
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
866-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
901+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
902+
ignoreEmptyHeaders);
867903
}
868904

869905
/**
@@ -876,7 +912,8 @@ public CSVFormat withQuoteChar(final Character quoteChar) {
876912
*/
877913
public CSVFormat withQuotePolicy(final Quote quotePolicy) {
878914
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
879-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
915+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
916+
ignoreEmptyHeaders);
880917
}
881918

882919
/**
@@ -901,7 +938,8 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
901938
*/
902939
public CSVFormat withRecordSeparator(final String recordSeparator) {
903940
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
904-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
941+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
942+
ignoreEmptyHeaders);
905943
}
906944

907945
/**
@@ -915,6 +953,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) {
915953
*/
916954
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
917955
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
918-
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
956+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
957+
ignoreEmptyHeaders);
919958
}
920959
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -360,28 +360,31 @@ private Map<String, Integer> initializeHeader() throws IOException {
360360
if (formatHeader != null) {
361361
hdrMap = new LinkedHashMap<String, Integer>();
362362

363-
String[] header = null;
363+
String[] headerRecord = null;
364364
if (formatHeader.length == 0) {
365365
// read the header from the first line of the file
366366
final CSVRecord nextRecord = this.nextRecord();
367367
if (nextRecord != null) {
368-
header = nextRecord.values();
368+
headerRecord = nextRecord.values();
369369
}
370370
} else {
371371
if (this.format.getSkipHeaderRecord()) {
372372
this.nextRecord();
373373
}
374-
header = formatHeader;
374+
headerRecord = formatHeader;
375375
}
376376

377377
// build the name to index mappings
378-
if (header != null) {
379-
for (int i = 0; i < header.length; i++) {
380-
if (hdrMap.containsKey(header[i])) {
381-
throw new IllegalArgumentException("The header contains duplicate names: " +
382-
Arrays.toString(header));
378+
if (headerRecord != null) {
379+
for (int i = 0; i < headerRecord.length; i++) {
380+
final String header = headerRecord[i];
381+
final boolean containsHeader = hdrMap.containsKey(header);
382+
final boolean emptyHeader = header.trim().isEmpty();
383+
if (containsHeader && (!emptyHeader || (emptyHeader && !this.format.getIgnoreEmptyHeaders()))) {
384+
throw new IllegalArgumentException("The header contains a duplicate name: \"" + header
385+
+ "\" in " + Arrays.toString(headerRecord));
383386
}
384-
hdrMap.put(header[i], Integer.valueOf(i));
387+
hdrMap.put(header, Integer.valueOf(i));
385388
}
386389
}
387390
}

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ public void testGetHeaderMap() throws Exception {
493493
}
494494

495495
@Test(expected = IllegalArgumentException.class)
496-
public void testDuplicateHeaderEntries() throws Exception {
496+
public void testDuplicateHeaders() throws Exception {
497497
CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader(new String[]{}));
498498
}
499499

@@ -655,6 +655,12 @@ public void testHeadersMissingException() throws Exception {
655655
CSVFormat.DEFAULT.withHeader().parse(in).iterator();
656656
}
657657

658+
@Test
659+
public void testHeadersMissing() throws Exception {
660+
final Reader in = new StringReader("a,,c,,d\n1,2,3,4\nx,y,z,zz");
661+
CSVFormat.DEFAULT.withHeader().withIgnoreEmptyHeaders(true).parse(in).iterator();
662+
}
663+
658664
@Test
659665
public void testHeaderComment() throws Exception {
660666
final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");

0 commit comments

Comments
 (0)