Skip to content

Commit de47291

Browse files
committed
Test for empty/blank headers the same way for input and output
1 parent b6c63e4 commit de47291

4 files changed

Lines changed: 84 additions & 64 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 57 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
import java.util.Set;
5050

5151
/**
52-
* Specifies the format of a CSV file and parses input.
52+
* Specifies the format of a CSV file for parsing and writing.
5353
*
5454
* <h2>Using predefined formats</h2>
5555
*
@@ -174,6 +174,9 @@
174174
* <p>
175175
* This class is immutable.
176176
* </p>
177+
* <p>
178+
* Not all settings are used for both parsing and writing.
179+
* </p>
177180
*/
178181
public final class CSVFormat implements Serializable {
179182

@@ -1210,6 +1213,10 @@ private static boolean containsLineBreak(final String source) {
12101213
return contains(source, CR) || contains(source, LF);
12111214
}
12121215

1216+
static boolean isBlank(final String value) {
1217+
return value == null || value.trim().isEmpty();
1218+
}
1219+
12131220
/**
12141221
* Returns true if the given character is a line break character.
12151222
*
@@ -1232,10 +1239,12 @@ private static boolean isLineBreak(final Character c) {
12321239
return c != null && isLineBreak(c.charValue());
12331240
}
12341241

1242+
/** Same test as in {@link String#trim()}. */
12351243
private static boolean isTrimChar(final char ch) {
12361244
return ch <= SP;
12371245
}
12381246

1247+
/** Same test as in {@link String#trim()}. */
12391248
private static boolean isTrimChar(final CharSequence charSequence, final int pos) {
12401249
return isTrimChar(charSequence.charAt(pos));
12411250
}
@@ -2250,8 +2259,16 @@ public String toString() {
22502259
return sb.toString();
22512260
}
22522261

2262+
String trim(final String value) {
2263+
return getTrim() ? value.trim() : value;
2264+
}
2265+
22532266
/**
2254-
* Verifies the validity and consistency of the attributes, and throws an IllegalArgumentException if necessary.
2267+
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
2268+
* <p>
2269+
* Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used for
2270+
* parsing, so it cannot be checked here.
2271+
* </p>
22552272
*
22562273
* @throws IllegalArgumentException Thrown when any attribute is invalid or inconsistent with other attributes.
22572274
*/
@@ -2289,19 +2306,19 @@ private void validate() throws IllegalArgumentException {
22892306
final Set<String> dupCheckSet = new HashSet<>(headers.length);
22902307
final boolean rejectEmpty = duplicateHeaderMode != DuplicateHeaderMode.ALLOW_EMPTY;
22912308
for (final String header : headers) {
2292-
final boolean empty = header == null || header.isEmpty();
2293-
if (rejectEmpty && empty) {
2309+
final boolean blank = isBlank(header);
2310+
if (rejectEmpty && blank) {
22942311
throw new IllegalArgumentException("Header is empty");
22952312
}
2296-
if (!empty && !dupCheckSet.add(header)) {
2313+
if (!blank && !dupCheckSet.add(header)) {
22972314
throw new IllegalArgumentException(String.format("Header '%s' is a duplicate in %s", header, Arrays.toString(headers)));
22982315
}
22992316
}
23002317
}
23012318
}
23022319

23032320
/**
2304-
* Returns a new {@code CSVFormat} that allows duplicate header names.
2321+
* Builds a new {@code CSVFormat} that allows duplicate header names.
23052322
*
23062323
* @return a new {@code CSVFormat} that allows duplicate header names
23072324
* @since 1.7
@@ -2313,7 +2330,7 @@ public CSVFormat withAllowDuplicateHeaderNames() {
23132330
}
23142331

23152332
/**
2316-
* Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value.
2333+
* Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value.
23172334
*
23182335
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
23192336
* @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
@@ -2327,7 +2344,7 @@ public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeade
23272344
}
23282345

23292346
/**
2330-
* Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}.
2347+
* Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}.
23312348
*
23322349
* @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
23332350
* @see Builder#setAllowMissingColumnNames(boolean)
@@ -2340,7 +2357,7 @@ public CSVFormat withAllowMissingColumnNames() {
23402357
}
23412358

23422359
/**
2343-
* Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
2360+
* Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
23442361
*
23452362
* @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause
23462363
* an {@link IllegalArgumentException} to be thrown.
@@ -2353,7 +2370,7 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
23532370
}
23542371

23552372
/**
2356-
* Returns a new {@code CSVFormat} with whether to flush on close.
2373+
* Builds a new {@code CSVFormat} with whether to flush on close.
23572374
*
23582375
* @param autoFlush whether to flush on close.
23592376
*
@@ -2367,7 +2384,7 @@ public CSVFormat withAutoFlush(final boolean autoFlush) {
23672384
}
23682385

23692386
/**
2370-
* Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
2387+
* Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
23712388
*
23722389
* Note that the comment start character is only recognized at the start of a line.
23732390
*
@@ -2382,7 +2399,7 @@ public CSVFormat withCommentMarker(final char commentMarker) {
23822399
}
23832400

23842401
/**
2385-
* Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
2402+
* Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
23862403
*
23872404
* Note that the comment start character is only recognized at the start of a line.
23882405
*
@@ -2397,7 +2414,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
23972414
}
23982415

23992416
/**
2400-
* Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character.
2417+
* Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
24012418
*
24022419
* @param delimiter the delimiter character
24032420
* @return A new CSVFormat that is equal to this with the specified character as delimiter
@@ -2410,7 +2427,7 @@ public CSVFormat withDelimiter(final char delimiter) {
24102427
}
24112428

24122429
/**
2413-
* Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
2430+
* Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
24142431
*
24152432
* @param escape the escape character
24162433
* @return A new CSVFormat that is equal to this but with the specified character as the escape character
@@ -2423,7 +2440,7 @@ public CSVFormat withEscape(final char escape) {
24232440
}
24242441

24252442
/**
2426-
* Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
2443+
* Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
24272444
*
24282445
* @param escape the escape character, use {@code null} to disable
24292446
* @return A new CSVFormat that is equal to this but with the specified character as the escape character
@@ -2436,7 +2453,7 @@ public CSVFormat withEscape(final Character escape) {
24362453
}
24372454

24382455
/**
2439-
* Returns a new {@code CSVFormat} using the first record as header.
2456+
* Builds a new {@code CSVFormat} using the first record as header.
24402457
*
24412458
* <p>
24422459
* Calling this method is equivalent to calling:
@@ -2463,7 +2480,7 @@ public CSVFormat withFirstRecordAsHeader() {
24632480
}
24642481

24652482
/**
2466-
* Returns a new {@code CSVFormat} with the header of the format defined by the enum class.
2483+
* Builds a new {@code CSVFormat} with the header of the format defined by the enum class.
24672484
*
24682485
* <p>
24692486
* Example:
@@ -2493,7 +2510,7 @@ public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) {
24932510
}
24942511

24952512
/**
2496-
* Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
2513+
* Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
24972514
* input file with:
24982515
*
24992516
* <pre>
@@ -2521,7 +2538,7 @@ public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
25212538
}
25222539

25232540
/**
2524-
* Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
2541+
* Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
25252542
* input file with:
25262543
*
25272544
* <pre>
@@ -2549,7 +2566,7 @@ public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQ
25492566
}
25502567

25512568
/**
2552-
* Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file
2569+
* Builds a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file
25532570
* with:
25542571
*
25552572
* <pre>
@@ -2576,7 +2593,7 @@ public CSVFormat withHeader(final String... header) {
25762593
}
25772594

25782595
/**
2579-
* Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers.
2596+
* Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers.
25802597
* This setting is ignored by the parser.
25812598
*
25822599
* <pre>
@@ -2595,7 +2612,7 @@ public CSVFormat withHeaderComments(final Object... headerComments) {
25952612
}
25962613

25972614
/**
2598-
* Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
2615+
* Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
25992616
*
26002617
* @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
26012618
* @see Builder#setIgnoreEmptyLines(boolean)
@@ -2608,7 +2625,7 @@ public CSVFormat withIgnoreEmptyLines() {
26082625
}
26092626

26102627
/**
2611-
* Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
2628+
* Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
26122629
*
26132630
* @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty
26142631
* lines to empty records.
@@ -2621,7 +2638,7 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
26212638
}
26222639

26232640
/**
2624-
* Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
2641+
* Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
26252642
*
26262643
* @return A new CSVFormat that will ignore the case of header names.
26272644
* @see Builder#setIgnoreHeaderCase(boolean)
@@ -2634,7 +2651,7 @@ public CSVFormat withIgnoreHeaderCase() {
26342651
}
26352652

26362653
/**
2637-
* Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case.
2654+
* Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case.
26382655
*
26392656
* @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
26402657
* @return A new CSVFormat that will ignore the case of header names if specified as {@code true}
@@ -2647,7 +2664,7 @@ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
26472664
}
26482665

26492666
/**
2650-
* Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}.
2667+
* Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}.
26512668
*
26522669
* @return A new CSVFormat that is equal to this but with the specified parser trimming behavior.
26532670
* @see Builder#setIgnoreSurroundingSpaces(boolean)
@@ -2660,7 +2677,7 @@ public CSVFormat withIgnoreSurroundingSpaces() {
26602677
}
26612678

26622679
/**
2663-
* Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value.
2680+
* Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value.
26642681
*
26652682
* @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
26662683
* @return A new CSVFormat that is equal to this but with the specified trimming behavior.
@@ -2672,7 +2689,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
26722689
}
26732690

26742691
/**
2675-
* Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output.
2692+
* Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output.
26762693
* <ul>
26772694
* <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
26782695
* <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
@@ -2688,7 +2705,7 @@ public CSVFormat withNullString(final String nullString) {
26882705
}
26892706

26902707
/**
2691-
* Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
2708+
* Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
26922709
*
26932710
* @param quoteChar the quote character
26942711
* @return A new CSVFormat that is equal to this but with the specified character as quoteChar
@@ -2701,7 +2718,7 @@ public CSVFormat withQuote(final char quoteChar) {
27012718
}
27022719

27032720
/**
2704-
* Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
2721+
* Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
27052722
*
27062723
* @param quoteChar the quote character, use {@code null} to disable.
27072724
* @return A new CSVFormat that is equal to this but with the specified character as quoteChar
@@ -2714,7 +2731,7 @@ public CSVFormat withQuote(final Character quoteChar) {
27142731
}
27152732

27162733
/**
2717-
* Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value.
2734+
* Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value.
27182735
*
27192736
* @param quoteMode the quote policy to use for output.
27202737
*
@@ -2727,7 +2744,7 @@ public CSVFormat withQuoteMode(final QuoteMode quoteMode) {
27272744
}
27282745

27292746
/**
2730-
* Returns a new {@code CSVFormat} with the record separator of the format set to the specified character.
2747+
* Builds a new {@code CSVFormat} with the record separator of the format set to the specified character.
27312748
*
27322749
* <p>
27332750
* <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
@@ -2744,7 +2761,7 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
27442761
}
27452762

27462763
/**
2747-
* Returns a new {@code CSVFormat} with the record separator of the format set to the specified String.
2764+
* Builds a new {@code CSVFormat} with the record separator of the format set to the specified String.
27482765
*
27492766
* <p>
27502767
* <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
@@ -2762,7 +2779,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) {
27622779
}
27632780

27642781
/**
2765-
* Returns a new {@code CSVFormat} with skipping the header record set to {@code true}.
2782+
* Builds a new {@code CSVFormat} with skipping the header record set to {@code true}.
27662783
*
27672784
* @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
27682785
* @see Builder#setSkipHeaderRecord(boolean)
@@ -2776,7 +2793,7 @@ public CSVFormat withSkipHeaderRecord() {
27762793
}
27772794

27782795
/**
2779-
* Returns a new {@code CSVFormat} with whether to skip the header record.
2796+
* Builds a new {@code CSVFormat} with whether to skip the header record.
27802797
*
27812798
* @param skipHeaderRecord whether to skip the header record.
27822799
* @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
@@ -2789,7 +2806,7 @@ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
27892806
}
27902807

27912808
/**
2792-
* Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows
2809+
* Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows
27932810
* and LF on Linux.
27942811
*
27952812
* <p>
@@ -2807,7 +2824,7 @@ public CSVFormat withSystemRecordSeparator() {
28072824
}
28082825

28092826
/**
2810-
* Returns a new {@code CSVFormat} to add a trailing delimiter.
2827+
* Builds a new {@code CSVFormat} to add a trailing delimiter.
28112828
*
28122829
* @return A new CSVFormat that is equal to this but with the trailing delimiter setting.
28132830
* @since 1.3
@@ -2819,7 +2836,7 @@ public CSVFormat withTrailingDelimiter() {
28192836
}
28202837

28212838
/**
2822-
* Returns a new {@code CSVFormat} with whether to add a trailing delimiter.
2839+
* Builds a new {@code CSVFormat} with whether to add a trailing delimiter.
28232840
*
28242841
* @param trailingDelimiter whether to add a trailing delimiter.
28252842
* @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting.
@@ -2832,7 +2849,7 @@ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
28322849
}
28332850

28342851
/**
2835-
* Returns a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
2852+
* Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
28362853
*
28372854
* @return A new CSVFormat that is equal to this but with the trim setting on.
28382855
* @since 1.3
@@ -2844,7 +2861,7 @@ public CSVFormat withTrim() {
28442861
}
28452862

28462863
/**
2847-
* Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
2864+
* Builds a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
28482865
*
28492866
* @param trim whether to trim leading and trailing blanks.
28502867
* @return A new CSVFormat that is equal to this but with the specified trim setting.

0 commit comments

Comments
 (0)