Skip to content

Commit d6ff323

Browse files
committed
add recordSeparatorForInput
add recordSeparatorForInput
1 parent a775784 commit d6ff323

2 files changed

Lines changed: 68 additions & 24 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 56 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ public CSVFormat getFormat() {
232232
* @see Predefined#Default
233233
*/
234234
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
235-
null, null, null, false, false, false, false, false);
235+
null, null, null, false, false, false, false, false,null);
236236

237237
/**
238238
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -427,7 +427,7 @@ private static boolean isLineBreak(final Character c) {
427427
*/
428428
public static CSVFormat newFormat(final char delimiter) {
429429
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
430-
false, false, false);
430+
false, false, false,null);
431431
}
432432

433433
/**
@@ -468,6 +468,8 @@ public static CSVFormat valueOf(final String format) {
468468

469469
private final String recordSeparator; // for outputs
470470

471+
private final Character recordSeparatorForInput; // for inputs
472+
471473
private final boolean skipHeaderRecord;
472474

473475
private final boolean trailingDelimiter;
@@ -509,6 +511,8 @@ public static CSVFormat valueOf(final String format) {
509511
* TODO
510512
* @param trailingDelimiter
511513
* TODO
514+
* @param recordSeparatorForInput
515+
* the record separator character to use when parsing input, or {@code null} to recognize '\n', '\r' and "\r\n"
512516
* @throws IllegalArgumentException
513517
* if the delimiter is a line break character
514518
*/
@@ -517,7 +521,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
517521
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
518522
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
519523
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
520-
final boolean trailingDelimiter) {
524+
final boolean trailingDelimiter,Character recordSeparatorForInput) {
521525
this.delimiter = delimiter;
522526
this.quoteCharacter = quoteChar;
523527
this.quoteMode = quoteMode;
@@ -534,6 +538,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
534538
this.ignoreHeaderCase = ignoreHeaderCase;
535539
this.trailingDelimiter = trailingDelimiter;
536540
this.trim = trim;
541+
this.recordSeparatorForInput=recordSeparatorForInput;
537542
validate();
538543
}
539544

@@ -748,6 +753,14 @@ public QuoteMode getQuoteMode() {
748753
public String getRecordSeparator() {
749754
return recordSeparator;
750755
}
756+
/**
757+
* Returns the record separator delimiting input records.
758+
*
759+
* @return the record separator used for input, or {@code null} if none has been set
760+
*/
761+
public Character getRecordSeparatorForInput() {
762+
return recordSeparatorForInput;
763+
}
751764

752765
/**
753766
* Returns whether to skip the header record.
@@ -793,6 +806,7 @@ public int hashCode() {
793806
result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
794807
result = prime * result + (skipHeaderRecord ? 1231 : 1237);
795808
result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode());
809+
result = prime * result + ((recordSeparatorForInput == null) ? 0 : recordSeparatorForInput.hashCode());
796810
result = prime * result + Arrays.hashCode(header);
797811
return result;
798812
}
@@ -1188,6 +1202,10 @@ public String toString() {
11881202
sb.append(' ');
11891203
sb.append("RecordSeparator=<").append(recordSeparator).append('>');
11901204
}
1205+
if (recordSeparatorForInput != null) {
1206+
sb.append(' ');
1207+
sb.append("recordSeparatorForInput=<").append(recordSeparatorForInput).append('>');
1208+
}
11911209
if (getIgnoreEmptyLines()) {
11921210
sb.append(" EmptyLines:ignored");
11931211
}
@@ -1311,7 +1329,7 @@ public CSVFormat withAllowMissingColumnNames() {
13111329
public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
13121330
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
13131331
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1314-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1332+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
13151333
}
13161334

13171335
/**
@@ -1346,7 +1364,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
13461364
}
13471365
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
13481366
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1349-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1367+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
13501368
}
13511369

13521370
/**
@@ -1364,7 +1382,7 @@ public CSVFormat withDelimiter(final char delimiter) {
13641382
}
13651383
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
13661384
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1367-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1385+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
13681386
}
13691387

13701388
/**
@@ -1395,7 +1413,7 @@ public CSVFormat withEscape(final Character escape) {
13951413
}
13961414
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
13971415
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1398-
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1416+
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
13991417
}
14001418

14011419
/**
@@ -1550,7 +1568,7 @@ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLExceptio
15501568
public CSVFormat withHeader(final String... header) {
15511569
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
15521570
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1553-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1571+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
15541572
}
15551573

15561574
/**
@@ -1571,7 +1589,7 @@ public CSVFormat withHeader(final String... header) {
15711589
public CSVFormat withHeaderComments(final Object... headerComments) {
15721590
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
15731591
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1574-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1592+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
15751593
}
15761594

15771595
/**
@@ -1596,7 +1614,7 @@ public CSVFormat withIgnoreEmptyLines() {
15961614
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
15971615
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
15981616
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1599-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1617+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
16001618
}
16011619

16021620
/**
@@ -1622,7 +1640,7 @@ public CSVFormat withIgnoreHeaderCase() {
16221640
public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
16231641
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
16241642
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1625-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1643+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
16261644
}
16271645

16281646
/**
@@ -1647,7 +1665,7 @@ public CSVFormat withIgnoreSurroundingSpaces() {
16471665
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
16481666
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
16491667
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1650-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1668+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
16511669
}
16521670

16531671
/**
@@ -1666,7 +1684,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
16661684
public CSVFormat withNullString(final String nullString) {
16671685
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
16681686
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1669-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1687+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
16701688
}
16711689

16721690
/**
@@ -1697,7 +1715,7 @@ public CSVFormat withQuote(final Character quoteChar) {
16971715
}
16981716
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
16991717
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1700-
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1718+
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
17011719
}
17021720

17031721
/**
@@ -1711,7 +1729,7 @@ public CSVFormat withQuote(final Character quoteChar) {
17111729
public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
17121730
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
17131731
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1714-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1732+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
17151733
}
17161734

17171735
/**
@@ -1731,6 +1749,25 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
17311749
return withRecordSeparator(String.valueOf(recordSeparator));
17321750
}
17331751

1752+
/**
1753+
* Returns a new {@code CSVFormat} with the record separator used for input (parsing) set to the specified character.
1754+
*
1755+
* <p>
1756+
* <strong>Note:</strong> This setting is used when parsing input; the record separator written when printing is
1757+
* configured separately. When this setting is not set, parsing recognizes '\n', '\r' and "\r\n" as record separators.
1758+
* </p>
1759+
*
1760+
* @param recordSeparatorForInput
1761+
* the record separator to use for input.
1762+
*
1763+
* @return A new CSVFormat that is equal to this but with the specified input record separator
1764+
*/
1765+
public CSVFormat withRecordSeparatorForInput(final char recordSeparatorForInput) {
1766+
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1767+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1768+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
1769+
}
1770+
17341771
/**
17351772
* Returns a new {@code CSVFormat} with the record separator of the format set to the specified String.
17361773
*
@@ -1749,7 +1786,7 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
17491786
public CSVFormat withRecordSeparator(final String recordSeparator) {
17501787
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17511788
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1752-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1789+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
17531790
}
17541791

17551792
/**
@@ -1776,7 +1813,7 @@ public CSVFormat withSkipHeaderRecord() {
17761813
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
17771814
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17781815
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1779-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1816+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
17801817
}
17811818

17821819
/**
@@ -1801,7 +1838,7 @@ public CSVFormat withTrailingDelimiter() {
18011838
public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
18021839
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
18031840
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1804-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1841+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
18051842
}
18061843

18071844
/**
@@ -1826,6 +1863,6 @@ public CSVFormat withTrim() {
18261863
public CSVFormat withTrim(final boolean trim) {
18271864
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
18281865
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1829-
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1866+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter,recordSeparatorForInput);
18301867
}
18311868
}

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ final class Lexer implements Closeable {
5555
private final boolean ignoreSurroundingSpaces;
5656
private final boolean ignoreEmptyLines;
5757

58+
private final Character recordSeparatorForInput;
59+
5860
/** The input stream */
5961
private final ExtendedBufferedReader reader;
6062

@@ -66,6 +68,7 @@ final class Lexer implements Closeable {
6668
this.commentStart = mapNullToDisabled(format.getCommentMarker());
6769
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
6870
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
71+
this.recordSeparatorForInput=format.getRecordSeparatorForInput();
6972
}
7073

7174
/**
@@ -370,12 +373,16 @@ void trimTrailingSpaces(final StringBuilder buffer) {
370373
* @return true if the given or next character is a line-terminator
371374
*/
372375
boolean readEndOfLine(int ch) throws IOException {
373-
// check if we have \r\n...
374-
if (ch == CR && reader.lookAhead() == LF) {
375-
// note: does not change ch outside of this method!
376-
ch = reader.read();
376+
if (recordSeparatorForInput == null){
377+
// check if we have \r\n...
378+
if (ch == CR && reader.lookAhead() == LF) {
379+
// note: does not change ch outside of this method!
380+
ch = reader.read();
381+
}
382+
return ch == LF || ch == CR;
383+
}else{
384+
return ch == recordSeparatorForInput.charValue();
377385
}
378-
return ch == LF || ch == CR;
379386
}
380387

381388
boolean isClosed() {

0 commit comments

Comments
 (0)