Skip to content

Commit d0ea9e3

Browse files
author
Damjan Jovanovic
committed
Add a setting that controls whether the last field on the last line,
if quoted, has to have a closing quote before the file ends.
1 parent ed0ca22 commit d0ea9e3

3 files changed

Lines changed: 64 additions & 11 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ public static Builder create(final CSVFormat csvFormat) {
206206
return new Builder(csvFormat);
207207
}
208208

209+
private boolean allowEofWithoutClosingQuote;
210+
209211
private boolean allowMissingColumnNames;
210212

211213
private boolean allowTrailingText;
@@ -267,6 +269,7 @@ private Builder(final CSVFormat csvFormat) {
267269
this.quotedNullString = csvFormat.quotedNullString;
268270
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
269271
this.allowTrailingText = csvFormat.allowTrailingText;
272+
this.allowEofWithoutClosingQuote = csvFormat.allowEofWithoutClosingQuote;
270273
}
271274

272275
/**
@@ -291,6 +294,19 @@ public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNa
291294
return this;
292295
}
293296

297+
/**
298+
* Sets whether a quoted last field on the last line may be missing its closing quote when the file ends, {@code true} to allow this,
299+
* {@code false} if an {@link IOException} should be thrown.
300+
*
301+
* @param allowEofWithoutClosingQuote whether to allow the last field on the last line to have a missing closing quote when the file ends,
302+
* {@code true} if so, or {@code false} to cause an {@link IOException} to be thrown.
303+
* @since 1.10.0
304+
*/
305+
public Builder setAllowEofWithoutClosingQuote(final boolean allowEofWithoutClosingQuote) {
306+
this.allowEofWithoutClosingQuote = allowEofWithoutClosingQuote;
307+
return this;
308+
}
309+
294310
/**
295311
* Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
296312
* {@link IllegalArgumentException} to be thrown.
@@ -827,7 +843,7 @@ public CSVFormat getFormat() {
827843
* @see Predefined#Default
828844
*/
829845
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
830-
false, false, false, DuplicateHeaderMode.ALLOW_ALL, false);
846+
false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false);
831847

832848
/**
833849
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
@@ -852,6 +868,7 @@ public CSVFormat getFormat() {
852868
* <li>{@code setAllowMissingColumnNames(true)}</li>
853869
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
854870
* <li>{@code setAllowTrailingText(true)}</li>
871+
* <li>{@code setAllowEofWithoutClosingQuote(true)}</li>
855872
* </ul>
856873
* <p>
857874
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
@@ -865,6 +882,7 @@ public CSVFormat getFormat() {
865882
.setIgnoreEmptyLines(false)
866883
.setAllowMissingColumnNames(true)
867884
.setAllowTrailingText(true)
885+
.setAllowEofWithoutClosingQuote(true)
868886
.build();
869887
// @formatter:on
870888

@@ -1287,7 +1305,7 @@ private static boolean isTrimChar(final CharSequence charSequence, final int pos
12871305
*/
12881306
public static CSVFormat newFormat(final char delimiter) {
12891307
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
1290-
DuplicateHeaderMode.ALLOW_ALL, false);
1308+
DuplicateHeaderMode.ALLOW_ALL, false, false);
12911309
}
12921310

12931311
static String[] toStringArray(final Object[] values) {
@@ -1329,6 +1347,8 @@ public static CSVFormat valueOf(final String format) {
13291347

13301348
private final DuplicateHeaderMode duplicateHeaderMode;
13311349

1350+
private final boolean allowEofWithoutClosingQuote;
1351+
13321352
private final boolean allowMissingColumnNames;
13331353

13341354
private final boolean allowTrailingText;
@@ -1388,6 +1408,7 @@ private CSVFormat(final Builder builder) {
13881408
this.quotedNullString = builder.quotedNullString;
13891409
this.duplicateHeaderMode = builder.duplicateHeaderMode;
13901410
this.allowTrailingText = builder.allowTrailingText;
1411+
this.allowEofWithoutClosingQuote = builder.allowEofWithoutClosingQuote;
13911412
validate();
13921413
}
13931414

@@ -1418,7 +1439,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
14181439
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
14191440
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
14201441
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
1421-
final DuplicateHeaderMode duplicateHeaderMode, final boolean allowTrailingText) {
1442+
final DuplicateHeaderMode duplicateHeaderMode, final boolean allowTrailingText, final boolean allowEofWithoutClosingQuote) {
14221443
this.delimiter = delimiter;
14231444
this.quoteCharacter = quoteChar;
14241445
this.quoteMode = quoteMode;
@@ -1439,6 +1460,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
14391460
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
14401461
this.duplicateHeaderMode = duplicateHeaderMode;
14411462
this.allowTrailingText = allowTrailingText;
1463+
this.allowEofWithoutClosingQuote = allowEofWithoutClosingQuote;
14421464
validate();
14431465
}
14441466

@@ -1493,7 +1515,7 @@ public boolean equals(final Object obj) {
14931515
Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
14941516
Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
14951517
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim &&
1496-
allowTrailingText == other.allowTrailingText;
1518+
allowTrailingText == other.allowTrailingText && allowEofWithoutClosingQuote == other.allowEofWithoutClosingQuote;
14971519
}
14981520

14991521
/**
@@ -1527,6 +1549,16 @@ public boolean getAllowDuplicateHeaderNames() {
15271549
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
15281550
}
15291551

1552+
/**
1553+
* Gets whether the file can end before the last field on the last line, if quoted, has a closing quote.
1554+
*
1555+
* @return {@code true} if a missing closing quote at the end of the file is allowed, {@code false} if an {@link IOException} is thrown instead.
1556+
* @since 1.10.0
1557+
*/
1558+
public boolean getAllowEofWithoutClosingQuote() {
1559+
return allowEofWithoutClosingQuote;
1560+
}
1561+
15301562
/**
15311563
* Gets whether missing column names are allowed when parsing the header line.
15321564
*
@@ -1726,9 +1758,9 @@ public int hashCode() {
17261758
int result = 1;
17271759
result = prime * result + Arrays.hashCode(headers);
17281760
result = prime * result + Arrays.hashCode(headerComments);
1729-
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, allowTrailingText, autoFlush, commentMarker, delimiter,
1730-
escapeCharacter, ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString,
1731-
recordSeparator, skipHeaderRecord, trailingDelimiter, trim);
1761+
return prime * result + Objects.hash(duplicateHeaderMode, allowEofWithoutClosingQuote, allowMissingColumnNames, allowTrailingText,
1762+
autoFlush, commentMarker, delimiter, escapeCharacter, ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces,
1763+
nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator, skipHeaderRecord, trailingDelimiter, trim);
17321764
}
17331765

17341766
/**

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ final class Lexer implements Closeable {
5858
private final boolean ignoreSurroundingSpaces;
5959
private final boolean ignoreEmptyLines;
6060
private final boolean allowTrailingText;
61+
private final boolean allowEofWithoutClosingQuote;
6162

6263
/** The input stream */
6364
private final ExtendedBufferedReader reader;
@@ -74,6 +75,7 @@ final class Lexer implements Closeable {
7475
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
7576
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
7677
this.allowTrailingText = format.getAllowTrailingText();
78+
this.allowEofWithoutClosingQuote = format.getAllowEofWithoutClosingQuote();
7779
this.delimiterBuf = new char[delimiter.length - 1];
7880
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
7981
}
@@ -378,9 +380,15 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
378380
}
379381
}
380382
} else if (isEndOfFile(c)) {
381-
// error condition (end of file before end of token)
382-
throw new IOException("(startline " + startLineNumber +
383-
") EOF reached before encapsulated token finished");
383+
if (allowEofWithoutClosingQuote) {
384+
token.type = EOF;
385+
token.isReady = true; // There is data at EOF
386+
return token;
387+
} else {
388+
// error condition (end of file before end of token)
389+
throw new IOException("(startline " + startLineNumber +
390+
") EOF reached before encapsulated token finished");
391+
}
384392
} else {
385393
// consume character
386394
token.content.append((char) c);

src/test/java/org/apache/commons/csv/LexerTest.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,20 @@ public void testTrailingTextAfterQuote() throws Exception {
441441
assertThat(parser.nextToken(new Token()), matches(EOF, "a b \"\""));
442442
}
443443
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowTrailingText(false).build())) {
444-
assertThrows(IOException.class, () -> lexer.nextToken(new Token()));
444+
assertThrows(IOException.class, () -> parser.nextToken(new Token()));
445+
}
446+
}
447+
448+
@Test
449+
public void testEOFWithoutClosingQuote() throws Exception {
450+
final String code = "a,\"b";
451+
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowEofWithoutClosingQuote(true).build())) {
452+
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
453+
assertThat(parser.nextToken(new Token()), matches(EOF, "b"));
454+
}
455+
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowEofWithoutClosingQuote(false).build())) {
456+
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
457+
assertThrows(IOException.class, () -> parser.nextToken(new Token()));
445458
}
446459
}
447460
}

0 commit comments

Comments
 (0)