Skip to content

Commit ad01ee1

Browse files
author
Ranjith Rp
committed
Added ignoreQuotesInToken support to ignore quotes inside strings, even when some encapsulated tokens contain commas
1 parent 7754cd4 commit ad01ee1

2 files changed

Lines changed: 74 additions & 21 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 70 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ public CSVFormat getFormat() {
266266
* @see Predefined#Default
267267
*/
268268
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
269-
null, null, null, false, false, false, false, false, false, true);
269+
null, null, null, false, false, false, false, false, false, true, false);
270270

271271
/**
272272
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -673,7 +673,7 @@ private static boolean isLineBreak(final Character c) {
673673
*/
674674
public static CSVFormat newFormat(final char delimiter) {
675675
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
676-
false, false, false, false, true);
676+
false, false, false, false, true, false);
677677
}
678678

679679
/**
@@ -709,6 +709,8 @@ public static CSVFormat valueOf(final String format) {
709709
private final boolean ignoreHeaderCase; // should ignore header names case
710710

711711
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
712+
713+
private final boolean ignoreQuotesInToken; //should ignore quotes in the token
712714

713715
private final String nullString; // the string to be used for null values
714716

@@ -762,6 +764,8 @@ public static CSVFormat valueOf(final String format) {
762764
* @param trailingDelimiter
763765
* TODO
764766
* @param autoFlush
767+
* @param ignoreQuotesInToken
768+
* the quotes within a string token will be ignored
765769
* @throws IllegalArgumentException
766770
* if the delimiter is a line break character
767771
*/
@@ -770,7 +774,8 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
770774
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
771775
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
772776
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
773-
final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) {
777+
final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames,
778+
final boolean ignoreQuotesInToken) {
774779
this.delimiter = delimiter;
775780
this.quoteCharacter = quoteChar;
776781
this.quoteMode = quoteMode;
@@ -790,6 +795,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
790795
this.autoFlush = autoFlush;
791796
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
792797
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
798+
this.ignoreQuotesInToken = ignoreQuotesInToken;
793799
validate();
794800
}
795801

@@ -864,6 +870,9 @@ public boolean equals(final Object obj) {
864870
if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) {
865871
return false;
866872
}
873+
if (ignoreQuotesInToken != other.ignoreQuotesInToken) {
874+
return false;
875+
}
867876
if (ignoreEmptyLines != other.ignoreEmptyLines) {
868877
return false;
869878
}
@@ -1004,6 +1013,16 @@ public boolean getIgnoreHeaderCase() {
10041013
public boolean getIgnoreSurroundingSpaces() {
10051014
return ignoreSurroundingSpaces;
10061015
}
1016+
1017+
/**
1018+
* Specifies whether quotes in token are ignored when parsing input.
1019+
*
1020+
* @return {@code true} if quotes are allowed anywhere in the string,
1021+
* {@code false} to ensure quotes come in the beginning and end of string only.
1022+
*/
1023+
public boolean getIgnoreQuotesInToken() {
1024+
return ignoreQuotesInToken;
1025+
}
10071026

10081027
/**
10091028
* Gets the String to convert to and from {@code null}.
@@ -1088,6 +1107,7 @@ public int hashCode() {
10881107
result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode());
10891108
result = prime * result + ((nullString == null) ? 0 : nullString.hashCode());
10901109
result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237);
1110+
result = prime * result + (ignoreQuotesInToken ? 1231 : 1237);
10911111
result = prime * result + (ignoreHeaderCase ? 1231 : 1237);
10921112
result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
10931113
result = prime * result + (skipHeaderRecord ? 1231 : 1237);
@@ -1618,6 +1638,9 @@ public String toString() {
16181638
if (getIgnoreSurroundingSpaces()) {
16191639
sb.append(" SurroundingSpaces:ignored");
16201640
}
1641+
if (getIgnoreQuotesInToken()) {
1642+
sb.append(" QuotesInToken:ignored");
1643+
}
16211644
if (getIgnoreHeaderCase()) {
16221645
sb.append(" IgnoreHeaderCase:ignored");
16231646
}
@@ -1734,7 +1757,7 @@ public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeade
17341757
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17351758
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17361759
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1737-
allowDuplicateHeaderNames);
1760+
allowDuplicateHeaderNames, ignoreQuotesInToken);
17381761
}
17391762

17401763
/**
@@ -1760,7 +1783,7 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
17601783
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17611784
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17621785
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1763-
allowDuplicateHeaderNames);
1786+
allowDuplicateHeaderNames, ignoreQuotesInToken);
17641787
}
17651788

17661789
/**
@@ -1776,7 +1799,7 @@ public CSVFormat withAutoFlush(final boolean autoFlush) {
17761799
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
17771800
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
17781801
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1779-
allowDuplicateHeaderNames);
1802+
allowDuplicateHeaderNames, ignoreQuotesInToken);
17801803
}
17811804

17821805
/**
@@ -1812,7 +1835,7 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
18121835
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
18131836
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
18141837
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1815-
allowDuplicateHeaderNames);
1838+
allowDuplicateHeaderNames, ignoreQuotesInToken);
18161839
}
18171840

18181841
/**
@@ -1831,7 +1854,7 @@ public CSVFormat withDelimiter(final char delimiter) {
18311854
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
18321855
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
18331856
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1834-
allowDuplicateHeaderNames);
1857+
allowDuplicateHeaderNames, ignoreQuotesInToken);
18351858
}
18361859

18371860
/**
@@ -1863,7 +1886,7 @@ public CSVFormat withEscape(final Character escape) {
18631886
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
18641887
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
18651888
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
1866-
allowDuplicateHeaderNames);
1889+
allowDuplicateHeaderNames, ignoreQuotesInToken);
18671890
}
18681891

18691892
/**
@@ -2020,7 +2043,7 @@ public CSVFormat withHeader(final String... header) {
20202043
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20212044
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20222045
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2023-
allowDuplicateHeaderNames);
2046+
allowDuplicateHeaderNames, ignoreQuotesInToken);
20242047
}
20252048

20262049
/**
@@ -2042,7 +2065,7 @@ public CSVFormat withHeaderComments(final Object... headerComments) {
20422065
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20432066
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20442067
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2045-
allowDuplicateHeaderNames);
2068+
allowDuplicateHeaderNames, ignoreQuotesInToken);
20462069
}
20472070

20482071
/**
@@ -2068,7 +2091,7 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
20682091
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20692092
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20702093
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2071-
allowDuplicateHeaderNames);
2094+
allowDuplicateHeaderNames, ignoreQuotesInToken);
20722095
}
20732096

20742097
/**
@@ -2095,7 +2118,7 @@ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
20952118
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
20962119
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
20972120
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2098-
allowDuplicateHeaderNames);
2121+
allowDuplicateHeaderNames, ignoreQuotesInToken);
20992122
}
21002123

21012124
/**
@@ -2121,9 +2144,35 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
21212144
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21222145
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21232146
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2124-
allowDuplicateHeaderNames);
2147+
allowDuplicateHeaderNames, ignoreQuotesInToken);
2148+
}
2149+
2150+
/**
2151+
* Returns a new {@code CSVFormat} with the parser allowing quotes anywhere in the string set to {@code true}.
2152+
*
2153+
* @return A new CSVFormat that is equal to this but with quotes allowed anywhere in the string.
2154+
* @see #withIgnoreQuotesInToken(boolean)
2155+
* @since 1.9
2156+
*/
2157+
public CSVFormat withIgnoreQuotesInToken() {
2158+
return this.withIgnoreQuotesInToken(true);
21252159
}
21262160

2161+
/**
2162+
* Returns a new {@code CSVFormat} with the parser's handling of quotes anywhere in the string set to the given value.
2163+
*
2164+
* @param ignoreQuotesInToken
2165+
* whether the parser allows quotes anywhere in the string: {@code true} to allow quotes anywhere in the string,
2166+
* {@code false} to ensure quotes come in the beginning and end of string only
2167+
* @return A new CSVFormat that is equal to this but with the specified handling of quotes within the string.
2168+
*/
2169+
public CSVFormat withIgnoreQuotesInToken(final boolean ignoreQuotesInToken) {
2170+
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
2171+
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
2172+
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2173+
allowDuplicateHeaderNames, ignoreQuotesInToken);
2174+
}
2175+
21272176
/**
21282177
* Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output.
21292178
* <ul>
@@ -2141,7 +2190,7 @@ public CSVFormat withNullString(final String nullString) {
21412190
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
21422191
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21432192
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2144-
allowDuplicateHeaderNames);
2193+
allowDuplicateHeaderNames, ignoreQuotesInToken);
21452194
}
21462195

21472196
/**
@@ -2173,7 +2222,7 @@ public CSVFormat withQuote(final Character quoteChar) {
21732222
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
21742223
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
21752224
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2176-
allowDuplicateHeaderNames);
2225+
allowDuplicateHeaderNames, ignoreQuotesInToken);
21772226
}
21782227

21792228
/**
@@ -2188,7 +2237,7 @@ public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
21882237
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
21892238
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
21902239
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2191-
allowDuplicateHeaderNames);
2240+
allowDuplicateHeaderNames, ignoreQuotesInToken);
21922241
}
21932242

21942243
/**
@@ -2227,7 +2276,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) {
22272276
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22282277
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22292278
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2230-
allowDuplicateHeaderNames);
2279+
allowDuplicateHeaderNames, ignoreQuotesInToken);
22312280
}
22322281

22332282
/**
@@ -2255,7 +2304,7 @@ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
22552304
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22562305
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22572306
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2258-
allowDuplicateHeaderNames);
2307+
allowDuplicateHeaderNames, ignoreQuotesInToken);
22592308
}
22602309

22612310
/**
@@ -2297,7 +2346,7 @@ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
22972346
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
22982347
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
22992348
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2300-
allowDuplicateHeaderNames);
2349+
allowDuplicateHeaderNames, ignoreQuotesInToken);
23012350
}
23022351

23032352
/**
@@ -2325,6 +2374,6 @@ public CSVFormat withTrim(final boolean trim) {
23252374
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
23262375
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
23272376
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
2328-
allowDuplicateHeaderNames);
2377+
allowDuplicateHeaderNames,ignoreQuotesInToken);
23292378
}
23302379
}

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ final class Lexer implements Closeable {
5555

5656
private final boolean ignoreSurroundingSpaces;
5757
private final boolean ignoreEmptyLines;
58+
private final boolean ignoreQuotesInToken;
5859

5960
/** The input stream */
6061
private final ExtendedBufferedReader reader;
@@ -72,6 +73,7 @@ String getFirstEol(){
7273
this.commentStart = mapNullToDisabled(format.getCommentMarker());
7374
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
7475
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
76+
this.ignoreQuotesInToken = format.getIgnoreQuotesInToken();
7577
}
7678

7779
/**
@@ -276,6 +278,8 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
276278
} else if (readEndOfLine(c)) {
277279
token.type = EORECORD;
278280
return token;
281+
} else if(ignoreQuotesInToken) {
282+
token.content.append((char)c);
279283
} else if (!isWhitespace(c)) {
280284
// error invalid char between token and next delimiter
281285
throw new IOException("(line " + getCurrentLineNumber() +

0 commit comments

Comments
 (0)