Skip to content

Commit c15a06e

Browse files
authored
CSV-288 Fix for multi-char delimiter not working as expected (apache#218)
When checking whether the previous token is a delimiter, isDelimiter(lastChar) unintentionally advances the buffer pointer. Also, isDelimiter(lastChar) cannot handle multi-char delimiters. To fix this, introduce a new indicator, isLastTokenDelimiter, and use it instead of calling isDelimiter(lastChar); the indicator is set/reset in isDelimiter().
1 parent 94711eb commit c15a06e

2 files changed

Lines changed: 238 additions & 3 deletions

File tree

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ final class Lexer implements Closeable {
6262
private final ExtendedBufferedReader reader;
6363
private String firstEol;
6464

65+
private boolean isLastTokenDelimiter;
66+
6567
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
6668
this.reader = reader;
6769
this.delimiter = format.getDelimiterString().toCharArray();
@@ -124,11 +126,13 @@ boolean isCommentStart(final int ch) {
124126
* @throws IOException If an I/O error occurs.
125127
*/
126128
boolean isDelimiter(final int ch) throws IOException {
129+
isLastTokenDelimiter = false;
127130
if (ch != delimiter[0]) {
128131
return false;
129132
}
130133
if (delimiter.length == 1) {
131-
return true;
134+
isLastTokenDelimiter = true;
135+
return true;
132136
}
133137
reader.lookAhead(delimiterBuf);
134138
for (int i = 0; i < delimiterBuf.length; i++) {
@@ -137,7 +141,8 @@ boolean isDelimiter(final int ch) throws IOException {
137141
}
138142
}
139143
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
140-
return count != END_OF_STREAM;
144+
isLastTokenDelimiter = count != END_OF_STREAM;
145+
return isLastTokenDelimiter;
141146
}
142147

143148
/**
@@ -243,7 +248,7 @@ Token nextToken(final Token token) throws IOException {
243248
}
244249

245250
// did we reach eof during the last iteration already ? EOF
246-
if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) {
251+
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
247252
token.type = EOF;
248253
// don't set token.isReady here because no content
249254
return token;
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv.issues;
19+
20+
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
22+
import java.io.Reader;
23+
import java.io.StringReader;
24+
25+
import org.apache.commons.csv.CSVFormat;
26+
import org.apache.commons.csv.CSVParser;
27+
import org.apache.commons.csv.CSVPrinter;
28+
import org.apache.commons.csv.CSVRecord;
29+
import org.junit.jupiter.api.Test;
30+
31+
public class JiraCsv288Test {
32+
@Test
33+
// Before fix:
34+
// expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
35+
public void testParseWithDoublePipeDelimiter() throws Exception {
36+
final Reader in = new StringReader("a||b||c||d||||f");
37+
StringBuilder stringBuilder = new StringBuilder();
38+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
39+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
40+
for (CSVRecord csvRecord : csvParser) {
41+
for (int i = 0; i < csvRecord.size(); i++) {
42+
csvPrinter.print(csvRecord.get(i));
43+
}
44+
assertEquals("a,b,c,d,,f", stringBuilder.toString());
45+
}
46+
}
47+
}
48+
49+
@Test
50+
// Before fix:
51+
// expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
52+
public void testParseWithTriplePipeDelimiter() throws Exception {
53+
final Reader in = new StringReader("a|||b|||c|||d||||||f");
54+
StringBuilder stringBuilder = new StringBuilder();
55+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
56+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").build())) {
57+
for (CSVRecord csvRecord : csvParser) {
58+
for (int i = 0; i < csvRecord.size(); i++) {
59+
csvPrinter.print(csvRecord.get(i));
60+
}
61+
assertEquals("a,b,c,d,,f", stringBuilder.toString());
62+
}
63+
}
64+
}
65+
66+
@Test
67+
// Before fix:
68+
// expected: <a,b,c,d,,f> but was: <a,b,c,d,|f>
69+
public void testParseWithABADelimiter() throws Exception {
70+
final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f");
71+
StringBuilder stringBuilder = new StringBuilder();
72+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
73+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").build())) {
74+
for (CSVRecord csvRecord : csvParser) {
75+
for (int i = 0; i < csvRecord.size(); i++) {
76+
csvPrinter.print(csvRecord.get(i));
77+
}
78+
assertEquals("a,b,c,d,,f", stringBuilder.toString());
79+
}
80+
}
81+
}
82+
83+
@Test
84+
// Before fix:
85+
// expected: <a,b||c,d,,f> but was: <a,b||c,d,|f>
86+
public void testParseWithDoublePipeDelimiterQuoted() throws Exception {
87+
final Reader in = new StringReader("a||\"b||c\"||d||||f");
88+
StringBuilder stringBuilder = new StringBuilder();
89+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
90+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
91+
for (CSVRecord csvRecord : csvParser) {
92+
for (int i = 0; i < csvRecord.size(); i++) {
93+
csvPrinter.print(csvRecord.get(i));
94+
}
95+
assertEquals("a,b||c,d,,f", stringBuilder.toString());
96+
}
97+
}
98+
}
99+
100+
@Test
101+
// Before fix:
102+
// expected: <a,b,c,d,,f,> but was: <a,b|c,d,|f>
103+
public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception {
104+
final Reader in = new StringReader("a||b||c||d||||f||");
105+
StringBuilder stringBuilder = new StringBuilder();
106+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
107+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
108+
for (CSVRecord csvRecord : csvParser) {
109+
for (int i = 0; i < csvRecord.size(); i++) {
110+
csvPrinter.print(csvRecord.get(i));
111+
}
112+
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
113+
}
114+
}
115+
}
116+
117+
@Test
118+
// Before fix:
119+
// expected: <a,b,c,d,,f,> but was: <a,b,c,d,,f>
120+
public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception {
121+
final Reader in = new StringReader("a~|b~|c~|d~|~|f~|");
122+
StringBuilder stringBuilder = new StringBuilder();
123+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
124+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
125+
for (CSVRecord csvRecord : csvParser) {
126+
for (int i = 0; i < csvRecord.size(); i++) {
127+
csvPrinter.print(csvRecord.get(i));
128+
}
129+
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
130+
}
131+
}
132+
}
133+
134+
@Test
135+
// Regression, already passed before fix
136+
137+
public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception {
138+
final Reader in = new StringReader("a||bb||cc||dd||f");
139+
StringBuilder stringBuilder = new StringBuilder();
140+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
141+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
142+
for (CSVRecord csvRecord : csvParser) {
143+
for (int i = 0; i < csvRecord.size(); i++) {
144+
csvPrinter.print(csvRecord.get(i));
145+
}
146+
assertEquals("a,bb,cc,dd,f", stringBuilder.toString());
147+
}
148+
}
149+
}
150+
151+
@Test
152+
// Regression, already passed before fix
153+
public void testParseWithTwoCharDelimiter1() throws Exception {
154+
final Reader in = new StringReader("a~|b~|c~|d~|~|f");
155+
StringBuilder stringBuilder = new StringBuilder();
156+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
157+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
158+
for (CSVRecord csvRecord : csvParser) {
159+
for (int i = 0; i < csvRecord.size(); i++) {
160+
csvPrinter.print(csvRecord.get(i));
161+
}
162+
assertEquals("a,b,c,d,,f", stringBuilder.toString());
163+
}
164+
}
165+
}
166+
167+
@Test
168+
// Regression, already passed before fix
169+
public void testParseWithTwoCharDelimiter2() throws Exception {
170+
final Reader in = new StringReader("a~|b~|c~|d~|~|f~");
171+
StringBuilder stringBuilder = new StringBuilder();
172+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
173+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
174+
for (CSVRecord csvRecord : csvParser) {
175+
for (int i = 0; i < csvRecord.size(); i++) {
176+
csvPrinter.print(csvRecord.get(i));
177+
}
178+
assertEquals("a,b,c,d,,f~", stringBuilder.toString());
179+
}
180+
}
181+
}
182+
183+
@Test
184+
// Regression, already passed before fix
185+
public void testParseWithTwoCharDelimiter3() throws Exception {
186+
final Reader in = new StringReader("a~|b~|c~|d~|~|f|");
187+
StringBuilder stringBuilder = new StringBuilder();
188+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
189+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
190+
for (CSVRecord csvRecord : csvParser) {
191+
for (int i = 0; i < csvRecord.size(); i++) {
192+
csvPrinter.print(csvRecord.get(i));
193+
}
194+
assertEquals("a,b,c,d,,f|", stringBuilder.toString());
195+
}
196+
}
197+
}
198+
199+
@Test
200+
// Regression, already passed before fix
201+
public void testParseWithTwoCharDelimiter4() throws Exception {
202+
final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g");
203+
StringBuilder stringBuilder = new StringBuilder();
204+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
205+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
206+
for (CSVRecord csvRecord : csvParser) {
207+
for (int i = 0; i < csvRecord.size(); i++) {
208+
csvPrinter.print(csvRecord.get(i));
209+
}
210+
assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString());
211+
}
212+
}
213+
}
214+
215+
@Test
216+
// Regression, already passed before fix
217+
public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception {
218+
final Reader in = new StringReader("a|b|c|d||f|");
219+
StringBuilder stringBuilder = new StringBuilder();
220+
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
221+
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").build())) {
222+
for (CSVRecord csvRecord : csvParser) {
223+
for (int i = 0; i < csvRecord.size(); i++) {
224+
csvPrinter.print(csvRecord.get(i));
225+
}
226+
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
227+
}
228+
}
229+
}
230+
}

0 commit comments

Comments
 (0)