Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
CSV-288 Fix for multi-char delimiter not working as expected
When checking if previous token is delimiter, isDelimiter(lastChar) unintentionally advance the buffer pointer. Also isDelimiter(lastChar) cannot handle multi-char delimiter. To fix this, create a new indicator isLastTokenDelimiter instead of using isDelimiter(lastChar), the indicator is set/reset in isDelimiter()
  • Loading branch information
angusdev committed Feb 19, 2022
commit 3a99e2eb3dca28175885b9fb1e4ca6dfefdb4337
11 changes: 8 additions & 3 deletions src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ final class Lexer implements Closeable {
private final ExtendedBufferedReader reader;
private String firstEol;

private boolean isLastTokenDelimiter;

Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
this.reader = reader;
this.delimiter = format.getDelimiterString().toCharArray();
Expand Down Expand Up @@ -124,11 +126,13 @@ boolean isCommentStart(final int ch) {
* @throws IOException If an I/O error occurs.
*/
boolean isDelimiter(final int ch) throws IOException {
isLastTokenDelimiter = false;
if (ch != delimiter[0]) {
return false;
}
if (delimiter.length == 1) {
return true;
isLastTokenDelimiter = true;
return true;
}
reader.lookAhead(delimiterBuf);
for (int i = 0; i < delimiterBuf.length; i++) {
Expand All @@ -137,7 +141,8 @@ boolean isDelimiter(final int ch) throws IOException {
}
}
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
return count != END_OF_STREAM;
isLastTokenDelimiter = count != END_OF_STREAM;
return isLastTokenDelimiter;
}

/**
Expand Down Expand Up @@ -243,7 +248,7 @@ Token nextToken(final Token token) throws IOException {
}

// did we reach eof during the last iteration already ? EOF
if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) {
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
return token;
Expand Down
230 changes: 230 additions & 0 deletions src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.csv.issues;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;

public class JiraCsv288Test {
@Test
// Before fix:
// expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
public void testParseWithDoublePipeDelimiter() throws Exception {
final Reader in = new StringReader("a||b||c||d||||f");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f", stringBuilder.toString());
}
}
}

@Test
// Before fix:
// expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
public void testParseWithTriplePipeDelimiter() throws Exception {
final Reader in = new StringReader("a|||b|||c|||d||||||f");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f", stringBuilder.toString());
}
}
}

@Test
// Before fix:
// expected: <a,b,c,d,,f> but was: <a,b,c,d,|f>
public void testParseWithABADelimiter() throws Exception {
final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f", stringBuilder.toString());
}
}
}

@Test
// Before fix:
// expected: <a,b||c,d,,f> but was: <a,b||c,d,|f>
public void testParseWithDoublePipeDelimiterQuoted() throws Exception {
final Reader in = new StringReader("a||\"b||c\"||d||||f");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b||c,d,,f", stringBuilder.toString());
}
}
}

@Test
// Before fix:
// expected: <a,b,c,d,,f,> but was: <a,b|c,d,|f>
public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception {
final Reader in = new StringReader("a||b||c||d||||f||");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
}
}
}

@Test
// Before fix:
// expected: <a,b,c,d,,f,> but was: <a,b,c,d,,f>
public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception {
final Reader in = new StringReader("a~|b~|c~|d~|~|f~|");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
}
}
}

@Test
// Regression, already passed before fix

public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception {
final Reader in = new StringReader("a||bb||cc||dd||f");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,bb,cc,dd,f", stringBuilder.toString());
}
}
}

@Test
// Regression, already passed before fix
public void testParseWithTwoCharDelimiter1() throws Exception {
final Reader in = new StringReader("a~|b~|c~|d~|~|f");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f", stringBuilder.toString());
}
}
}

@Test
// Regression, already passed before fix
public void testParseWithTwoCharDelimiter2() throws Exception {
final Reader in = new StringReader("a~|b~|c~|d~|~|f~");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f~", stringBuilder.toString());
}
}
}

@Test
// Regression, already passed before fix
public void testParseWithTwoCharDelimiter3() throws Exception {
final Reader in = new StringReader("a~|b~|c~|d~|~|f|");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f|", stringBuilder.toString());
}
}
}

@Test
// Regression, already passed before fix
public void testParseWithTwoCharDelimiter4() throws Exception {
final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString());
}
}
}

@Test
// Regression, already passed before fix
public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception {
final Reader in = new StringReader("a|b|c|d||f|");
StringBuilder stringBuilder = new StringBuilder();
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").build())) {
for (CSVRecord csvRecord : csvParser) {
for (int i = 0; i < csvRecord.size(); i++) {
csvPrinter.print(csvRecord.get(i));
}
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
}
}
}
}