Skip to content

Commit f685de6

Browse files
authored
Merge pull request apache#611 from rootvector2/delimiter-buffer-clear
Clear delimiter buffer before each peek in isDelimiter
2 parents (e21d66e + 61f5213); commit f685de6

3 files changed

Lines changed: 29 additions & 1 deletion

File tree

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,7 @@ boolean isDelimiter(final int ch) throws IOException {
153153
isLastTokenDelimiter = true;
154154
return true;
155155
}
156+
Arrays.fill(delimiterBuf, '\0');
156157
reader.peek(delimiterBuf);
157158
for (int i = 0; i < delimiterBuf.length; i++) {
158159
if (delimiterBuf[i] != delimiter[i + 1]) {
@@ -274,7 +275,6 @@ Token nextToken(final Token token) throws IOException {
274275
token.type = Token.Type.COMMENT;
275276
return token;
276277
}
277-
Arrays.fill(delimiterBuf, '\0');
278278
// Important: make sure a new char gets consumed in each iteration
279279
while (token.type == Token.Type.INVALID) {
280280
// ignore whitespaces at beginning of a token

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,6 +1696,21 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
16961696
}
16971697
}
16981698

1699+
/**
1700+
* A truncated multi-character delimiter at EOF must not be completed from the look-ahead buffer left dirty by an
1701+
* earlier non-matching peek in the same token.
1702+
*/
1703+
@Test
1704+
void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
1705+
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
1706+
// The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
1707+
try (CSVParser parser = format.parse(new StringReader("x[a][|"))) {
1708+
final CSVRecord record = parser.nextRecord();
1709+
assertEquals("x[a][|", record.get(0));
1710+
assertEquals(1, record.size());
1711+
}
1712+
}
1713+
16991714
@Test
17001715
void testProvidedHeader() throws Exception {
17011716
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");

src/test/java/org/apache/commons/csv/LexerTest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,19 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
433433
}
434434
}
435435

436+
/**
437+
* A truncated multi-character delimiter at EOF must not be accepted by reusing the look-ahead buffer left dirty by an
438+
* earlier non-matching peek in the same token (CSV-324 only cleared the buffer once per token).
439+
*/
440+
@Test
441+
void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
442+
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
443+
// The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
444+
try (Lexer lexer = createLexer("x[a][|", format)) {
445+
assertNextToken(EOF, "x[a][|", lexer);
446+
}
447+
}
448+
436449
@Test
437450
void testReadEscapeBackspace() throws IOException {
438451
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {

0 commit comments

Comments
 (0)