Skip to content

Commit afbf34a

Browse files
committed
evaluate isDelimiter once in nextToken whitespace skip
1 parent e729d17 commit afbf34a

2 files changed

Lines changed: 28 additions & 2 deletions

File tree

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,15 +277,22 @@ Token nextToken(final Token token) throws IOException {
277277
}
278278
// Important: make sure a new char gets consumed in each iteration
279279
while (token.type == Token.Type.INVALID) {
280+
// isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must
281+
// only be evaluated once per character. Remember a match found while skipping whitespace below.
282+
boolean delimiter = false;
280283
// ignore whitespaces at beginning of a token
281284
if (ignoreSurroundingSpaces) {
282-
while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) {
285+
while (Character.isWhitespace((char) c) && !eol) {
286+
if (isDelimiter(c)) {
287+
delimiter = true;
288+
break;
289+
}
283290
c = reader.read();
284291
eol = readEndOfLine(c);
285292
}
286293
}
287294
// ok, start of token reached: encapsulated, or token
288-
if (isDelimiter(c)) {
295+
if (delimiter || isDelimiter(c)) {
289296
// empty token return TOKEN("")
290297
token.type = Token.Type.TOKEN;
291298
} else if (eol) {

src/test/java/org/apache/commons/csv/LexerTest.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,25 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
447447
}
448448
}
449449

450+
/**
451+
* With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
452+
* the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the
453+
* delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped.
454+
*/
455+
@Test
456+
void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
457+
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
458+
try (Lexer lexer = createLexer(" |a", format)) {
459+
assertNextToken(TOKEN, "", lexer);
460+
assertNextToken(EOF, "a", lexer);
461+
}
462+
try (Lexer lexer = createLexer("a | |b", format)) {
463+
assertNextToken(TOKEN, "a", lexer);
464+
assertNextToken(TOKEN, "", lexer);
465+
assertNextToken(EOF, "b", lexer);
466+
}
467+
}
468+
450469
@Test
451470
void testReadEscapeBackspace() throws IOException {
452471
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {

0 commit comments

Comments
 (0)