From afbf34ad92cbf741b9dfad84699e890d8695a6f9 Mon Sep 17 00:00:00 2001 From: Naveed Khan Date: Thu, 25 Jun 2026 23:12:11 +0530 Subject: [PATCH 01/10] evaluate isDelimiter once in nextToken whitespace skip --- .../java/org/apache/commons/csv/Lexer.java | 11 +++++++++-- .../org/apache/commons/csv/LexerTest.java | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index 93a584663..fe964480a 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -277,15 +277,22 @@ Token nextToken(final Token token) throws IOException { } // Important: make sure a new char gets consumed in each iteration while (token.type == Token.Type.INVALID) { + // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must + // only be evaluated once per character. Remember a match found while skipping whitespace below. + boolean delimiter = false; // ignore whitespaces at beginning of a token if (ignoreSurroundingSpaces) { - while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) { + while (Character.isWhitespace((char) c) && !eol) { + if (isDelimiter(c)) { + delimiter = true; + break; + } c = reader.read(); eol = readEndOfLine(c); } } // ok, start of token reached: encapsulated, or token - if (isDelimiter(c)) { + if (delimiter || isDelimiter(c)) { // empty token return TOKEN("") token.type = Token.Type.TOKEN; } else if (eol) { diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java index db1ab3a6d..445f710a1 100644 --- a/src/test/java/org/apache/commons/csv/LexerTest.java +++ b/src/test/java/org/apache/commons/csv/LexerTest.java @@ -447,6 +447,25 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { } } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the + * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped. + */ + @Test + void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (Lexer lexer = createLexer(" |a", format)) { + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "a", lexer); + } + try (Lexer lexer = createLexer("a | |b", format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "b", lexer); + } + } + @Test void testReadEscapeBackspace() throws IOException { try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) { From c9362f76baa32534e82334a27220b698db49788c Mon Sep 17 00:00:00 2001 From: Naveed Khan Date: Fri, 26 Jun 2026 02:45:35 +0530 Subject: [PATCH 02/10] add public-api test for whitespace-prefixed multi-char delimiter exercises the empty-field-dropped regression through CSVParser, not just the lexer. --- .../org/apache/commons/csv/CSVParserTest.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 051548757..565e132eb 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -1758,6 +1758,26 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { } } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping + * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value. + */ + @Test + void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (CSVParser parser = CSVParser.parse(" |a", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "", "a" }, records.get(0)); + } + try (CSVParser parser = CSVParser.parse("a | |b", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); + } + } + @Test void testProvidedHeader() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); From c4113ceb3acfba0634133e47d35c40d141e18525 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 25 Jun 2026 22:11:14 +0000 Subject: [PATCH 03/10] Evaluate isDelimiter once in nextToken whitespace skip (#618). --- src/changes/changes.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 0d0175ccc..93952e9f1 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -62,6 +62,7 @@ Escape leading comment marker in printWithEscapes (#614). Skip byte counting at EOF in ExtendedBufferedReader.read (#615). Keep quoted empty trailing field with trailingDelimiter (#616). + Evaluate isDelimiter once in nextToken whitespace skip (#618).. Add an "Android Compatibility" section to the web site. Add CSVParser.Builder.setByteOffset(long) (#604). From e36e7f3a1d0fbe29f2ff602f041d3a3d4195b84a Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 25 Jun 2026 22:11:53 +0000 Subject: [PATCH 04/10] Sort members --- .../org/apache/commons/csv/CSVParserTest.java | 40 +++++++++---------- .../org/apache/commons/csv/LexerTest.java | 38 +++++++++--------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 565e132eb..3bea08fac 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -465,6 +465,26 @@ void testDuplicateHeadersNotAllowed() { () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false))); } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping + * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value. + */ + @Test + void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (CSVParser parser = CSVParser.parse(" |a", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "", "a" }, records.get(0)); + } + try (CSVParser parser = CSVParser.parse("a | |b", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); + } + } + @Test void testEmptyFile() throws Exception { try (CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8, @@ -1758,26 +1778,6 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { } } - /** - * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, - * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping - * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value. - */ - @Test - void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); - try (CSVParser parser = CSVParser.parse(" |a", format)) { - final List records = parser.getRecords(); - assertEquals(1, records.size()); - assertValuesEquals(new String[] { "", "a" }, records.get(0)); - } - try (CSVParser parser = CSVParser.parse("a | |b", format)) { - final List records = parser.getRecords(); - assertEquals(1, records.size()); - assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); - } - } - @Test void testProvidedHeader() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java index 445f710a1..a76f6e513 100644 --- a/src/test/java/org/apache/commons/csv/LexerTest.java +++ b/src/test/java/org/apache/commons/csv/LexerTest.java @@ -216,6 +216,25 @@ void testDelimiterIsWhitespace() throws IOException { } } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the + * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped. + */ + @Test + void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (Lexer lexer = createLexer(" |a", format)) { + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "a", lexer); + } + try (Lexer lexer = createLexer("a | |b", format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "b", lexer); + } + } + @Test void testEOFWithoutClosingQuote() throws Exception { final String code = "a,\"b"; @@ -447,25 +466,6 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { } } - /** - * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, - * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the - * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped. - */ - @Test - void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); - try (Lexer lexer = createLexer(" |a", format)) { - assertNextToken(TOKEN, "", lexer); - assertNextToken(EOF, "a", lexer); - } - try (Lexer lexer = createLexer("a | |b", format)) { - assertNextToken(TOKEN, "a", lexer); - assertNextToken(TOKEN, "", lexer); - assertNextToken(EOF, "b", lexer); - } - } - @Test void testReadEscapeBackspace() throws IOException { try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) { From 26a53751934e52f84663e5e90956db99f0eefc49 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 25 Jun 2026 22:13:08 +0000 Subject: [PATCH 05/10] Add test more assertions to CSVParserTest.testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() --- src/test/java/org/apache/commons/csv/CSVParserTest.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 3bea08fac..6d9bdd9e8 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -483,6 +483,11 @@ void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOEx assertEquals(1, records.size()); assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); } + try (CSVParser parser = CSVParser.parse("a | |b |", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b", "" }, records.get(0)); + } } @Test From b346046e10a5833e5f6143fd0162155bb51ccc87 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 25 Jun 2026 18:23:59 -0400 Subject: [PATCH 06/10] Bump actions/cache from 5.0.5 to 6.0.0. --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/maven.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 08c673ee0..20f1ee2cb 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -49,7 +49,7 @@ jobs: uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: persist-credentials: false - - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5 + - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index a6154ddb1..139df406d 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -46,7 +46,7 @@ jobs: - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: persist-credentials: false - - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5 + - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} From 930bc7919db7ebb81dc566e9e278ce2bed3acf42 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Mon, 29 Jun 2026 07:29:52 -0400 Subject: [PATCH 07/10] Bump actions/cache from 6.0.0 to 6.1.0 --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/maven.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 20f1ee2cb..cca38e512 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -49,7 +49,7 @@ jobs: uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: persist-credentials: false - - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0 + - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 139df406d..2637840b1 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -46,7 +46,7 @@ jobs: - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: persist-credentials: false - - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0 + - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} From 6471196a31d9ea92942a634732e5350ef5253fcf Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Tue, 30 Jun 2026 13:58:31 +0000 Subject: [PATCH 08/10] Javadoc --- src/main/java/org/apache/commons/csv/CSVRecord.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 502bf318a..8dab14d90 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -281,7 +281,7 @@ public Iterator iterator() { /** * Puts all values of this record into the given Map. * - * @param the map type. + * @param The map type. * @param map The Map to populate. * @return the given map. * @since 1.9.0 From f7efa29cd9c4d9d1c1cafdae4469cf254bf22f42 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Tue, 30 Jun 2026 20:50:12 -0400 Subject: [PATCH 09/10] Bump actions/setup-java from 5.3.0 to 5.4.0 --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 2637840b1..17ba7dd38 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -53,7 +53,7 @@ jobs: restore-keys: | ${{ runner.os }}-maven- - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@ad2b38190b15e4d6bdf0c97fb4fca8412226d287 # v5.3.0 + uses: actions/setup-java@1bcf9fb12cf4aa7d266a90ae39939e61372fe520 # v5.4.0 with: distribution: ${{ runner.os == 'macOS' && matrix.java == '8' && 'zulu' || 'temurin' }} java-version: ${{ matrix.java }} From 4434d93b92ff3f7b0754e65eba53721dd95c59f1 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Wed, 1 Jul 2026 07:31:54 -0400 Subject: [PATCH 10/10] Bump actions/checkout from 6.0.2 to 7.0.0 --- .github/workflows/scorecards-analysis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml index bf246c140..e1868cb46 100644 --- a/.github/workflows/scorecards-analysis.yml +++ b/.github/workflows/scorecards-analysis.yml @@ -40,7 +40,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # 7.0.0 with: persist-credentials: false