From e1ffa42ebef4907d02929b210852f466aa16d204 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Tue, 5 May 2026 11:23:19 -0400
Subject: [PATCH 01/69] Bump github/codeql-action from 4.35.2 to 4.35.3
---
.github/workflows/codeql-analysis.yml | 6 +++---
.github/workflows/scorecards-analysis.yml | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index b7b7fdf05..3571ff3f4 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -58,7 +58,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
+ uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +69,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
+ uses: github/codeql-action/autobuild@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -83,4 +83,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
+ uses: github/codeql-action/analyze@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index 16e37f605..95dbcd027 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -64,6 +64,6 @@ jobs:
retention-days: 5
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
+ uses: github/codeql-action/upload-sarif@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
with:
sarif_file: results.sarif
From 6f93c7edfa0f758f757227b1d30588411fdbf669 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Tue, 12 May 2026 18:49:17 +0000
Subject: [PATCH 02/69] Bump github/codeql-action from 4.35.3 to 4.35.4
---
.github/workflows/codeql-analysis.yml | 6 +++---
.github/workflows/scorecards-analysis.yml | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 3571ff3f4..e597a6dbb 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -58,7 +58,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
+ uses: github/codeql-action/init@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +69,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
+ uses: github/codeql-action/autobuild@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -83,4 +83,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
+ uses: github/codeql-action/analyze@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index 95dbcd027..2eed26f73 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -64,6 +64,6 @@ jobs:
retention-days: 5
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
+ uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
with:
sarif_file: results.sarif
From 8b041dba39eba4975ddd884ea813b6334d39c8de Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 06:43:45 -0400
Subject: [PATCH 03/69] Bump github/codeql-action from 4.35.4 to 4.35.5
---
.github/workflows/codeql-analysis.yml | 6 +++---
.github/workflows/scorecards-analysis.yml | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index e597a6dbb..f95d030d8 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -58,7 +58,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
+ uses: github/codeql-action/init@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +69,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
+ uses: github/codeql-action/autobuild@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -83,4 +83,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
+ uses: github/codeql-action/analyze@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index 2eed26f73..79c088cf3 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -64,6 +64,6 @@ jobs:
retention-days: 5
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4
+ uses: github/codeql-action/upload-sarif@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
with:
sarif_file: results.sarif
From 5e2d0590ca74424d5a51defef489e374abfb921a Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 07:53:21 -0400
Subject: [PATCH 04/69] [CSV-321] CSVFormat.equals()/hashCode() ignores maxRows
---
.../java/org/apache/commons/csv/CSVFormat.java | 15 ++++++++-------
.../org/apache/commons/csv/CSVFormatTest.java | 8 ++++++++
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 09e76e3a6..123fa2635 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -1690,11 +1690,11 @@ public boolean equals(final Object obj) {
duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) &&
Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) &&
ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase &&
- ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof &&
- Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) &&
- quoteMode == other.quoteMode && Objects.equals(quotedNullString, other.quotedNullString) &&
- Objects.equals(recordSeparator, other.recordSeparator) && skipHeaderRecord == other.skipHeaderRecord &&
- trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
+ ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof && maxRows == other.maxRows &&
+ Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
+ Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
+ skipHeaderRecord == other.skipHeaderRecord && trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter &&
+ trim == other.trim;
}
private void escape(final char c, final Appendable appendable) throws IOException {
@@ -2029,9 +2029,10 @@ public int hashCode() {
int result = 1;
result = prime * result + Arrays.hashCode(headerComments);
result = prime * result + Arrays.hashCode(headers);
- return prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter,
- ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString,
+ result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter,
+ ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, maxRows, nullString, quoteCharacter, quoteMode, quotedNullString,
recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim);
+ return result;
}
/**
diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
index 4d428b465..23bc9c8dc 100644
--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -395,6 +395,14 @@ void testEqualsLeftNoQuoteRightQuote_Deprecated() {
assertNotEqualsFlip(left, right);
}
+ @Test
+ void testEqualsMaxRows() {
+ final CSVFormat right = CSVFormat.DEFAULT.builder().setMaxRows(10).get();
+ final CSVFormat left = CSVFormat.DEFAULT.builder().setMaxRows(1000).get();
+ assertNotEqualsFlip(right, left);
+ assertNotEquals(right.hashCode(), left.hashCode());
+ }
+
@Test
void testEqualsNoQuotes() {
final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).get();
From 69248ba0bed03c71454b71ec0c79805f48f5c447 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 07:58:48 -0400
Subject: [PATCH 05/69] Bump org.apache.commons:commons-parent from 99 to 100
---
pom.xml | 2 +-
src/changes/changes.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pom.xml b/pom.xml
index 7e796568e..f0578c820 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
org.apache.commons
commons-parent
- 99
+ 100
commons-csv
1.14.2-SNAPSHOT
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index aacd40586..34e8f8aa2 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -49,7 +49,7 @@
Add an "Android Compatibility" section to the web site.
- Bump org.apache.commons:commons-parent from 85 to 99 #573, #595.
+ Bump org.apache.commons:commons-parent from 85 to 100 #573, #595.
[test] Bump com.opencsv:opencsv from 5.11.2 to 5.12.0 #558.
Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.20.0.
Bump commons-codec:commons-codec from 1.19.0 to 1.22.0.
From add38c3e06d1b90ebcb87ad4390a000b09850df1 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 08:05:51 -0400
Subject: [PATCH 06/69] [CSV-321] CSVFormat.equals()/hashCode() ignores maxRows
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 34e8f8aa2..2bbb0946d 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -46,6 +46,7 @@
Remove broken website link #577.
Fix Apache RAT plugin console warnings.
[Javadoc] Clarify behavior of deprecated CSVFormat#withFirstRecordAsHeader() #2413.
+ CSVFormat.equals()/hashCode() ignores maxRows (#600).
Add an "Android Compatibility" section to the web site.
From 23eb602d25a9cf0c989085e9a618dd2b3bff5b45 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 10:42:03 -0400
Subject: [PATCH 07/69] [CSV-323] ExtendedBufferedReader byte tracking leads to
an incorrect CSVRecord.getBytePosition()
---
.../commons/csv/ExtendedBufferedReader.java | 76 ++++++++++---------
.../org/apache/commons/csv/CSVParserTest.java | 18 +++++
2 files changed, 60 insertions(+), 34 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 8dcda6517..889b58edc 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -37,26 +37,30 @@
/**
* A special buffered reader which supports sophisticated read access.
*
- * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
- * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
+ * In particular the reader supports a look-ahead option, which allows you to see the next char returned by {@link #read()}. This reader also tracks how many
+ * characters have been read with {@link #getPosition()}.
*
*/
final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
/** The last char returned */
private int lastChar = UNDEFINED;
+
private int lastCharMark = UNDEFINED;
/** The count of EOLs (CR/LF/CRLF) seen so far */
private long lineNumber;
+
private long lineNumberMark;
/** The position, which is the number of characters read so far */
private long position;
+
private long positionMark;
/** The number of bytes read so far. */
private long bytesRead;
+
private long bytesReadMark;
/** Encoder for calculating the number of bytes for each character read. */
@@ -70,12 +74,11 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
}
/**
- * Constructs a new instance with the specified reader, character set,
- * and byte tracking option. Initializes an encoder if byte tracking is enabled
- * and a character set is provided.
+ * Constructs a new instance with the specified reader, character set, and byte tracking option. Initializes an encoder if byte tracking is enabled and a
+ * character set is provided.
*
- * @param reader the reader supports a look-ahead option.
- * @param charset the character set for encoding, or {@code null} if not applicable.
+ * @param reader the reader supports a look-ahead option.
+ * @param charset the character set for encoding, or {@code null} if not applicable.
* @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it.
*/
ExtendedBufferedReader(final Reader reader, final Charset charset, final boolean trackBytes) {
@@ -86,8 +89,7 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
/**
* Closes the stream.
*
- * @throws IOException
- * If an I/O error occurs
+ * @throws IOException If an I/O error occurs
*/
@Override
public void close() throws IOException {
@@ -105,26 +107,33 @@ long getBytesRead() {
return this.bytesRead;
}
+ private long getEncodedCharLength(final char[] buf, final int offset, final int length) throws CharacterCodingException {
+ int len = 0; // sums the encoded byte length of buf[offset] .. buf[offset + length - 1]
+ for (int i = offset; i < offset + length; i++) { // length is a char count, not an end index; a count <= 0 (e.g. EOF) skips the loop
+ len += getEncodedCharLength(buf[i]);
+ }
+ return len;
+ }
+
/**
- * Gets the byte length of the given character based on the original Unicode
- * specification, which defined characters as fixed-width 16-bit entities.
+ * Gets the byte length of the given character based on the original Unicode specification, which defined characters as fixed-width 16-bit entities.
*
* The Unicode characters are divided into two main ranges:
*
- * - U+0000 to U+FFFF (Basic Multilingual Plane, BMP):
- *
- * - Represented using a single 16-bit {@code char}.
- * - Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.
- *
- *
- * - U+10000 to U+10FFFF (Supplementary Characters):
- *
- * - Represented as a pair of {@code char}s:
- * - The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).
- * - The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).
- * - Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.
- *
- *
+ * - U+0000 to U+FFFF (Basic Multilingual Plane, BMP):
+ *
+ * - Represented using a single 16-bit {@code char}.
+ * - Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.
+ *
+ *
+ * - U+10000 to U+10FFFF (Supplementary Characters):
+ *
+ * - Represented as a pair of {@code char}s:
+ * - The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).
+ * - The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).
+ * - Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.
+ *
+ *
*
*
* @param current the current character to process.
@@ -148,10 +157,9 @@ private int getEncodedCharLength(final int current) throws CharacterCodingExcept
}
/**
- * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
- * any of the read methods. This will not include a character read using the {@link #peek()} method. If no
- * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
- * on the last read then this will return {@link IOUtils#EOF}.
+ * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by any of the read methods. This will not
+ * include a character read using the {@link #peek()} method. If no character has been read then this will return {@link Constants#UNDEFINED}. If the end of
+ * the stream was reached on the last read then this will return {@link IOUtils#EOF}.
*
* @return the last character that was read
*/
@@ -193,8 +201,7 @@ public void mark(final int readAheadLimit) throws IOException {
@Override
public int read() throws IOException {
final int current = super.read();
- if (current == CR || current == LF && lastChar != CR ||
- current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
+ if (current == CR || current == LF && lastChar != CR || current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
lineNumber++;
}
if (encoder != null) {
@@ -226,13 +233,15 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
} else if (len == EOF) {
lastChar = EOF;
}
+ if (encoder != null) {
+ this.bytesRead += getEncodedCharLength(buf, offset, len);
+ }
position += len;
return len;
}
/**
- * Gets the next line, dropping the line terminator(s). This method should only be called when processing a
- * comment, otherwise, information can be lost.
+ * Gets the next line, dropping the line terminator(s). This method should only be called when processing a comment, otherwise, information can be lost.
*
* Increments {@link #lineNumber} and updates {@link #position}.
*
@@ -272,5 +281,4 @@ public void reset() throws IOException {
bytesRead = bytesReadMark;
super.reset();
}
-
}
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index d9dd4e545..e18eee026 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -648,6 +648,24 @@ void testForEach() throws Exception {
}
}
+ @Test
+ void testGetBytePositionMultiCharacterDelimiter() throws IOException {
+ final String code = "aa[|]bb\ncc[|]dd\n";
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
+ try (CSVParser parser = CSVParser.builder()
+ .setReader(new StringReader(code))
+ .setFormat(format)
+ .setCharset(StandardCharsets.UTF_8)
+ .setTrackBytes(true)
+ .get()) {
+ final Iterator it = parser.iterator();
+ final CSVRecord first = it.next();
+ final CSVRecord second = it.next();
+ assertEquals(0, first.getBytePosition());
+ assertEquals(8, second.getBytePosition());
+ }
+ }
+
@Test
void testGetHeaderComment_HeaderComment1() throws IOException {
try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
From 7cf896b0f3da94d77ba7c4d9e76309ef0ef1273b Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 11:49:48 -0400
Subject: [PATCH 08/69] [CSV-323] ExtendedBufferedReader byte tracking leads to
an incorrect CSVRecord.getBytePosition()
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2bbb0946d..be2d55598 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -47,6 +47,7 @@
Fix Apache RAT plugin console warnings.
[Javadoc] Clarify behavior of deprecated CSVFormat#withFirstRecordAsHeader() #2413.
CSVFormat.equals()/hashCode() ignores maxRows (#600).
+ ExtendedBufferedReader byte tracking leads to an incorrect CSVRecord.getBytePosition() (#601).
Add an "Android Compatibility" section to the web site.
From 4a28ba8254ce1af6985e3dcd95c753ef1c7165ea Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 12:05:44 -0400
Subject: [PATCH 09/69] [CSV-322] CSVFormat.Builder.setQuote() does not refresh
quotedNullString
---
.../org/apache/commons/csv/CSVFormat.java | 2 ++
.../org/apache/commons/csv/CSVFormatTest.java | 24 +++++++++++++++++++
2 files changed, 26 insertions(+)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 123fa2635..46d9d0931 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -806,6 +806,8 @@ public Builder setQuote(final Character quoteCharacter) {
throw new IllegalArgumentException("The quoteCharacter cannot be a line break");
}
this.quoteCharacter = quoteCharacter;
+ final Character quote = quoteCharacter != null ? quoteCharacter : Constants.DOUBLE_QUOTE_CHAR;
+ this.quotedNullString = quote + nullString + quote;
return this;
}
diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
index 23bc9c8dc..ca18754f7 100644
--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -1001,6 +1001,30 @@ void testQuoteCharSameAsDelimiterThrowsException_Deprecated() {
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withDelimiter('!'));
}
+ @Test
+ void testQuotedNullStringTracksQuoteCharacter() throws IOException {
+ final StringBuilder out = new StringBuilder();
+ // @formatter:off
+ final Builder builder = CSVFormat.DEFAULT.builder();
+ final CSVFormat format = builder
+ .setQuoteMode(QuoteMode.ALL)
+ .setNullString("NULL")
+ .get();
+ // @formatter:on
+ format.print(null, out, true);
+ assertEquals("\"NULL\"", out.toString());
+ // set
+ out.setLength(0);
+ builder.setQuote('\'');
+ builder.get().print(null, out, true);
+ assertEquals("'NULL'", out.toString());
+ // reset
+ out.setLength(0);
+ builder.setQuote((Character) null);
+ builder.get().print(null, out, true);
+ assertEquals("\"NULL\"", out.toString());
+ }
+
@Test
void testQuoteModeNoneShouldReturnMeaningfulExceptionMessage() {
final Exception exception = assertThrows(IllegalArgumentException.class, () ->
From 4c67ed89df41d4a1eb2795c1cdd4fda9ed6edb9a Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 14:34:32 -0400
Subject: [PATCH 10/69] [CSV-322] CSVFormat.Builder.setQuote() does not refresh
quotedNullString #2447
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index be2d55598..a86ffcf88 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -48,6 +48,7 @@
[Javadoc] Clarify behavior of deprecated CSVFormat#withFirstRecordAsHeader() #2413.
CSVFormat.equals()/hashCode() ignores maxRows (#600).
ExtendedBufferedReader byte tracking leads to an incorrect CSVRecord.getBytePosition() (#601).
+ [CSV-322] CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447).
Add an "Android Compatibility" section to the web site.
From f25612fff9fc55e40d2e9a1c1ab37ee69dd0588e Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 20 May 2026 16:23:26 -0400
Subject: [PATCH 11/69] Internal parser ctor refactoring.
---
.../org/apache/commons/csv/CSVParser.java | 53 +++++++------------
1 file changed, 18 insertions(+), 35 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index bce62ea54..5351f8726 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -165,10 +165,9 @@ protected Builder() {
// empty
}
- @SuppressWarnings("resource")
@Override
public CSVParser get() throws IOException {
- return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset(), trackBytes);
+ return new CSVParser(this);
}
/**
@@ -524,46 +523,30 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
*/
@Deprecated
public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) throws IOException {
- this(reader, format, characterOffset, recordNumber, null, false);
+ // @formatter:off
+ this(builder()
+ .setReader(reader)
+ .setFormat(Objects.requireNonNull(format, "format")) // requireNonNull for full compatibility
+ .setCharacterOffset(characterOffset)
+ .setRecordNumber(recordNumber)
+ .setCharset((Charset) null).setTrackBytes(false));
+ // @formatter:on
}
/**
- * Constructs a new instance using the given {@link CSVFormat}.
- *
- *
- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
- * unless you close the {@code reader}.
- *
+ * Constructs a new instance from a builder.
*
- * @param reader
- * a Reader containing CSV-formatted input. Must not be null.
- * @param format
- * the CSVFormat used for CSV parsing. Must not be null.
- * @param characterOffset
- * Lexer offset when the parser does not start parsing at the beginning of the source.
- * @param recordNumber
- * The next record number to assign.
- * @param charset
- * The character encoding to be used for the reader when enableByteTracking is true.
- * @param trackBytes
- * {@code true} to enable byte tracking for the parser; {@code false} to disable it.
- * @throws IllegalArgumentException
- * If the parameters of the format are inconsistent or if either the reader or format is null.
- * @throws IOException
- * If there is a problem reading the header or skipping the first record.
- * @throws CSVException Thrown on invalid CSV input data.
+ * @param builder The source builder.
+ * @throws IOException if an I/O error occurs.
*/
- @SuppressWarnings("resource") // reader is managed by lexer.
- private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber, final Charset charset,
- final boolean trackBytes) throws IOException {
- Objects.requireNonNull(reader, "reader");
- Objects.requireNonNull(format, "format");
- this.format = format.copy();
- this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset, trackBytes));
+ @SuppressWarnings("resource") // Lexer manages ExtendedBufferedReader.
+ private CSVParser(final Builder builder) throws IOException {
+ this.format = (builder.format != null ? builder.format : CSVFormat.DEFAULT).copy();
+ this.lexer = new Lexer(format, new ExtendedBufferedReader(builder.getReader(), builder.getCharset(), builder.trackBytes));
this.csvRecordIterator = new CSVRecordIterator();
this.headers = createHeaders();
- this.characterOffset = characterOffset;
- this.recordNumber = recordNumber - 1;
+ this.characterOffset = builder.characterOffset;
+ this.recordNumber = builder.recordNumber - 1;
}
private void addRecordValue(final boolean lastRecord) {
From c7c54e12ef5b0aa5a3bfef85498081eff9e899a5 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 21 May 2026 17:11:43 -0400
Subject: [PATCH 12/69] Javadoc
---
.../java/org/apache/commons/csv/Lexer.java | 32 +++++++++----------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 3d00fe0bf..e83f5465f 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -68,8 +68,8 @@ final class Lexer implements Closeable {
/**
* Appends the next escaped character to the token's content.
*
- * @param token the current token
- * @throws IOException on stream access error
+ * @param token the current token.
+ * @throws IOException on stream access error.
* @throws CSVException Thrown on invalid input.
*/
private void appendNextEscapedCharacterToToken(final Token token) throws IOException {
@@ -89,7 +89,7 @@ private void appendNextEscapedCharacterToToken(final Token token) throws IOExcep
* Closes resources.
*
* @throws IOException
- * If an I/O error occurs
+ * If an I/O error occurs.
*/
@Override
public void close() throws IOException {
@@ -97,27 +97,27 @@ public void close() throws IOException {
}
/**
- * Gets the number of bytes read
+ * Gets the number of bytes read.
*
- * @return the number of bytes read
+ * @return the number of bytes read.
*/
long getBytesRead() {
return reader.getBytesRead();
}
/**
- * Returns the current character position
+ * Gets the current character position.
*
- * @return the current character position
+ * @return the current character position.
*/
long getCharacterPosition() {
return reader.getPosition();
}
/**
- * Returns the current line number
+ * Gets the current line number.
*
- * @return the current line number
+ * @return the current line number.
*/
long getCurrentLineNumber() {
return reader.getLineNumber();
@@ -136,7 +136,7 @@ boolean isCommentStart(final int ch) {
}
/**
- * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#peek(char[])}.
+ * Tests whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#peek(char[])}.
*
* @param ch
* the current character.
@@ -214,7 +214,7 @@ boolean isQuoteChar(final int ch) {
/**
* Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file.
*
- * @param ch the character to check
+ * @param ch the character to check.
* @return true if the character is at the start of a line.
*/
boolean isStartOfLine(final int ch) {
@@ -400,10 +400,10 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
* An unescaped delimiter has been reached (TOKEN)
*
*
- * @param token the current token
- * @param ch the current character
- * @return the filled token
- * @throws IOException on stream access error
+ * @param token the current token.
+ * @param ch the current character.
+ * @return the filled token.
+ * @throws IOException on stream access error.
* @throws CSVException Thrown on invalid input.
*/
private Token parseSimpleToken(final Token token, final int ch) throws IOException {
@@ -442,7 +442,7 @@ private Token parseSimpleToken(final Token token, final int ch) throws IOExcepti
/**
* Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character...
*
- * @return true if the given or next character is a line-terminator
+ * @return true if the given or next character is a line-terminator.
*/
boolean readEndOfLine(final int ch) throws IOException {
// check if we have \r\n...
From b29fb40acd3ecc09bb4f43d20f88ad5ee9fa8324 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 21 May 2026 17:35:06 -0400
Subject: [PATCH 13/69] Lexer.isDelimiter() accepts a partial multi-character
delimiter at EOF.
---
src/main/java/org/apache/commons/csv/Lexer.java | 2 ++
.../java/org/apache/commons/csv/CSVParserTest.java | 14 ++++++++++++++
.../java/org/apache/commons/csv/LexerTest.java | 12 ++++++++++++
3 files changed, 28 insertions(+)
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 3d00fe0bf..e0b96b079 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -23,6 +23,7 @@
import java.io.Closeable;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.commons.io.IOUtils;
@@ -272,6 +273,7 @@ Token nextToken(final Token token) throws IOException {
token.type = Token.Type.COMMENT;
return token;
}
+ Arrays.fill(delimiterBuf, '\0');
// Important: make sure a new char gets consumed in each iteration
while (token.type == Token.Type.INVALID) {
// ignore whitespaces at beginning of a token
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index e18eee026..d7bc07e99 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1621,6 +1621,20 @@ void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format) throw
}
}
+ /**
+ * Tests CSV-324.
+ */
+ @Test
+ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
+ try (CSVParser parser = format.parse(new StringReader("a[|]b[|"))) {
+ final CSVRecord record = parser.nextRecord();
+ assertEquals("a", record.get(0));
+ assertEquals("b[|", record.get(1));
+ assertEquals(2, record.size());
+ }
+ }
+
@Test
void testProvidedHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index e54e93365..511876a28 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -409,6 +409,18 @@ void testNextToken6() throws IOException {
}
}
+ /**
+ * Tests CSV-324.
+ */
+ @Test
+ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
+ try (Lexer lexer = createLexer("a[|]b[|", format)) {
+ assertNextToken(TOKEN, "a", lexer);
+ assertNextToken(EOF, "b[|", lexer);
+ }
+ }
+
@Test
void testReadEscapeBackspace() throws IOException {
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
From f11278e603d7641fe46c81b5dc88bd797b725114 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 21 May 2026 17:56:23 -0400
Subject: [PATCH 14/69] [CSV-324] Lexer.isDelimiter() accepts a partial
multi-character delimiter at EOF. #603
---
src/changes/changes.xml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index a86ffcf88..f72472eeb 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -48,7 +48,8 @@
[Javadoc] Clarify behavior of deprecated CSVFormat#withFirstRecordAsHeader() #2413.
CSVFormat.equals()/hashCode() ignores maxRows (#600).
ExtendedBufferedReader byte tracking leads to an incorrect CSVRecord.getBytePosition() (#601).
- [CSV-322] CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447).
+ CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447).
+ Lexer.isDelimiter() accepts a partial multi-character delimiter at EOF (#603).
Add an "Android Compatibility" section to the web site.
From 64ea660f8131d3257c2c312c8396fc5b9044a628 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 21 May 2026 20:45:12 -0400
Subject: [PATCH 15/69] Sort members
---
.../org/apache/commons/csv/CSVParser.java | 32 +++++++++----------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 5351f8726..97c65a9b3 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -473,6 +473,22 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
private final Token reusableToken = new Token();
+ /**
+ * Constructs a new instance from a builder.
+ *
+ * @param builder The source builder.
+ * @throws IOException if an I/O error occurs.
+ */
+ @SuppressWarnings("resource") // Lexer manages ExtendedBufferedReader.
+ private CSVParser(final Builder builder) throws IOException {
+ this.format = (builder.format != null ? builder.format : CSVFormat.DEFAULT).copy();
+ this.lexer = new Lexer(format, new ExtendedBufferedReader(builder.getReader(), builder.getCharset(), builder.trackBytes));
+ this.csvRecordIterator = new CSVRecordIterator();
+ this.headers = createHeaders();
+ this.characterOffset = builder.characterOffset;
+ this.recordNumber = builder.recordNumber - 1;
+ }
+
/**
* Constructs a new instance using the given {@link CSVFormat}.
*
@@ -533,22 +549,6 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
// @formatter:off
}
- /**
- * Constructs a new instance from a builder.
- *
- * @param builder The source builder.
- * @throws IOException if an I/O error occurs.
- */
- @SuppressWarnings("resource") // Lexer manages ExtendedBufferedReader.
- private CSVParser(final Builder builder) throws IOException {
- this.format = (builder.format != null ? builder.format : CSVFormat.DEFAULT).copy();
- this.lexer = new Lexer(format, new ExtendedBufferedReader(builder.getReader(), builder.getCharset(), builder.trackBytes));
- this.csvRecordIterator = new CSVRecordIterator();
- this.headers = createHeaders();
- this.characterOffset = builder.characterOffset;
- this.recordNumber = builder.recordNumber - 1;
- }
-
private void addRecordValue(final boolean lastRecord) {
final String input = format.trim(reusableToken.content.toString());
if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) {
From 513aac23983337ddd2dddb266e43a5eff145957e Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 22 May 2026 07:24:53 -0400
Subject: [PATCH 16/69] [CSV-325] CSVParser applies characterOffset to
bytePosition, which breaks getBytePosition() for multi-byte prefixes
Add CSVParser.Builder.setByteOffset(long)
---
.../org/apache/commons/csv/CSVParser.java | 31 +++++++++++++++++--
.../org/apache/commons/csv/CSVParserTest.java | 30 ++++++++++++++++++
2 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 97c65a9b3..208f5a0da 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -154,6 +154,7 @@ public final class CSVParser implements Iterable, Closeable {
public static class Builder extends AbstractStreamBuilder {
private CSVFormat format;
+ private long byteOffset = -1;
private long characterOffset;
private long recordNumber = 1;
private boolean trackBytes;
@@ -171,10 +172,27 @@ public CSVParser get() throws IOException {
}
/**
- * Sets the lexer offset when the parser does not start parsing at the beginning of the source.
+ * Sets the lexer byte offset when the parser does not start parsing at the beginning of the source.
+ *
+ * By default, the value is {@code -1}, which reuses the character offset for the byte offset.
+ *
*
- * @param characterOffset the lexer offset.
+ * @param byteOffset the lexer byte offset.
* @return {@code this} instance.
+ * @see #setCharacterOffset(long)
+ * @since 1.15.0
+ */
+ public Builder setByteOffset(final long byteOffset) {
+ this.byteOffset = byteOffset;
+ return asThis();
+ }
+
+ /**
+ * Sets the lexer character offset when the parser does not start parsing at the beginning of the source.
+ *
+ * @param characterOffset the lexer character offset.
+ * @return {@code this} instance.
+ * @see #setByteOffset(long)
*/
public Builder setCharacterOffset(final long characterOffset) {
this.characterOffset = characterOffset;
@@ -465,6 +483,12 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
*/
private long recordNumber;
+ /**
+ * Lexer byte offset when the parser does not start parsing at the beginning of the source. Usually used in
+ * combination with {@link #recordNumber}.
+ */
+ private final long byteOffset;
+
/**
* Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
* with {@link #recordNumber}.
@@ -485,6 +509,7 @@ private CSVParser(final Builder builder) throws IOException {
this.lexer = new Lexer(format, new ExtendedBufferedReader(builder.getReader(), builder.getCharset(), builder.trackBytes));
this.csvRecordIterator = new CSVRecordIterator();
this.headers = createHeaders();
+ this.byteOffset = builder.byteOffset != -1 ? builder.byteOffset : builder.characterOffset;
this.characterOffset = builder.characterOffset;
this.recordNumber = builder.recordNumber - 1;
}
@@ -870,7 +895,7 @@ CSVRecord nextRecord() throws IOException {
recordList.clear();
StringBuilder sb = null;
final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
- final long startBytePosition = lexer.getBytesRead() + characterOffset;
+ final long startBytePosition = lexer.getBytesRead() + byteOffset;
do {
reusableToken.reset();
lexer.nextToken(reusableToken);
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index d7bc07e99..8b1527c42 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -666,6 +666,36 @@ void testGetBytePositionMultiCharacterDelimiter() throws IOException {
}
}
+ @Test
+ void testGetBytePositionWithCharacterOffsetAndMultiBytePrefix() throws Exception {
+ final String row0 = "é,x\n";
+ final Charset charset = UTF_8;
+ // row0 char count is 4
+ assertEquals(4, row0.length());
+ // row0 byte count is 5
+ final int record1ByteOffset = row0.getBytes(charset).length;
+ assertEquals(5, record1ByteOffset);
+ final String row1 = "b,c\n";
+ final String rows = row0 + row1;
+ final long record1CharOffset = row0.length();
+ final long expectedByteOffset = row0.getBytes(charset).length;
+ try (CSVParser parser = CSVParser.builder()
+ .setReader(new StringReader(row1))
+ .setFormat(CSVFormat.DEFAULT)
+ .setCharset(charset)
+ .setTrackBytes(true)
+ .setByteOffset(record1ByteOffset)
+ .setCharacterOffset(record1CharOffset)
+ .setRecordNumber(2) // not relevant but a better use case example.
+ .get()) {
+ final CSVRecord record = parser.nextRecord();
+ assertNotNull(record);
+ assertEquals(4, record.getCharacterPosition());
+ assertEquals(record1CharOffset, record.getCharacterPosition());
+ assertEquals(expectedByteOffset, record.getBytePosition());
+ }
+ }
+
@Test
void testGetHeaderComment_HeaderComment1() throws IOException {
try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
From 971b629a7ad78fa053b23a45127d44dc7701e73b Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 22 May 2026 07:58:11 -0400
Subject: [PATCH 17/69] [CSV-325] CSVParser applies characterOffset to
bytePosition, breaking getBytePosition() for multi-byte prefixes.
Add CSVParser.Builder.setByteOffset(long) (#604).
---
src/changes/changes.xml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index f72472eeb..e32ba1776 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -50,8 +50,10 @@
ExtendedBufferedReader byte tracking leads to an incorrect CSVRecord.getBytePosition() (#601).
CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447).
Lexer.isDelimiter() accepts a partial multi-character delimiter at EOF (#603).
+ CSVParser applies characterOffset to bytePosition (#604).
Add an "Android Compatibility" section to the web site.
+ Add CSVParser.Builder.setByteOffset(long) (#604).
Bump org.apache.commons:commons-parent from 85 to 100 #573, #595.
[test] Bump com.opencsv:opencsv from 5.11.2 to 5.12.0 #558.
From df91b6edfcf1d30e4601bb6f6ec128533ec2dbff Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 22 May 2026 07:59:58 -0400
Subject: [PATCH 18/69] The next version will be 1.15.0
---
pom.xml | 6 +++---
src/changes/changes.xml | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/pom.xml b/pom.xml
index f0578c820..f362f8130 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
100
commons-csv
- 1.14.2-SNAPSHOT
+ 1.15.0-SNAPSHOT
Apache Commons CSV
https://commons.apache.org/proper/commons-csv/
2005
@@ -89,12 +89,12 @@
- 1.14.2
+ 1.15.0
(Java 8 or above)
RC1
1.14.1
- 1.14.3
+ 1.15.1
csv
org.apache.commons.csv
CSV
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index e32ba1776..664c922ed 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -40,7 +40,7 @@
Apache Commons CSV Release Notes
-
+
Remove Spotbugs dependency and use exclude-filter instead #564.
Remove broken website link #577.
From 019456e09af622978bb5349168c5512dc4daac70 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 22 May 2026 08:39:20 -0400
Subject: [PATCH 19/69] Add comment
---
pom.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/pom.xml b/pom.xml
index f362f8130..6d814251b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -59,6 +59,7 @@
com.h2database
h2
+
2.2.224
test
From ae6949807e894b62c57547236b8e805646a2a6b9 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 27 May 2026 16:51:47 -0400
Subject: [PATCH 20/69] Bump github/codeql-action from 4.35.5 to 4.36.0
---
.github/workflows/codeql-analysis.yml | 6 +++---
.github/workflows/scorecards-analysis.yml | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index f95d030d8..a8afcbfb7 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -58,7 +58,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
+ uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +69,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
+ uses: github/codeql-action/autobuild@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -83,4 +83,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
+ uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index 79c088cf3..fd682a9a1 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -64,6 +64,6 @@ jobs:
retention-days: 5
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
+ uses: github/codeql-action/upload-sarif@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
with:
sarif_file: results.sarif
From bbf07cbfb4c6f723105f8e82c4c805e15f96e605 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 4 Jun 2026 10:20:17 -0400
Subject: [PATCH 21/69] Bump org.apache.commons:commons-parent from 100 to 101
---
pom.xml | 2 +-
src/changes/changes.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pom.xml b/pom.xml
index 6d814251b..57bca27b2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
org.apache.commons
commons-parent
- 100
+ 101
commons-csv
1.15.0-SNAPSHOT
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 664c922ed..633de96bd 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -55,7 +55,7 @@
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
- Bump org.apache.commons:commons-parent from 85 to 100 #573, #595.
+ Bump org.apache.commons:commons-parent from 85 to 101 #573, #595.
[test] Bump com.opencsv:opencsv from 5.11.2 to 5.12.0 #558.
Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.20.0.
Bump commons-codec:commons-codec from 1.19.0 to 1.22.0.
From 9f8c596a0cf3cd864960421f3222e73d586cadb9 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 4 Jun 2026 10:21:56 -0400
Subject: [PATCH 22/69] Add GH CI Java 26
Bump GH CI from Java 26-ea to 27-ea
---
.github/workflows/maven.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 10c18b607..a5ff694fc 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -34,12 +34,12 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
- java: [ 8, 11, 17, 21, 25 ]
+ java: [ 8, 11, 17, 21, 25, 36 ]
experimental: [false]
# Keep the same parameter order as the matrix above
include:
- os: ubuntu-latest
- java: 26-ea
+ java: 27-ea
experimental: true
steps:
From 6183c6ec5f78cd726b72c24c06e1bc0afe84df45 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 4 Jun 2026 10:22:08 -0400
Subject: [PATCH 23/69] Add GH CI Java 26
Bump GH CI from Java 26-ea to 27-ea
---
.github/workflows/maven.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index a5ff694fc..f1bfba770 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -34,7 +34,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
- java: [ 8, 11, 17, 21, 25, 36 ]
+ java: [ 8, 11, 17, 21, 25, 26 ]
experimental: [false]
# Keep the same parameter order as the matrix above
include:
From ca64eb8439b26404e2c5baa383d42ffa3436733d Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 5 Jun 2026 07:27:06 -0400
Subject: [PATCH 24/69] Better inline comments
---
.../org/apache/commons/csv/CSVFormat.java | 20 +++++++++----------
.../org/apache/commons/csv/CSVParser.java | 2 +-
.../org/apache/commons/csv/CSVPrinter.java | 4 ++--
.../org/apache/commons/csv/CSVRecord.java | 10 ++++------
.../org/apache/commons/csv/Constants.java | 2 +-
5 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 46d9d0931..f6b2c5ae0 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -1479,7 +1479,7 @@ private static boolean isLineBreak(final char c) {
* @return true if {@code c} is a line break character (and not null).
*/
private static boolean isLineBreak(final Character c) {
- return c != null && isLineBreak(c.charValue()); // Explicit (un)boxing is intentional
+ return c != null && isLineBreak(c.charValue()); // Explicit unboxing is intentional
}
/** Same test as in as {@link String#trim()}. */
@@ -1700,7 +1700,7 @@ public boolean equals(final Object obj) {
}
private void escape(final char c, final Appendable appendable) throws IOException {
- append(escapeCharacter.charValue(), appendable); // Explicit (un)boxing is intentional
+ append(escapeCharacter.charValue(), appendable); // Explicit unboxing is intentional
append(c, appendable);
}
@@ -1838,7 +1838,7 @@ public DuplicateHeaderMode getDuplicateHeaderMode() {
* @return the escape character, may be {@code 0}
*/
char getEscapeChar() {
- return escapeCharacter != null ? escapeCharacter.charValue() : 0; // Explicit (un)boxing is intentional
+ return escapeCharacter != null ? escapeCharacter.charValue() : 0; // Explicit unboxing is intentional
}
/**
@@ -2161,7 +2161,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo
}
final boolean quoteCharacterSet = isQuoteCharacterSet();
if (quoteCharacterSet) {
- append(getQuoteCharacter().charValue(), out); // Explicit (un)boxing is intentional
+ append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional
}
// Stream the input to the output without reading or holding the whole value in memory.
// AppendableOutputStream cannot "close" an Appendable.
@@ -2169,7 +2169,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo
IOUtils.copy(inputStream, outputStream);
}
if (quoteCharacterSet) {
- append(getQuoteCharacter().charValue(), out); // Explicit (un)boxing is intentional
+ append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional
}
}
@@ -2418,7 +2418,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi
final int len = charSeq.length();
final char[] delim = getDelimiterCharArray();
final int delimLength = delim.length;
- final char quoteChar = getQuoteCharacter().charValue(); // Explicit (un)boxing is intentional
+ final char quoteChar = getQuoteCharacter().charValue(); // Explicit unboxing is intentional
// If escape char not specified, default to the quote char
// This avoids having to keep checking whether there is an escape character
// at the cost of checking against quote twice
@@ -2521,7 +2521,7 @@ private void printWithQuotes(final Reader reader, final Appendable appendable) t
printWithEscapes(reader, appendable);
return;
}
- final char quote = getQuoteCharacter().charValue(); // Explicit (un)boxing is intentional
+ final char quote = getQuoteCharacter().charValue(); // Explicit unboxing is intentional
// (1) Append opening quote
append(quote, appendable);
// (2) Append Reader contents, doubling quotes
@@ -2607,13 +2607,13 @@ boolean useRow(final long rowNum) {
* @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes.
*/
private void validate() throws IllegalArgumentException {
- if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // Explicit (un)boxing is intentional
+ if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // Explicit unboxing is intentional
throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
}
- if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // Explicit (un)boxing is intentional
+ if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // Explicit unboxing is intentional
throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
}
- if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // Explicit (un)boxing is intentional
+ if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // Explicit unboxing is intentional
throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
}
if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 208f5a0da..c9b2dc44f 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -650,7 +650,7 @@ private Headers createHeaders() throws IOException {
}
observedMissing |= blankHeader;
if (header != null) {
- headerMap.put(header, Integer.valueOf(i)); // Explicit (un)boxing is intentional
+ headerMap.put(header, Integer.valueOf(i)); // Explicit boxing is intentional
if (headerNames == null) {
headerNames = new ArrayList<>(headerRecord.length);
}
diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java
index 087129ec5..a7048fd62 100644
--- a/src/main/java/org/apache/commons/csv/CSVPrinter.java
+++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java
@@ -235,7 +235,7 @@ public void printComment(final String comment) throws IOException {
if (!newRecord) {
println();
}
- appendable.append(format.getCommentMarker().charValue()); // Explicit (un)boxing is intentional
+ appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional
appendable.append(SP);
for (int i = 0; i < comment.length(); i++) {
final char c = comment.charAt(i);
@@ -247,7 +247,7 @@ public void printComment(final String comment) throws IOException {
// falls-through: break intentionally excluded.
case LF:
println();
- appendable.append(format.getCommentMarker().charValue()); // Explicit (un)boxing is intentional
+ appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional
appendable.append(SP);
break;
default:
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index f619717d0..502bf318a 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -132,13 +132,11 @@ public String get(final String name) {
throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, headerMap.keySet()));
}
try {
- return values[index.intValue()]; // Explicit (un)boxing is intentional
+ return values[index.intValue()]; // Explicit unboxing is intentional
} catch (final ArrayIndexOutOfBoundsException e) {
+ // Explicit boxing is intentional
throw new IllegalArgumentException(
- String.format("Index for header '%s' is %d but CSVRecord only has %d values!", name, index, Integer.valueOf(values.length))); // Explicit
- // (un)boxing
- // is
- // intentional
+ String.format("Index for header '%s' is %d but CSVRecord only has %d values!", name, index, Integer.valueOf(values.length)));
}
}
@@ -267,7 +265,7 @@ public boolean isSet(final int index) {
* @return whether a given column is mapped and has a value.
*/
public boolean isSet(final String name) {
- return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit (un)boxing is intentional
+ return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit unboxing is intentional
}
/**
diff --git a/src/main/java/org/apache/commons/csv/Constants.java b/src/main/java/org/apache/commons/csv/Constants.java
index 0b9476e1c..9dd276ecc 100644
--- a/src/main/java/org/apache/commons/csv/Constants.java
+++ b/src/main/java/org/apache/commons/csv/Constants.java
@@ -40,7 +40,7 @@ final class Constants {
/** RFC 4180 defines line breaks as CRLF. */
static final String CRLF = "\r\n";
- static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); // Explicit (un)boxing is intentional.
+ static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); // Explicit boxing is intentional.
static final String EMPTY = "";
From 8192d9d196a554d67d7d65b0a131001b9d1eb412 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 5 Jun 2026 07:28:04 -0400
Subject: [PATCH 25/69] Bump github/codeql-action from 4.36.0 to 4.36.2
---
.github/workflows/codeql-analysis.yml | 6 +++---
.github/workflows/scorecards-analysis.yml | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index a8afcbfb7..4e69d1942 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -58,7 +58,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
+ uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +69,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
+ uses: github/codeql-action/autobuild@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -83,4 +83,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
+ uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index fd682a9a1..bf246c140 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -64,6 +64,6 @@ jobs:
retention-days: 5
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
+ uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
sarif_file: results.sarif
From 297ae10f6a2d753862485b7c77fab746a1ae885d Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sun, 7 Jun 2026 10:09:01 -0400
Subject: [PATCH 26/69] Bump GH CI actions/checkout from 6.0.2 to 6.0.3
---
.github/workflows/codeql-analysis.yml | 2 +-
.github/workflows/dependency-review.yml | 2 +-
.github/workflows/maven.yml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 4e69d1942..57f55cae0 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -46,7 +46,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index f0d8ca94e..7bf60ad9e 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -26,6 +26,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: 'Checkout Repository'
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
- name: 'Dependency Review PR'
uses: actions/dependency-review-action@2031cfc080254a8a887f58cffee85186f0e49e48 # v4.9.0
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index f1bfba770..3ee3dec2b 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -43,7 +43,7 @@ jobs:
experimental: true
steps:
- - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
From 361056d668fb2c682fb26e848c58fc3f666a607d Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sun, 7 Jun 2026 10:12:54 -0400
Subject: [PATCH 27/69] Bump GH CI actions/dependency-review-action from 4.9.0
to 5.0.0
---
.github/workflows/dependency-review.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index 7bf60ad9e..114f3d8a2 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -28,4 +28,4 @@ jobs:
- name: 'Checkout Repository'
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
- name: 'Dependency Review PR'
- uses: actions/dependency-review-action@2031cfc080254a8a887f58cffee85186f0e49e48 # v4.9.0
+ uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294 # v5.0.0
From 68b4b0910be1b9c0beb52a39b906851f0c41c75c Mon Sep 17 00:00:00 2001
From: OldTruckDriver
Date: Tue, 9 Jun 2026 21:15:13 +1000
Subject: [PATCH 28/69] [CSV-326] Escape Reader values with quote and escape
---
src/changes/changes.xml | 1 +
.../java/org/apache/commons/csv/CSVFormat.java | 7 ++++---
.../org/apache/commons/csv/CSVFormatTest.java | 17 +++++++++++++++++
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 633de96bd..a3e03e372 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,7 @@
CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447).
Lexer.isDelimiter() accepts a partial multi-character delimiter at EOF (#603).
CSVParser applies characterOffset to bytePosition (#604).
+ CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index f6b2c5ae0..852a3956c 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2522,14 +2522,15 @@ private void printWithQuotes(final Reader reader, final Appendable appendable) t
return;
}
final char quote = getQuoteCharacter().charValue(); // Explicit unboxing is intentional
+ final char escape = isEscapeCharacterSet() ? getEscapeChar() : quote;
// (1) Append opening quote
append(quote, appendable);
- // (2) Append Reader contents, doubling quotes
+ // (2) Append Reader contents, doubling quotes and escape characters
int c;
while (EOF != (c = reader.read())) {
append((char) c, appendable);
- if (c == quote) {
- append(quote, appendable);
+ if (c == quote || c == escape) {
+ append((char) c, appendable);
}
}
// (3) Append closing quote
diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
index ca18754f7..c3fdeeb77 100644
--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -966,6 +966,23 @@ void testPrintWithQuotes() throws IOException {
assertEquals("\"\"\"a,b,c\r\nx,y,z\"", out.toString());
}
+ /**
+ * Tests CSV-326.
+ */
+ @Test
+ void testPrintWithQuotesEscapeBeforeQuote() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder()
+ .setEscape('\\')
+ .setQuote('"')
+ .get();
+ final String value = "\\\"";
+ final Appendable out = new StringBuilder();
+ format.print(new StringReader(value), out, true);
+ try (CSVParser parser = CSVParser.parse(out.toString(), format)) {
+ assertEquals(value, parser.getRecords().get(0).get(0));
+ }
+ }
+
@Test
void testQuoteCharSameAsCommentStartThrowsException() {
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setCommentMarker('!').get());
From 966b38519e45c1fd85c76fbbdc47cb5bb1905238 Mon Sep 17 00:00:00 2001
From: OldTruckDriver
Date: Tue, 9 Jun 2026 21:25:28 +1000
Subject: [PATCH 29/69] [CSV-327] Limit parser maxRows by produced records
---
src/changes/changes.xml | 1 +
.../java/org/apache/commons/csv/CSVParser.java | 6 +++++-
.../org/apache/commons/csv/CSVParserTest.java | 17 +++++++++++++++++
3 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 633de96bd..44f1a6be0 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,7 @@
CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447).
Lexer.isDelimiter() accepts a partial multi-character delimiter at EOF (#603).
CSVParser applies characterOffset to bytePosition (#604).
+ CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index c9b2dc44f..83b60170e 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -237,6 +237,7 @@ public Builder setTrackBytes(final boolean trackBytes) {
final class CSVRecordIterator implements Iterator {
private CSVRecord current;
+ private long recordCount;
/**
* Gets the next record or null at the end of stream or max rows read.
@@ -247,8 +248,11 @@ final class CSVRecordIterator implements Iterator {
*/
private CSVRecord getNextRecord() {
CSVRecord record = null;
- if (format.useRow(recordNumber + 1)) {
+ if (format.useRow(recordCount + 1)) {
record = Uncheck.get(CSVParser.this::nextRecord);
+ if (record != null) {
+ recordCount++;
+ }
}
return record;
}
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 8b1527c42..816c1c853 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -965,6 +965,23 @@ void testGetRecordsMaxRows(final long maxRows) throws IOException {
}
}
+ /**
+ * Tests CSV-327.
+ */
+ @Test
+ void testGetRecordsMaxRowsWithRecordNumberOffset() throws IOException {
+ try (CSVParser parser = CSVParser.builder()
+ .setReader(new StringReader("a,b\nc,d\n"))
+ .setFormat(CSVFormat.DEFAULT.builder().setMaxRows(1).get())
+ .setRecordNumber(2)
+ .get()) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertEquals(2, records.get(0).getRecordNumber());
+ assertValuesEquals(new String[] { "a", "b" }, records.get(0));
+ }
+ }
+
@Test
void testGetRecordThreeBytesRead() throws Exception {
final String code = "id,date,val5,val4\n" +
From 1e3de1274636959d3cf70acbba14ae10128369a5 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Tue, 9 Jun 2026 17:37:44 +0530
Subject: [PATCH 30/69] clear escape delimiter buffer before peek in
isEscapeDelimiter
---
src/main/java/org/apache/commons/csv/Lexer.java | 1 +
.../java/org/apache/commons/csv/CSVParserTest.java | 14 ++++++++++++++
.../java/org/apache/commons/csv/LexerTest.java | 12 ++++++++++++
3 files changed, 27 insertions(+)
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index de97868e4..238e64cee 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -191,6 +191,7 @@ boolean isEscape(final int ch) {
* @throws IOException If an I/O error occurs.
*/
boolean isEscapeDelimiter() throws IOException {
+ Arrays.fill(escapeDelimiterBuf, '\0');
reader.peek(escapeDelimiterBuf);
if (escapeDelimiterBuf[0] != delimiter[0]) {
return false;
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 8b1527c42..5443c5e84 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1665,6 +1665,20 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
}
}
+ /**
+ * A truncated escaped multi-character delimiter at EOF must stay literal data and not be completed from a stale
+ * escape delimiter look-ahead.
+ */
+ @Test
+ void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get();
+ try (CSVParser parser = format.parse(new StringReader("x![!|!]y![!|"))) {
+ final CSVRecord record = parser.nextRecord();
+ assertEquals("x[|]y![!|", record.get(0));
+ assertEquals(1, record.size());
+ }
+ }
+
@Test
void testProvidedHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index 511876a28..da60df07e 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -421,6 +421,18 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
}
}
+ /**
+ * A truncated escaped multi-character delimiter at EOF must not be accepted by reusing the previous escape delimiter
+ * look-ahead in {@link Lexer#isEscapeDelimiter()}.
+ */
+ @Test
+ void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get();
+ try (Lexer lexer = createLexer("x![!|!]y![!|", format)) {
+ assertNextToken(EOF, "x[|]y![!|", lexer);
+ }
+ }
+
@Test
void testReadEscapeBackspace() throws IOException {
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
From 4f9a4037a2c1890154e1f077d66306ec54afbf60 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 11 Jun 2026 09:40:09 -0400
Subject: [PATCH 31/69] Bump org.apache.commons:commons-parent from 101 to 102.
---
pom.xml | 2 +-
src/changes/changes.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pom.xml b/pom.xml
index 57bca27b2..8cb13ed7c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
org.apache.commons
commons-parent
- 101
+ 102
commons-csv
1.15.0-SNAPSHOT
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 633de96bd..87d68ea2a 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -55,7 +55,7 @@
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
- Bump org.apache.commons:commons-parent from 85 to 101 #573, #595.
+ Bump org.apache.commons:commons-parent from 85 to 102 #573, #595.
[test] Bump com.opencsv:opencsv from 5.11.2 to 5.12.0 #558.
Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.20.0.
Bump commons-codec:commons-codec from 1.19.0 to 1.22.0.
From 27126657d2117afd40e8972b8a34659abc753a65 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 11 Jun 2026 14:23:37 -0400
Subject: [PATCH 32/69] Update legacy GitHub links in CONTRIBUTING.md
---
CONTRIBUTING.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index eb15f2518..3423e18ad 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -48,13 +48,13 @@ Getting Started
---------------
+ Make sure you have a [JIRA account](https://issues.apache.org/jira/).
-+ Make sure you have a [GitHub account](https://github.com/signup/free). This is not essential, but makes providing patches much easier.
++ Make sure you have a [GitHub account](https://github.com/signup). This is not essential, but makes providing patches much easier.
+ If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons CSV's scope.
+ Submit a [Jira Ticket][jira] for your issue, assuming one does not already exist.
+ Clearly describe the issue including steps to reproduce when it is a bug.
+ Make sure you fill in the earliest version that you know has the issue.
+ Find the corresponding [repository on GitHub](https://github.com/apache/?query=commons-),
-[fork](https://help.github.com/articles/fork-a-repo/) and check out your forked repository. If you don't have a GitHub account, you can still clone the Commons repository.
+[fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) and check out your forked repository. If you don't have a GitHub account, you can still clone the Commons repository.
Making Changes
--------------
@@ -108,8 +108,8 @@ Additional Resources
+ [Contributing patches](https://commons.apache.org/patches.html)
+ [Apache Commons CSV JIRA project page][jira]
+ [Contributor License Agreement][cla]
-+ [General GitHub documentation](https://help.github.com/)
-+ [GitHub pull request documentation](https://help.github.com/articles/creating-a-pull-request/)
++ [General GitHub documentation](https://docs.github.com/)
++ [GitHub pull request documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request)
+ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons)
[cla]:https://www.apache.org/licenses/#clas
From 6887303cbca84216a3324e103504d9dd91660ea8 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 11 Jun 2026 15:33:31 -0400
Subject: [PATCH 33/69] [CSV-326] Escape Reader values with quote and escape
(#606).
---
src/changes/changes.xml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 8ea12d983..2475f2b9b 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -53,6 +53,8 @@
CSVParser applies characterOffset to bytePosition (#604).
CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
+ Escape Reader values with quote and escape (#606).
+.
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
From 037e2e0cd161c9d4f485aae8e49879d6cf2048ab Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 11 Jun 2026 15:59:34 -0400
Subject: [PATCH 34/69] Fix typo.
---
src/changes/changes.xml | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2475f2b9b..cb4a17848 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -53,8 +53,7 @@
CSVParser applies characterOffset to bytePosition (#604).
CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
- Escape Reader values with quote and escape (#606).
-.
+ Escape Reader values with quote and escape (#606).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
From 19b29139dfdb58426bf1330567e6d6c750abe81c Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 11 Jun 2026 16:00:26 -0400
Subject: [PATCH 35/69] Clear escape delimiter buffer before peek in
isEscapeDelimiter (#608).
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index cb4a17848..0786cc365 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -54,6 +54,7 @@
CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
Escape Reader values with quote and escape (#606).
+ Clear escape delimiter buffer before peek in isEscapeDelimiter (#608).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
From b5940a642eef0de550733887fc5b78451d4a8eed Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Sat, 13 Jun 2026 12:30:53 +0530
Subject: [PATCH 36/69] escape quote char in printWithEscapes when QuoteMode is
NONE
---
.../java/org/apache/commons/csv/CSVFormat.java | 8 ++++++--
.../org/apache/commons/csv/CSVPrinterTest.java | 17 +++++++++++++++++
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 852a3956c..03211e689 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2324,12 +2324,14 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen
final char[] delimArray = getDelimiterCharArray();
final int delimLength = delimArray.length;
final char escape = getEscapeChar();
+ final boolean quoteSet = isQuoteCharacterSet();
+ final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
while (pos < end) {
char c = charSeq.charAt(pos);
final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength);
final boolean isCr = c == Constants.CR;
final boolean isLf = c == Constants.LF;
- if (isCr || isLf || c == escape || isDelimiterStart) {
+ if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) {
// write out segment up until this char
if (pos > start) {
appendable.append(charSeq, start, pos);
@@ -2368,6 +2370,8 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
final char[] delimArray = getDelimiterCharArray();
final int delimLength = delimArray.length;
final char escape = getEscapeChar();
+ final boolean quoteSet = isQuoteCharacterSet();
+ final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
int c;
final char[] lookAheadBuffer = new char[delimLength - 1];
@@ -2379,7 +2383,7 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength);
final boolean isCr = c == Constants.CR;
final boolean isLf = c == Constants.LF;
- if (isCr || isLf || c == escape || isDelimiterStart) {
+ if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) {
// write out segment up until this char
if (pos > start) {
append(builder.substring(start, pos), appendable);
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index 1ff791010..7d1993e01 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -423,6 +423,23 @@ void testDelimeterStringQuoteNone() throws IOException {
}
}
+ @Test
+ void testQuoteCharEscapedWithQuoteModeNone() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote('"').setEscape('?').setQuoteMode(QuoteMode.NONE).get();
+ final StringWriter sw = new StringWriter();
+ try (CSVPrinter printer = new CSVPrinter(sw, format)) {
+ printer.printRecord("\"abc", "x\"y");
+ }
+ assertEquals("?\"abc,x?\"y\r\n", sw.toString());
+ // The emitted record must read back as the original values.
+ try (CSVParser parser = CSVParser.parse(sw.toString(), format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertEquals("\"abc", records.get(0).get(0));
+ assertEquals("x\"y", records.get(0).get(1));
+ }
+ }
+
@Test
void testDelimiterEscaped() throws IOException {
final StringWriter sw = new StringWriter();
From 27a439ae0aba41221d296bca7bb5e00379bc25a8 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sat, 13 Jun 2026 08:33:49 -0400
Subject: [PATCH 37/69] Potential fix for pull request finding
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
.../java/org/apache/commons/csv/CSVPrinterTest.java | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index 7d1993e01..79ce987bd 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -429,14 +429,17 @@ void testQuoteCharEscapedWithQuoteModeNone() throws IOException {
final StringWriter sw = new StringWriter();
try (CSVPrinter printer = new CSVPrinter(sw, format)) {
printer.printRecord("\"abc", "x\"y");
+ printer.printRecord(new StringReader("\"abc"), new StringReader("x\"y"));
}
- assertEquals("?\"abc,x?\"y\r\n", sw.toString());
- // The emitted record must read back as the original values.
+ assertEquals("?\"abc,x?\"y" + RECORD_SEPARATOR + "?\"abc,x?\"y" + RECORD_SEPARATOR, sw.toString());
+ // The emitted records must read back as the original values.
try (CSVParser parser = CSVParser.parse(sw.toString(), format)) {
final List records = parser.getRecords();
- assertEquals(1, records.size());
- assertEquals("\"abc", records.get(0).get(0));
- assertEquals("x\"y", records.get(0).get(1));
+ assertEquals(2, records.size());
+ for (final CSVRecord record : records) {
+ assertEquals("\"abc", record.get(0));
+ assertEquals("x\"y", record.get(1));
+ }
}
}
From d729b442e4bbdf6c603e3e64955d27352744cc29 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sat, 13 Jun 2026 12:37:26 +0000
Subject: [PATCH 38/69] Sort members
---
.../org/apache/commons/csv/CSVParserTest.java | 28 ++++++-------
.../apache/commons/csv/CSVPrinterTest.java | 40 +++++++++----------
.../org/apache/commons/csv/LexerTest.java | 24 +++++------
3 files changed, 46 insertions(+), 46 deletions(-)
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index cccbef4e6..dca37fc5a 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1668,20 +1668,6 @@ void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format) throw
}
}
- /**
- * Tests CSV-324.
- */
- @Test
- void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
- final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
- try (CSVParser parser = format.parse(new StringReader("a[|]b[|"))) {
- final CSVRecord record = parser.nextRecord();
- assertEquals("a", record.get(0));
- assertEquals("b[|", record.get(1));
- assertEquals(2, record.size());
- }
- }
-
/**
* A truncated escaped multi-character delimiter at EOF must stay literal data and not be completed from a stale
* escape delimiter look-ahead.
@@ -1696,6 +1682,20 @@ void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException {
}
}
+ /**
+ * Tests CSV-324.
+ */
+ @Test
+ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
+ try (CSVParser parser = format.parse(new StringReader("a[|]b[|"))) {
+ final CSVRecord record = parser.nextRecord();
+ assertEquals("a", record.get(0));
+ assertEquals("b[|", record.get(1));
+ assertEquals(2, record.size());
+ }
+ }
+
@Test
void testProvidedHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index 79ce987bd..b58782210 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -423,26 +423,6 @@ void testDelimeterStringQuoteNone() throws IOException {
}
}
- @Test
- void testQuoteCharEscapedWithQuoteModeNone() throws IOException {
- final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote('"').setEscape('?').setQuoteMode(QuoteMode.NONE).get();
- final StringWriter sw = new StringWriter();
- try (CSVPrinter printer = new CSVPrinter(sw, format)) {
- printer.printRecord("\"abc", "x\"y");
- printer.printRecord(new StringReader("\"abc"), new StringReader("x\"y"));
- }
- assertEquals("?\"abc,x?\"y" + RECORD_SEPARATOR + "?\"abc,x?\"y" + RECORD_SEPARATOR, sw.toString());
- // The emitted records must read back as the original values.
- try (CSVParser parser = CSVParser.parse(sw.toString(), format)) {
- final List records = parser.getRecords();
- assertEquals(2, records.size());
- for (final CSVRecord record : records) {
- assertEquals("\"abc", record.get(0));
- assertEquals("x\"y", record.get(1));
- }
- }
- }
-
@Test
void testDelimiterEscaped() throws IOException {
final StringWriter sw = new StringWriter();
@@ -1818,6 +1798,26 @@ void testQuoteAll() throws IOException {
}
}
+ @Test
+ void testQuoteCharEscapedWithQuoteModeNone() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote('"').setEscape('?').setQuoteMode(QuoteMode.NONE).get();
+ final StringWriter sw = new StringWriter();
+ try (CSVPrinter printer = new CSVPrinter(sw, format)) {
+ printer.printRecord("\"abc", "x\"y");
+ printer.printRecord(new StringReader("\"abc"), new StringReader("x\"y"));
+ }
+ assertEquals("?\"abc,x?\"y" + RECORD_SEPARATOR + "?\"abc,x?\"y" + RECORD_SEPARATOR, sw.toString());
+ // The emitted records must read back as the original values.
+ try (CSVParser parser = CSVParser.parse(sw.toString(), format)) {
+ final List records = parser.getRecords();
+ assertEquals(2, records.size());
+ for (final CSVRecord record : records) {
+ assertEquals("\"abc", record.get(0));
+ assertEquals("x\"y", record.get(1));
+ }
+ }
+ }
+
@Test
void testQuoteCommaFirstChar() throws IOException {
final StringWriter sw = new StringWriter();
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index da60df07e..244079df6 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -409,18 +409,6 @@ void testNextToken6() throws IOException {
}
}
- /**
- * Tests CSV-324.
- */
- @Test
- void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
- final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
- try (Lexer lexer = createLexer("a[|]b[|", format)) {
- assertNextToken(TOKEN, "a", lexer);
- assertNextToken(EOF, "b[|", lexer);
- }
- }
-
/**
* A truncated escaped multi-character delimiter at EOF must not be accepted by reusing the previous escape delimiter
* look-ahead in {@link Lexer#isEscapeDelimiter()}.
@@ -433,6 +421,18 @@ void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException {
}
}
+ /**
+ * Tests CSV-324.
+ */
+ @Test
+ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
+ try (Lexer lexer = createLexer("a[|]b[|", format)) {
+ assertNextToken(TOKEN, "a", lexer);
+ assertNextToken(EOF, "b[|", lexer);
+ }
+ }
+
@Test
void testReadEscapeBackspace() throws IOException {
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
From d4d4154454b43c46aff65ed75c721605eafb142b Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sat, 13 Jun 2026 12:43:32 +0000
Subject: [PATCH 39/69] Refactor some magic strings in
CSVPrinterTest.testQuoteCharEscapedWithQuoteModeNone()
---
.../java/org/apache/commons/csv/CSVPrinterTest.java | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index b58782210..16901c4e2 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -1802,9 +1802,11 @@ void testQuoteAll() throws IOException {
void testQuoteCharEscapedWithQuoteModeNone() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote('"').setEscape('?').setQuoteMode(QuoteMode.NONE).get();
final StringWriter sw = new StringWriter();
+ final String col1 = "\"abc";
+ final String col2 = "x\"y";
try (CSVPrinter printer = new CSVPrinter(sw, format)) {
- printer.printRecord("\"abc", "x\"y");
- printer.printRecord(new StringReader("\"abc"), new StringReader("x\"y"));
+ printer.printRecord(col1, col2);
+ printer.printRecord(new StringReader(col1), new StringReader(col2));
}
assertEquals("?\"abc,x?\"y" + RECORD_SEPARATOR + "?\"abc,x?\"y" + RECORD_SEPARATOR, sw.toString());
// The emitted records must read back as the original values.
@@ -1812,8 +1814,8 @@ void testQuoteCharEscapedWithQuoteModeNone() throws IOException {
final List records = parser.getRecords();
assertEquals(2, records.size());
for (final CSVRecord record : records) {
- assertEquals("\"abc", record.get(0));
- assertEquals("x\"y", record.get(1));
+ assertEquals(col1, record.get(0));
+ assertEquals(col2, record.get(1));
}
}
}
From 411d3a37c923361f2ccca1c26862e6e4d7fd4742 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sat, 13 Jun 2026 12:45:52 +0000
Subject: [PATCH 40/69] Escape quote char in printWithEscapes when QuoteMode is
NONE (#609).
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 0786cc365..41b1a038c 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -55,6 +55,7 @@
CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
Escape Reader values with quote and escape (#606).
Clear escape delimiter buffer before peek in isEscapeDelimiter (#608).
+ Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
From a99f2609299a72b08a3e43c2968822555528da4e Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Mon, 15 Jun 2026 21:31:34 +0530
Subject: [PATCH 41/69] quote value starting with comment marker in minimal
quote mode
---
.../java/org/apache/commons/csv/CSVFormat.java | 5 +++--
.../org/apache/commons/csv/CSVPrinterTest.java | 18 ++++++++++++++++++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 03211e689..4f60eff93 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2454,10 +2454,11 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi
}
} else {
char c = charSeq.charAt(pos);
- if (c <= Constants.COMMENT) {
+ if (c <= Constants.COMMENT || isCommentMarkerSet() && c == commentMarker.charValue()) {
// Some other chars at the start of a value caused the parser to fail, so for now
// encapsulate if we start in anything less than '#'. We are being conservative
- // by including the default comment char too.
+ // by including the default comment char and any configured comment marker too,
+ // which the parser would otherwise read back as a comment line.
quote = true;
} else {
while (pos < len) {
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index 16901c4e2..e00accfb0 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -1829,6 +1829,24 @@ void testQuoteCommaFirstChar() throws IOException {
}
}
+ @Test
+ void testQuoteCommentMarkerFirstChar() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setCommentMarker(';').get();
+ final StringWriter sw = new StringWriter();
+ final String col1 = ";comment-like";
+ try (CSVPrinter printer = new CSVPrinter(sw, format)) {
+ printer.printRecord(col1, "b");
+ }
+ assertEquals("\";comment-like\",b" + RECORD_SEPARATOR, sw.toString());
+ // A value starting with the comment marker must read back as data, not a dropped comment line.
+ try (CSVParser parser = CSVParser.parse(sw.toString(), format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertEquals(col1, records.get(0).get(0));
+ assertEquals("b", records.get(0).get(1));
+ }
+ }
+
@Test
void testQuoteNonNumeric() throws IOException {
final StringWriter sw = new StringWriter();
From 3c2291cf5e40c5b9a19f3a6b3165fb27c8de5321 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Tue, 16 Jun 2026 14:35:39 +0530
Subject: [PATCH 42/69] expand comment marker test to contrast printed comment
with quoted value
---
.../org/apache/commons/csv/CSVPrinterTest.java | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index e00accfb0..f4f3c85b1 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -1835,15 +1835,24 @@ void testQuoteCommentMarkerFirstChar() throws IOException {
final StringWriter sw = new StringWriter();
final String col1 = ";comment-like";
try (CSVPrinter printer = new CSVPrinter(sw, format)) {
+ // A real comment is written with the marker, unquoted.
+ printer.printComment("a real comment");
+ // A value starting with the marker is quoted, so it does not read back as a comment.
printer.printRecord(col1, "b");
+ // The marker past the first character does not start a comment, so only the leading-marker value is quoted.
+ printer.printRecord("a;b", ";c");
}
- assertEquals("\";comment-like\",b" + RECORD_SEPARATOR, sw.toString());
- // A value starting with the comment marker must read back as data, not a dropped comment line.
+ assertEquals("; a real comment" + RECORD_SEPARATOR +
+ "\";comment-like\",b" + RECORD_SEPARATOR +
+ "a;b,\";c\"" + RECORD_SEPARATOR, sw.toString());
+ // The comment is dropped on read; both data records survive intact.
try (CSVParser parser = CSVParser.parse(sw.toString(), format)) {
final List<CSVRecord> records = parser.getRecords();
- assertEquals(1, records.size());
+ assertEquals(2, records.size());
assertEquals(col1, records.get(0).get(0));
assertEquals("b", records.get(0).get(1));
+ assertEquals("a;b", records.get(1).get(0));
+ assertEquals(";c", records.get(1).get(1));
}
}
From e21d66e410cdafca2e822361de5eb6b2596291f2 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Tue, 16 Jun 2026 21:20:03 +0000
Subject: [PATCH 43/69] Quote value starting with comment marker in minimal
quote mode (#610).
Extract to local variable.
---
src/changes/changes.xml | 1 +
src/test/java/org/apache/commons/csv/CSVPrinterTest.java | 5 +++--
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 41b1a038c..431da6b5f 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -56,6 +56,7 @@
Escape Reader values with quote and escape (#606).
Clear escape delimiter buffer before peek in isEscapeDelimiter (#608).
Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
+ Quote value starting with comment marker in minimal quote mode (#610)..
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index f4f3c85b1..e68d4c243 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -1842,11 +1842,12 @@ void testQuoteCommentMarkerFirstChar() throws IOException {
// The marker past the first character does not start a comment, so only the leading-marker value is quoted.
printer.printRecord("a;b", ";c");
}
+ final String string = sw.toString();
assertEquals("; a real comment" + RECORD_SEPARATOR +
"\";comment-like\",b" + RECORD_SEPARATOR +
- "a;b,\";c\"" + RECORD_SEPARATOR, sw.toString());
+ "a;b,\";c\"" + RECORD_SEPARATOR, string);
// The comment is dropped on read; both data records survive intact.
- try (CSVParser parser = CSVParser.parse(sw.toString(), format)) {
+ try (CSVParser parser = CSVParser.parse(string, format)) {
final List<CSVRecord> records = parser.getRecords();
assertEquals(2, records.size());
assertEquals(col1, records.get(0).get(0));
From 110e830616e44844a0a57256401093a151ae0e66 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Wed, 17 Jun 2026 16:38:10 +0530
Subject: [PATCH 44/69] clear delimiter buffer before each peek in isDelimiter
---
src/main/java/org/apache/commons/csv/Lexer.java | 2 +-
src/test/java/org/apache/commons/csv/LexerTest.java | 13 +++++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 238e64cee..93a584663 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -153,6 +153,7 @@ boolean isDelimiter(final int ch) throws IOException {
isLastTokenDelimiter = true;
return true;
}
+ Arrays.fill(delimiterBuf, '\0');
reader.peek(delimiterBuf);
for (int i = 0; i < delimiterBuf.length; i++) {
if (delimiterBuf[i] != delimiter[i + 1]) {
@@ -274,7 +275,6 @@ Token nextToken(final Token token) throws IOException {
token.type = Token.Type.COMMENT;
return token;
}
- Arrays.fill(delimiterBuf, '\0');
// Important: make sure a new char gets consumed in each iteration
while (token.type == Token.Type.INVALID) {
// ignore whitespaces at beginning of a token
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index 244079df6..e5f831fb2 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -433,6 +433,19 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
}
}
+ /**
+ * A truncated multi-character delimiter at EOF must not be accepted by reusing the look-ahead buffer left dirty by an
+ * earlier non-matching peek in the same token (CSV-324 only cleared the buffer once per token).
+ */
+ @Test
+ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
+ // The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
+ try (Lexer lexer = createLexer("x[a][|", format)) {
+ assertNextToken(EOF, "x[a][|", lexer);
+ }
+ }
+
@Test
void testReadEscapeBackspace() throws IOException {
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
From 61f521350b34c5605bfd68760cdce120a5da4ed7 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Wed, 17 Jun 2026 17:07:51 +0530
Subject: [PATCH 45/69] add public-api parser test for partial delimiter
false-match at eof
---
.../org/apache/commons/csv/CSVParserTest.java | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index dca37fc5a..c1ca3d7a4 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1696,6 +1696,21 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
}
}
+ /**
+ * A truncated multi-character delimiter at EOF must not be completed from the look-ahead buffer left dirty by an
+ * earlier non-matching peek in the same token.
+ */
+ @Test
+ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
+ // The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
+ try (CSVParser parser = format.parse(new StringReader("x[a][|"))) {
+ final CSVRecord record = parser.nextRecord();
+ assertEquals("x[a][|", record.get(0));
+ assertEquals(1, record.size());
+ }
+ }
+
@Test
void testProvidedHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
From ed8dbf25ad73856cfa10cba4f5e9855fdcae0d88 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 17 Jun 2026 12:08:58 +0000
Subject: [PATCH 46/69] Clear escape delimiter buffer before peek in
Lexer.isEscapeDelimiter() (#608, #611).
Refactor magic strings in tests
---
src/changes/changes.xml | 2 +-
src/test/java/org/apache/commons/csv/CSVParserTest.java | 5 +++--
src/test/java/org/apache/commons/csv/LexerTest.java | 5 +++--
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 431da6b5f..66073c9dd 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -54,7 +54,7 @@
CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
Escape Reader values with quote and escape (#606).
- Clear escape delimiter buffer before peek in isEscapeDelimiter (#608).
+ Clear escape delimiter buffer before peek in Lexer.isEscapeDelimiter() (#608, #611).
Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
Quote value starting with comment marker in minimal quote mode (#610)..
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index c1ca3d7a4..5bece571f 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1704,9 +1704,10 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
// The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
- try (CSVParser parser = format.parse(new StringReader("x[a][|"))) {
+ final String recordString = "x[a][|";
+ try (CSVParser parser = format.parse(new StringReader(recordString))) {
final CSVRecord record = parser.nextRecord();
- assertEquals("x[a][|", record.get(0));
+ assertEquals(recordString, record.get(0));
assertEquals(1, record.size());
}
}
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index e5f831fb2..db1ab3a6d 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -441,8 +441,9 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
// The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
- try (Lexer lexer = createLexer("x[a][|", format)) {
- assertNextToken(EOF, "x[a][|", lexer);
+ final String recordString = "x[a][|";
+ try (Lexer lexer = createLexer(recordString, format)) {
+ assertNextToken(EOF, recordString, lexer);
}
}
From a6ee67ecf0d4b9208ffc640f433d8b40c258e1f3 Mon Sep 17 00:00:00 2001
From: OldTruckDriver
Date: Fri, 19 Jun 2026 01:32:16 +1000
Subject: [PATCH 47/69] [CSV-328] Fix quoted null string after disabling quote
setNullString(String) rebuilt quotedNullString by concatenating the nullable
quoteCharacter field directly, so calling setQuote(null) before setNullString(...)
produced a literal "nullNULLnull". Extract a shared setQuotedNullString() helper
that applies the default-quote fallback, so both builder orders produce the same state.
Reviewed-by: OpenAI Codex
Reviewed-by: Anthropic Claude Code
---
src/changes/changes.xml | 1 +
src/main/java/org/apache/commons/csv/CSVFormat.java | 7 +++++--
src/test/java/org/apache/commons/csv/CSVFormatTest.java | 5 +++++
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 66073c9dd..64f936554 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -53,6 +53,7 @@
CSVParser applies characterOffset to bytePosition (#604).
CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
+ CSVFormat.Builder.setNullString(String) can build an invalid quoted null string after setQuote(null).
Escape Reader values with quote and escape (#606).
Clear escape delimiter buffer before peek in Lexer.isEscapeDelimiter() (#608, #611).
Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 4f60eff93..9c403d9e1 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -780,8 +780,7 @@ public Builder setMaxRows(final long maxRows) {
*/
public Builder setNullString(final String nullString) {
this.nullString = nullString;
- this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
- return this;
+ return setQuotedNullString();
}
/**
@@ -806,6 +805,10 @@ public Builder setQuote(final Character quoteCharacter) {
throw new IllegalArgumentException("The quoteCharacter cannot be a line break");
}
this.quoteCharacter = quoteCharacter;
+ return setQuotedNullString();
+ }
+
+ private Builder setQuotedNullString() {
final Character quote = quoteCharacter != null ? quoteCharacter : Constants.DOUBLE_QUOTE_CHAR;
this.quotedNullString = quote + nullString + quote;
return this;
diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
index c3fdeeb77..ed20898de 100644
--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -1040,6 +1040,11 @@ void testQuotedNullStringTracksQuoteCharacter() throws IOException {
builder.setQuote((Character) null);
builder.get().print(null, out, true);
assertEquals("\"NULL\"", out.toString());
+ // reset, reverse setter order
+ out.setLength(0);
+ builder.setNullString(null).setQuote((Character) null).setNullString("NULL");
+ builder.get().print(null, out, true);
+ assertEquals("\"NULL\"", out.toString());
}
@Test
From 1d89cd5f0aa454ef3853dfc7528242399ef26b74 Mon Sep 17 00:00:00 2001
From: OldTruckDriver
Date: Fri, 19 Jun 2026 02:05:55 +1000
Subject: [PATCH 48/69] [CSV-329] Fix byte tracking for supplementary
delimiters
ExtendedBufferedReader.read(char[], int, int) updated lastChar before computing the encoded byte length, so a surrogate pair in the delimiter lookahead buffer was paired against the post-update lastChar and threw CharacterCodingException.
Count bytes before updating lastChar, and pair each char against the preceding char in the buffer seeded from lastChar so pairs split across reads still count. Add parser and ExtendedBufferedReader regression tests.
Reviewed-by: OpenAI Codex
Reviewed-by: Anthropic Claude Code
---
src/changes/changes.xml | 1 +
.../commons/csv/ExtendedBufferedReader.java | 20 +++++++++------
.../org/apache/commons/csv/CSVParserTest.java | 25 +++++++++++++++++++
.../csv/ExtendedBufferedReaderTest.java | 14 +++++++++++
4 files changed, 53 insertions(+), 7 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 66073c9dd..f6a474dbf 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -53,6 +53,7 @@
CSVParser applies characterOffset to bytePosition (#604).
CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
+ CSVParser with trackBytes enabled throws on multi-character delimiters containing supplementary Unicode characters.
Escape Reader values with quote and escape (#606).
Clear escape delimiter buffer before peek in Lexer.isEscapeDelimiter() (#608, #611).
Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 889b58edc..5b519a08c 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -108,9 +108,11 @@ long getBytesRead() {
}
private long getEncodedCharLength(final char[] buf, final int offset, final int length) throws CharacterCodingException {
- int len = 0;
- for (int i = offset; i < length; i++) {
- len += getEncodedCharLength(buf[i]);
+ long len = 0;
+ int previous = lastChar;
+ for (int i = offset; i < offset + length; i++) {
+ len += getEncodedCharLength(previous, buf[i]);
+ previous = buf[i];
}
return len;
}
@@ -141,8 +143,12 @@ private long getEncodedCharLength(final char[] buf, final int offset, final int
* @throws CharacterCodingException if the character cannot be encoded.
*/
private int getEncodedCharLength(final int current) throws CharacterCodingException {
+ return getEncodedCharLength(lastChar, current);
+ }
+
+ private int getEncodedCharLength(final int previous, final int current) throws CharacterCodingException {
final char cChar = (char) current;
- final char lChar = (char) lastChar;
+ final char lChar = (char) previous;
if (!Character.isSurrogate(cChar)) {
return encoder.encode(CharBuffer.wrap(new char[] { cChar })).limit();
}
@@ -218,6 +224,9 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
return 0;
}
final int len = super.read(buf, offset, length);
+ if (encoder != null && len > 0) {
+ this.bytesRead += getEncodedCharLength(buf, offset, len);
+ }
if (len > 0) {
for (int i = offset; i < offset + len; i++) {
final char ch = buf[i];
@@ -233,9 +242,6 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
} else if (len == EOF) {
lastChar = EOF;
}
- if (encoder != null) {
- this.bytesRead += getEncodedCharLength(buf, offset, len);
- }
position += len;
return len;
}
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 5bece571f..29ca0cf1f 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -666,6 +666,31 @@ void testGetBytePositionMultiCharacterDelimiter() throws IOException {
}
}
+ /**
+ * Tests CSV-329.
+ */
+ @Test
+ void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() throws IOException {
+ final String delimiter = "x😀";
+ final String code = "ax😀b\ncx😀d\n";
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(delimiter).get();
+ try (CSVParser parser = CSVParser.builder()
+ .setReader(new StringReader(code))
+ .setFormat(format)
+ .setCharset(UTF_8)
+ .setTrackBytes(true)
+ .get()) {
+ final CSVRecord first = parser.nextRecord();
+ final CSVRecord second = parser.nextRecord();
+ assertNotNull(first);
+ assertNotNull(second);
+ assertValuesEquals(new String[] { "a", "b" }, first);
+ assertValuesEquals(new String[] { "c", "d" }, second);
+ assertEquals(0, first.getBytePosition());
+ assertEquals("ax😀b\n".getBytes(UTF_8).length, second.getBytePosition());
+ }
+ }
+
@Test
void testGetBytePositionWithCharacterOffsetAndMultiBytePrefix() throws Exception {
final String row0 = "é,x\n";
diff --git a/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java b/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java
index 056b8a9c9..b8d9b9f19 100644
--- a/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java
+++ b/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java
@@ -26,6 +26,7 @@
import static org.junit.jupiter.api.Assertions.assertNull;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.Test;
@@ -104,6 +105,19 @@ void testReadingInDifferentBuffer() throws Exception {
}
}
+ @Test
+ void testReadingSupplementaryCharacterTracksBytes() throws Exception {
+ final String input = "😀";
+ final char[] buffer = new char[input.length()];
+ try (ExtendedBufferedReader reader = new ExtendedBufferedReader(new StringReader(input), StandardCharsets.UTF_8, true)) {
+ assertEquals(input.length(), reader.read(buffer, 0, buffer.length));
+ assertArrayEquals(input.toCharArray(), buffer);
+ assertEquals(input.getBytes(StandardCharsets.UTF_8).length, reader.getBytesRead());
+ assertEquals(input.length(), reader.getPosition());
+ assertEquals(input.charAt(input.length() - 1), reader.getLastChar());
+ }
+ }
+
@Test
void testReadLine() throws Exception {
try (ExtendedBufferedReader br = createBufferedReader("")) {
From d8e12423b47109c76196bbae454726a114cf7c07 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 19 Jun 2026 07:28:18 -0400
Subject: [PATCH 49/69] Bump actions/checkout from 6.0.3 to 7.0.0.
---
.github/workflows/codeql-analysis.yml | 2 +-
.github/workflows/dependency-review.yml | 2 +-
.github/workflows/maven.yml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 57f55cae0..08c673ee0 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -46,7 +46,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index 114f3d8a2..7bc02bdd2 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -26,6 +26,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: 'Checkout Repository'
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
- name: 'Dependency Review PR'
uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294 # v5.0.0
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 3ee3dec2b..3cb743cbf 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -43,7 +43,7 @@ jobs:
experimental: true
steps:
- - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
From a1cf4f2a73065281ca7c22841c2f3ec0c00098c1 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 19 Jun 2026 11:44:21 +0000
Subject: [PATCH 50/69] Refactor delimiter in test
Rename local variable
---
src/test/java/org/apache/commons/csv/CSVParserTest.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 29ca0cf1f..aa4a639dd 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -672,10 +672,10 @@ void testGetBytePositionMultiCharacterDelimiter() throws IOException {
@Test
void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() throws IOException {
final String delimiter = "x😀";
- final String code = "ax😀b\ncx😀d\n";
+ final String data = "a" + delimiter + "b\nc" + delimiter + "d\n";
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(delimiter).get();
try (CSVParser parser = CSVParser.builder()
- .setReader(new StringReader(code))
+ .setReader(new StringReader(data))
.setFormat(format)
.setCharset(UTF_8)
.setTrackBytes(true)
@@ -687,7 +687,7 @@ void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() thro
assertValuesEquals(new String[] { "a", "b" }, first);
assertValuesEquals(new String[] { "c", "d" }, second);
assertEquals(0, first.getBytePosition());
- assertEquals("ax😀b\n".getBytes(UTF_8).length, second.getBytePosition());
+ assertEquals("a" + delimiter + "b\n".getBytes(UTF_8).length, second.getBytePosition());
}
}
From caa1c8d0ed1a05f4a75f9a4fc6e5e2dc6fa5bf51 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 19 Jun 2026 18:51:44 +0000
Subject: [PATCH 51/69] Revert "Refactor delimiter in test"
This reverts commit a1cf4f2a73065281ca7c22841c2f3ec0c00098c1.
---
src/test/java/org/apache/commons/csv/CSVParserTest.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index aa4a639dd..29ca0cf1f 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -672,10 +672,10 @@ void testGetBytePositionMultiCharacterDelimiter() throws IOException {
@Test
void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() throws IOException {
final String delimiter = "x😀";
- final String data = "a" + delimiter + "b\nc" + delimiter + "d\n";
+ final String code = "ax😀b\ncx😀d\n";
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(delimiter).get();
try (CSVParser parser = CSVParser.builder()
- .setReader(new StringReader(data))
+ .setReader(new StringReader(code))
.setFormat(format)
.setCharset(UTF_8)
.setTrackBytes(true)
@@ -687,7 +687,7 @@ void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() thro
assertValuesEquals(new String[] { "a", "b" }, first);
assertValuesEquals(new String[] { "c", "d" }, second);
assertEquals(0, first.getBytePosition());
- assertEquals("a" + delimiter + "b\n".getBytes(UTF_8).length, second.getBytePosition());
+ assertEquals("ax😀b\n".getBytes(UTF_8).length, second.getBytePosition());
}
}
From 61aa0555d60c68120914b4952232f8c6bfc72ed3 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Fri, 19 Jun 2026 23:39:51 +0530
Subject: [PATCH 52/69] escape leading comment marker in printWithEscapes
---
.../org/apache/commons/csv/CSVFormat.java | 14 ++++-
.../apache/commons/csv/CSVPrinterTest.java | 51 +++++++++++++++++++
2 files changed, 63 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 9c403d9e1..eaa8c8ffe 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2329,12 +2329,16 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen
final char escape = getEscapeChar();
final boolean quoteSet = isQuoteCharacterSet();
final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
+ final boolean commentMarkerSet = isCommentMarkerSet();
+ final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional
while (pos < end) {
char c = charSeq.charAt(pos);
final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength);
final boolean isCr = c == Constants.CR;
final boolean isLf = c == Constants.LF;
- if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) {
+ // A leading comment marker would be read back as a comment, so escape it.
+ final boolean isComment = commentMarkerSet && pos == 0 && c == commentChar;
+ if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) {
// write out segment up until this char
if (pos > start) {
appendable.append(charSeq, start, pos);
@@ -2375,8 +2379,11 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
final char escape = getEscapeChar();
final boolean quoteSet = isQuoteCharacterSet();
final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
+ final boolean commentMarkerSet = isCommentMarkerSet();
+ final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional
final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
int c;
+ boolean firstChar = true;
final char[] lookAheadBuffer = new char[delimLength - 1];
while (EOF != (c = bufferedReader.read())) {
builder.append((char) c);
@@ -2386,7 +2393,10 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength);
final boolean isCr = c == Constants.CR;
final boolean isLf = c == Constants.LF;
- if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) {
+ // A leading comment marker would be read back as a comment, so escape it.
+ final boolean isComment = commentMarkerSet && firstChar && c == commentChar;
+ firstChar = false;
+ if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) {
// write out segment up until this char
if (pos > start) {
append(builder.substring(start, pos), appendable);
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index e68d4c243..9ae80c1e5 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -569,6 +569,57 @@ void testEscapeBackslash5() throws IOException {
assertEquals("\\\\", sw.toString());
}
+ @Test
+ void testEscapeCommentMarkerFirstChar() throws IOException {
+ // No quoting available in escape mode, so a leading comment marker must be escaped or the
+ // record reads back as a comment and is dropped. Mirrors the quoting fix for QuoteMode.MINIMAL.
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote(null).setEscape('\\').setCommentMarker(';').get();
+ final StringWriter sw = new StringWriter();
+ final String col1 = ";comment-like";
+ try (CSVPrinter printer = new CSVPrinter(sw, format)) {
+ printer.printRecord(col1, "b");
+ printer.printRecord(new StringReader(col1), new StringReader("b"));
+ // The marker past the first character does not start a comment and is left alone.
+ printer.printRecord("a;b", ";c");
+ }
+ final String string = sw.toString();
+ assertEquals("\\;comment-like,b" + RECORD_SEPARATOR +
+ "\\;comment-like,b" + RECORD_SEPARATOR +
+ "a;b,\\;c" + RECORD_SEPARATOR, string);
+ // The emitted records must read back as the original values, none parsed as a comment.
+ try (CSVParser parser = CSVParser.parse(string, format)) {
+ final List<CSVRecord> records = parser.getRecords();
+ assertEquals(3, records.size());
+ assertEquals(col1, records.get(0).get(0));
+ assertEquals("b", records.get(0).get(1));
+ assertEquals(col1, records.get(1).get(0));
+ assertEquals("b", records.get(1).get(1));
+ assertEquals("a;b", records.get(2).get(0));
+ assertEquals(";c", records.get(2).get(1));
+ }
+ }
+
+ @Test
+ void testEscapeCommentMarkerFirstCharWithQuoteModeNone() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setEscape('\\').setQuoteMode(QuoteMode.NONE).setCommentMarker(';').get();
+ final StringWriter sw = new StringWriter();
+ final String col1 = ";bar";
+ try (CSVPrinter printer = new CSVPrinter(sw, format)) {
+ printer.printRecord(col1, "b");
+ printer.printRecord(new StringReader(col1), new StringReader("b"));
+ }
+ final String string = sw.toString();
+ assertEquals("\\;bar,b" + RECORD_SEPARATOR + "\\;bar,b" + RECORD_SEPARATOR, string);
+ try (CSVParser parser = CSVParser.parse(string, format)) {
+ final List<CSVRecord> records = parser.getRecords();
+ assertEquals(2, records.size());
+ for (final CSVRecord record : records) {
+ assertEquals(col1, record.get(0));
+ assertEquals("b", record.get(1));
+ }
+ }
+ }
+
@Test
void testEscapeNull1() throws IOException {
final StringWriter sw = new StringWriter();
From b112daacc74664c925b28d413b660dc47faddcef Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Fri, 19 Jun 2026 21:14:05 +0000
Subject: [PATCH 53/69] Escape leading comment marker in printWithEscapes
(#614).
---
src/changes/changes.xml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 006de7711..f172a96fe 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -58,7 +58,8 @@
Escape Reader values with quote and escape (#606).
Clear escape delimiter buffer before peek in Lexer.isEscapeDelimiter() (#608, #611).
Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
- Quote value starting with comment marker in minimal quote mode (#610)..
+ Quote value starting with comment marker in minimal quote mode (#610).
+ Escape leading comment marker in printWithEscapes (#614).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
From 274b4ceba4e418726a5c9e7043bf9d460b0429c5 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Sat, 20 Jun 2026 20:33:44 +0530
Subject: [PATCH 54/69] skip byte counting at EOF in
ExtendedBufferedReader.read
---
.../commons/csv/ExtendedBufferedReader.java | 2 +-
.../org/apache/commons/csv/CSVParserTest.java | 21 +++++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 5b519a08c..20c1ef544 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -210,7 +210,7 @@ public int read() throws IOException {
if (current == CR || current == LF && lastChar != CR || current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
lineNumber++;
}
- if (encoder != null) {
+ if (encoder != null && current != EOF) {
this.bytesRead += getEncodedCharLength(current);
}
lastChar = current;
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 29ca0cf1f..1332fa582 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -691,6 +691,27 @@ void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() thro
}
}
+ @Test
+ void testGetBytePositionWithSingleByteCharset() throws IOException {
+ // A single-byte charset cannot encode U+FFFF, the char value of the EOF sentinel.
+ // Byte counting must skip the EOF read so a valid file parses without throwing.
+ final String code = "a,b\nc,d\n";
+ try (CSVParser parser = CSVParser.builder()
+ .setReader(new StringReader(code))
+ .setFormat(CSVFormat.DEFAULT)
+ .setCharset(StandardCharsets.ISO_8859_1)
+ .setTrackBytes(true)
+ .get()) {
+ final CSVRecord first = parser.nextRecord();
+ final CSVRecord second = parser.nextRecord();
+ assertNotNull(first);
+ assertNotNull(second);
+ assertNull(parser.nextRecord());
+ assertEquals(0, first.getBytePosition());
+ assertEquals(4, second.getBytePosition());
+ }
+ }
+
@Test
void testGetBytePositionWithCharacterOffsetAndMultiBytePrefix() throws Exception {
final String row0 = "é,x\n";
From 0633c989c9ff892d99bacd3289a3ff8d4cb0fbd6 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sat, 20 Jun 2026 15:11:11 +0000
Subject: [PATCH 55/69] Skip byte counting at EOF in
ExtendedBufferedReader.read (#615).
Sort members.
---
src/changes/changes.xml | 1 +
.../org/apache/commons/csv/CSVParserTest.java | 42 +++++++++----------
2 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index f172a96fe..867a20507 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -60,6 +60,7 @@
Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
Quote value starting with comment marker in minimal quote mode (#610).
Escape leading comment marker in printWithEscapes (#614).
+ Skip byte counting at EOF in ExtendedBufferedReader.read (#615).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 1332fa582..309a073cf 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -691,27 +691,6 @@ void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() thro
}
}
- @Test
- void testGetBytePositionWithSingleByteCharset() throws IOException {
- // A single-byte charset cannot encode U+FFFF, the char value of the EOF sentinel.
- // Byte counting must skip the EOF read so a valid file parses without throwing.
- final String code = "a,b\nc,d\n";
- try (CSVParser parser = CSVParser.builder()
- .setReader(new StringReader(code))
- .setFormat(CSVFormat.DEFAULT)
- .setCharset(StandardCharsets.ISO_8859_1)
- .setTrackBytes(true)
- .get()) {
- final CSVRecord first = parser.nextRecord();
- final CSVRecord second = parser.nextRecord();
- assertNotNull(first);
- assertNotNull(second);
- assertNull(parser.nextRecord());
- assertEquals(0, first.getBytePosition());
- assertEquals(4, second.getBytePosition());
- }
- }
-
@Test
void testGetBytePositionWithCharacterOffsetAndMultiBytePrefix() throws Exception {
final String row0 = "é,x\n";
@@ -742,6 +721,27 @@ void testGetBytePositionWithCharacterOffsetAndMultiBytePrefix() throws Exception
}
}
+ @Test
+ void testGetBytePositionWithSingleByteCharset() throws IOException {
+ // A single-byte charset cannot encode U+FFFF, the char value of the EOF sentinel.
+ // Byte counting must skip the EOF read so a valid file parses without throwing.
+ final String code = "a,b\nc,d\n";
+ try (CSVParser parser = CSVParser.builder()
+ .setReader(new StringReader(code))
+ .setFormat(CSVFormat.DEFAULT)
+ .setCharset(StandardCharsets.ISO_8859_1)
+ .setTrackBytes(true)
+ .get()) {
+ final CSVRecord first = parser.nextRecord();
+ final CSVRecord second = parser.nextRecord();
+ assertNotNull(first);
+ assertNotNull(second);
+ assertNull(parser.nextRecord());
+ assertEquals(0, first.getBytePosition());
+ assertEquals(4, second.getBytePosition());
+ }
+ }
+
@Test
void testGetHeaderComment_HeaderComment1() throws IOException {
try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
From 609a228e35adf9d73275a6b88e065de091c94be6 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Mon, 22 Jun 2026 02:01:19 +0530
Subject: [PATCH 56/69] keep quoted empty trailing field with trailingDelimiter
---
.../java/org/apache/commons/csv/CSVParser.java | 4 +++-
.../org/apache/commons/csv/CSVParserTest.java | 17 +++++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 83b60170e..141eba732 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -580,7 +580,9 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
private void addRecordValue(final boolean lastRecord) {
final String input = format.trim(reusableToken.content.toString());
- if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) {
+ // Only drop the empty field produced by an actual trailing delimiter. A quoted empty
+ // field ("") is a real value, not a trailing delimiter, so it must be kept.
+ if (lastRecord && input.isEmpty() && format.getTrailingDelimiter() && !reusableToken.isQuoted) {
return;
}
recordList.add(handleNull(input));
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 309a073cf..051548757 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1949,6 +1949,23 @@ void testTrailingDelimiter() throws Exception {
}
}
+ @Test
+ void testTrailingDelimiterKeepsQuotedEmptyLastField() throws Exception {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setTrailingDelimiter(true).get();
+ try (CSVParser parser = CSVParser.parse("a,b,\"\"", format)) {
+ final CSVRecord record = parser.iterator().next();
+ assertEquals(3, record.size());
+ assertEquals("a", record.get(0));
+ assertEquals("b", record.get(1));
+ assertEquals("", record.get(2));
+ }
+ // An unquoted trailing delimiter still drops the empty field.
+ try (CSVParser parser = CSVParser.parse("a,b,", format)) {
+ final CSVRecord record = parser.iterator().next();
+ assertEquals(2, record.size());
+ }
+ }
+
@Test
void testTrim() throws Exception {
final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z");
From 53360c47dbdd5d6ba4fe5e8008f8bb3510200b33 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Mon, 22 Jun 2026 07:32:06 -0400
Subject: [PATCH 57/69] Bump actions/setup-java from 5.2.0 to 5.3.0
---
.github/workflows/maven.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 3cb743cbf..a6154ddb1 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -53,7 +53,7 @@ jobs:
restore-keys: |
${{ runner.os }}-maven-
- name: Set up JDK ${{ matrix.java }}
- uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
+ uses: actions/setup-java@ad2b38190b15e4d6bdf0c97fb4fca8412226d287 # v5.3.0
with:
distribution: ${{ runner.os == 'macOS' && matrix.java == '8' && 'zulu' || 'temurin' }}
java-version: ${{ matrix.java }}
From fc4c0e3b43c3cba69f023d336629d2696e250294 Mon Sep 17 00:00:00 2001
From: rootvector2
Date: Mon, 22 Jun 2026 18:06:33 +0530
Subject: [PATCH 58/69] document trailing delimiter parse behavior and contrast
with trailing data
---
.../org/apache/commons/csv/CSVFormat.java | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index eaa8c8ffe..7145d23d3 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -883,6 +883,16 @@ public Builder setTrailingData(final boolean trailingData) {
/**
* Sets whether to add a trailing delimiter.
*
+ * <p>
+ * When writing, a delimiter is appended after the last value of each record. When reading, the empty field
+ * that such a trailing delimiter produces is dropped so the output round-trips back to the original record;
+ * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept.
+ * </p>
+ * <p>
+ * This is unrelated to {@link #setTrailingData(boolean) trailing data}, which controls whether characters
+ * after the closing quote of an encapsulated value are tolerated when reading.
+ * </p>
+ *
* @param trailingDelimiter whether to add a trailing delimiter.
* @return This instance.
*/
@@ -2012,6 +2022,16 @@ public boolean getTrailingData() {
/**
* Gets whether to add a trailing delimiter.
*
+ * <p>
+ * When writing, a delimiter is appended after the last value of each record. When reading, the empty field
+ * that such a trailing delimiter produces is dropped so the output round-trips back to the original record;
+ * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept.
+ * </p>
+ * <p>
+ * This is unrelated to {@link #getTrailingData() trailing data}, which controls whether characters after the
+ * closing quote of an encapsulated value are tolerated when reading.
+ * </p>
+ *
* @return whether to add a trailing delimiter.
* @since 1.3
*/
From e729d17d5089d794c07e41dd6c9d374f9ea09e85 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Mon, 22 Jun 2026 13:49:46 +0000
Subject: [PATCH 59/69] Keep quoted empty trailing field with trailingDelimiter
(#616).
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 867a20507..0d0175ccc 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -61,6 +61,7 @@
Quote value starting with comment marker in minimal quote mode (#610).
Escape leading comment marker in printWithEscapes (#614).
Skip byte counting at EOF in ExtendedBufferedReader.read (#615).
+ Keep quoted empty trailing field with trailingDelimiter (#616).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
From afbf34ad92cbf741b9dfad84699e890d8695a6f9 Mon Sep 17 00:00:00 2001
From: Naveed Khan
Date: Thu, 25 Jun 2026 23:12:11 +0530
Subject: [PATCH 60/69] evaluate isDelimiter once in nextToken whitespace skip
---
.../java/org/apache/commons/csv/Lexer.java | 11 +++++++++--
.../org/apache/commons/csv/LexerTest.java | 19 +++++++++++++++++++
2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 93a584663..fe964480a 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -277,15 +277,22 @@ Token nextToken(final Token token) throws IOException {
}
// Important: make sure a new char gets consumed in each iteration
while (token.type == Token.Type.INVALID) {
+ // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must
+ // only be evaluated once per character. Remember a match found while skipping whitespace below.
+ boolean delimiter = false;
// ignore whitespaces at beginning of a token
if (ignoreSurroundingSpaces) {
- while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) {
+ while (Character.isWhitespace((char) c) && !eol) {
+ if (isDelimiter(c)) {
+ delimiter = true;
+ break;
+ }
c = reader.read();
eol = readEndOfLine(c);
}
}
// ok, start of token reached: encapsulated, or token
- if (isDelimiter(c)) {
+ if (delimiter || isDelimiter(c)) {
// empty token return TOKEN("")
token.type = Token.Type.TOKEN;
} else if (eol) {
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index db1ab3a6d..445f710a1 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -447,6 +447,25 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
}
}
+ /**
+ * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
+ * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the
+ * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped.
+ */
+ @Test
+ void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+ try (Lexer lexer = createLexer(" |a", format)) {
+ assertNextToken(TOKEN, "", lexer);
+ assertNextToken(EOF, "a", lexer);
+ }
+ try (Lexer lexer = createLexer("a | |b", format)) {
+ assertNextToken(TOKEN, "a", lexer);
+ assertNextToken(TOKEN, "", lexer);
+ assertNextToken(EOF, "b", lexer);
+ }
+ }
+
@Test
void testReadEscapeBackspace() throws IOException {
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
From c9362f76baa32534e82334a27220b698db49788c Mon Sep 17 00:00:00 2001
From: Naveed Khan
Date: Fri, 26 Jun 2026 02:45:35 +0530
Subject: [PATCH 61/69] add public-api test for whitespace-prefixed multi-char
delimiter
exercises the empty-field-dropped regression through CSVParser, not just the lexer.
---
.../org/apache/commons/csv/CSVParserTest.java | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 051548757..565e132eb 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1758,6 +1758,26 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
}
}
+ /**
+ * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
+ * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping
+ * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value.
+ */
+ @Test
+ void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+ try (CSVParser parser = CSVParser.parse(" |a", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "", "a" }, records.get(0));
+ }
+ try (CSVParser parser = CSVParser.parse("a | |b", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "a", "", "b" }, records.get(0));
+ }
+ }
+
@Test
void testProvidedHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
From c4113ceb3acfba0634133e47d35c40d141e18525 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 25 Jun 2026 22:11:14 +0000
Subject: [PATCH 62/69] Evaluate isDelimiter once in nextToken whitespace skip
(#618).
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 0d0175ccc..93952e9f1 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -62,6 +62,7 @@
Escape leading comment marker in printWithEscapes (#614).
Skip byte counting at EOF in ExtendedBufferedReader.read (#615).
Keep quoted empty trailing field with trailingDelimiter (#616).
+ Evaluate isDelimiter once in nextToken whitespace skip (#618).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
From e36e7f3a1d0fbe29f2ff602f041d3a3d4195b84a Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 25 Jun 2026 22:11:53 +0000
Subject: [PATCH 63/69] Sort members
---
.../org/apache/commons/csv/CSVParserTest.java | 40 +++++++++----------
.../org/apache/commons/csv/LexerTest.java | 38 +++++++++---------
2 files changed, 39 insertions(+), 39 deletions(-)
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 565e132eb..3bea08fac 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -465,6 +465,26 @@ void testDuplicateHeadersNotAllowed() {
() -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false)));
}
+ /**
+ * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
+ * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping
+ * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value.
+ */
+ @Test
+ void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+ try (CSVParser parser = CSVParser.parse(" |a", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "", "a" }, records.get(0));
+ }
+ try (CSVParser parser = CSVParser.parse("a | |b", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "a", "", "b" }, records.get(0));
+ }
+ }
+
@Test
void testEmptyFile() throws Exception {
try (CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8,
@@ -1758,26 +1778,6 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
}
}
- /**
- * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
- * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping
- * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value.
- */
- @Test
- void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
- final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
- try (CSVParser parser = CSVParser.parse(" |a", format)) {
- final List records = parser.getRecords();
- assertEquals(1, records.size());
- assertValuesEquals(new String[] { "", "a" }, records.get(0));
- }
- try (CSVParser parser = CSVParser.parse("a | |b", format)) {
- final List records = parser.getRecords();
- assertEquals(1, records.size());
- assertValuesEquals(new String[] { "a", "", "b" }, records.get(0));
- }
- }
-
@Test
void testProvidedHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index 445f710a1..a76f6e513 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -216,6 +216,25 @@ void testDelimiterIsWhitespace() throws IOException {
}
}
+ /**
+ * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
+ * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the
+ * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped.
+ */
+ @Test
+ void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+ try (Lexer lexer = createLexer(" |a", format)) {
+ assertNextToken(TOKEN, "", lexer);
+ assertNextToken(EOF, "a", lexer);
+ }
+ try (Lexer lexer = createLexer("a | |b", format)) {
+ assertNextToken(TOKEN, "a", lexer);
+ assertNextToken(TOKEN, "", lexer);
+ assertNextToken(EOF, "b", lexer);
+ }
+ }
+
@Test
void testEOFWithoutClosingQuote() throws Exception {
final String code = "a,\"b";
@@ -447,25 +466,6 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
}
}
- /**
- * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
- * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the
- * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped.
- */
- @Test
- void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
- final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
- try (Lexer lexer = createLexer(" |a", format)) {
- assertNextToken(TOKEN, "", lexer);
- assertNextToken(EOF, "a", lexer);
- }
- try (Lexer lexer = createLexer("a | |b", format)) {
- assertNextToken(TOKEN, "a", lexer);
- assertNextToken(TOKEN, "", lexer);
- assertNextToken(EOF, "b", lexer);
- }
- }
-
@Test
void testReadEscapeBackspace() throws IOException {
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
From 26a53751934e52f84663e5e90956db99f0eefc49 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 25 Jun 2026 22:13:08 +0000
Subject: [PATCH 64/69] Add more test assertions to
CSVParserTest.testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter()
---
src/test/java/org/apache/commons/csv/CSVParserTest.java | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 3bea08fac..6d9bdd9e8 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -483,6 +483,11 @@ void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOEx
assertEquals(1, records.size());
assertValuesEquals(new String[] { "a", "", "b" }, records.get(0));
}
+ try (CSVParser parser = CSVParser.parse("a | |b |", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "a", "", "b", "" }, records.get(0));
+ }
}
@Test
From b346046e10a5833e5f6143fd0162155bb51ccc87 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 25 Jun 2026 18:23:59 -0400
Subject: [PATCH 65/69] Bump actions/cache from 5.0.5 to 6.0.0.
---
.github/workflows/codeql-analysis.yml | 2 +-
.github/workflows/maven.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 08c673ee0..20f1ee2cb 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -49,7 +49,7 @@ jobs:
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
+ - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index a6154ddb1..139df406d 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -46,7 +46,7 @@ jobs:
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
+ - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
From 930bc7919db7ebb81dc566e9e278ce2bed3acf42 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Mon, 29 Jun 2026 07:29:52 -0400
Subject: [PATCH 66/69] Bump actions/cache from 6.0.0 to 6.1.0
---
.github/workflows/codeql-analysis.yml | 2 +-
.github/workflows/maven.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 20f1ee2cb..cca38e512 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -49,7 +49,7 @@ jobs:
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0
+ - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 # v6.1.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 139df406d..2637840b1 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -46,7 +46,7 @@ jobs:
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0
+ - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 # v6.1.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
From 6471196a31d9ea92942a634732e5350ef5253fcf Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Tue, 30 Jun 2026 13:58:31 +0000
Subject: [PATCH 67/69] Javadoc
---
src/main/java/org/apache/commons/csv/CSVRecord.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 502bf318a..8dab14d90 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -281,7 +281,7 @@ public Iterator iterator() {
/**
* Puts all values of this record into the given Map.
*
- * @param <M> the map type.
+ * @param <M> The map type.
* @param map The Map to populate.
* @return the given map.
* @since 1.9.0
From f7efa29cd9c4d9d1c1cafdae4469cf254bf22f42 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Tue, 30 Jun 2026 20:50:12 -0400
Subject: [PATCH 68/69] Bump actions/setup-java from 5.3.0 to 5.4.0
---
.github/workflows/maven.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 2637840b1..17ba7dd38 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -53,7 +53,7 @@ jobs:
restore-keys: |
${{ runner.os }}-maven-
- name: Set up JDK ${{ matrix.java }}
- uses: actions/setup-java@ad2b38190b15e4d6bdf0c97fb4fca8412226d287 # v5.3.0
+ uses: actions/setup-java@1bcf9fb12cf4aa7d266a90ae39939e61372fe520 # v5.4.0
with:
distribution: ${{ runner.os == 'macOS' && matrix.java == '8' && 'zulu' || 'temurin' }}
java-version: ${{ matrix.java }}
From 4434d93b92ff3f7b0754e65eba53721dd95c59f1 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 1 Jul 2026 07:31:54 -0400
Subject: [PATCH 69/69] Bump actions/checkout from 6.0.2 to 7.0.0
---
.github/workflows/scorecards-analysis.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index bf246c140..e1868cb46 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -40,7 +40,7 @@ jobs:
steps:
- name: "Checkout code"
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # 7.0.0
with:
persist-credentials: false