diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 08c673ee0..cca38e512 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -49,7 +49,7 @@ jobs:
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
+ - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index a6154ddb1..17ba7dd38 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -46,14 +46,14 @@ jobs:
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
+ - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- name: Set up JDK ${{ matrix.java }}
- uses: actions/setup-java@ad2b38190b15e4d6bdf0c97fb4fca8412226d287 # v5.3.0
+ uses: actions/setup-java@1bcf9fb12cf4aa7d266a90ae39939e61372fe520 # v5.4.0
with:
distribution: ${{ runner.os == 'macOS' && matrix.java == '8' && 'zulu' || 'temurin' }}
java-version: ${{ matrix.java }}
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index bf246c140..e1868cb46 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -40,7 +40,7 @@ jobs:
steps:
- name: "Checkout code"
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # 7.0.0
with:
persist-credentials: false
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 0d0175ccc..93952e9f1 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -62,6 +62,7 @@
Escape leading comment marker in printWithEscapes (#614).
Skip byte counting at EOF in ExtendedBufferedReader.read (#615).
Keep quoted empty trailing field with trailingDelimiter (#616).
+ Evaluate isDelimiter once in nextToken whitespace skip (#618).
Add an "Android Compatibility" section to the web site.
Add CSVParser.Builder.setByteOffset(long) (#604).
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 502bf318a..8dab14d90 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -281,7 +281,7 @@ public Iterator iterator() {
/**
* Puts all values of this record into the given Map.
*
- * @param <M> the map type.
+ * @param <M> The map type.
* @param map The Map to populate.
* @return the given map.
* @since 1.9.0
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 93a584663..fe964480a 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -277,15 +277,22 @@ Token nextToken(final Token token) throws IOException {
}
// Important: make sure a new char gets consumed in each iteration
while (token.type == Token.Type.INVALID) {
+ // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must
+ // only be evaluated once per character. Remember a match found while skipping whitespace below.
+ boolean delimiter = false;
// ignore whitespaces at beginning of a token
if (ignoreSurroundingSpaces) {
- while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) {
+ while (Character.isWhitespace((char) c) && !eol) {
+ if (isDelimiter(c)) {
+ delimiter = true;
+ break;
+ }
c = reader.read();
eol = readEndOfLine(c);
}
}
// ok, start of token reached: encapsulated, or token
- if (isDelimiter(c)) {
+ if (delimiter || isDelimiter(c)) {
// empty token return TOKEN("")
token.type = Token.Type.TOKEN;
} else if (eol) {
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 051548757..6d9bdd9e8 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -465,6 +465,31 @@ void testDuplicateHeadersNotAllowed() {
() -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false)));
}
+ /**
+ * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
+ * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping
+ * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value.
+ */
+ @Test
+ void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+ try (CSVParser parser = CSVParser.parse(" |a", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "", "a" }, records.get(0));
+ }
+ try (CSVParser parser = CSVParser.parse("a | |b", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "a", "", "b" }, records.get(0));
+ }
+ try (CSVParser parser = CSVParser.parse("a | |b |", format)) {
+ final List records = parser.getRecords();
+ assertEquals(1, records.size());
+ assertValuesEquals(new String[] { "a", "", "b", "" }, records.get(0));
+ }
+ }
+
@Test
void testEmptyFile() throws Exception {
try (CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8,
diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
index db1ab3a6d..a76f6e513 100644
--- a/src/test/java/org/apache/commons/csv/LexerTest.java
+++ b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -216,6 +216,25 @@ void testDelimiterIsWhitespace() throws IOException {
}
}
+ /**
+ * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace,
+ * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the
+ * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped.
+ */
+ @Test
+ void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+ try (Lexer lexer = createLexer(" |a", format)) {
+ assertNextToken(TOKEN, "", lexer);
+ assertNextToken(EOF, "a", lexer);
+ }
+ try (Lexer lexer = createLexer("a | |b", format)) {
+ assertNextToken(TOKEN, "a", lexer);
+ assertNextToken(TOKEN, "", lexer);
+ assertNextToken(EOF, "b", lexer);
+ }
+ }
+
@Test
void testEOFWithoutClosingQuote() throws Exception {
final String code = "a,\"b";