From bbdfad773199ec4edeb64b70b85e256ea7687852 Mon Sep 17 00:00:00 2001 From: Naveed Khan Date: Wed, 1 Jul 2026 20:00:09 +0530 Subject: [PATCH] quote value ending with multi-character delimiter prefix --- .../org/apache/commons/csv/CSVFormat.java | 30 +++++++++++++++++++ .../apache/commons/csv/CSVPrinterTest.java | 22 ++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 7145d23d3..4909de1f0 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -2097,6 +2097,33 @@ private boolean isDelimiter(final char ch0, final CharSequence charSeq, final in return true; } + /** + * Tests whether appending the delimiter after {@code charSeq} would let the parser match the delimiter starting inside the value. This happens with a + * multi-character delimiter when the value ends with a straddling prefix of it (for delimiter {@code ||}, a value ending in {@code |} followed by the + * delimiter yields {@code |||}, which the greedy lexer splits one character early). Such a value must be encapsulated so the field boundary is unambiguous. + */ + private boolean endsWithDelimiterPrefix(final CharSequence charSeq, final char[] delimiter, final int delimiterLength) { + if (delimiterLength < 2) { + return false; + } + final int len = charSeq.length(); + for (int start = Math.max(0, len - delimiterLength + 1); start < len; start++) { + boolean match = true; + for (int j = 0; j < delimiterLength; j++) { + final int idx = start + j; + final char c = idx < len ? charSeq.charAt(idx) : delimiter[idx - len]; + if (c != delimiter[j]) { + match = false; + break; + } + } + if (match) { + return true; + } + } + return false; + } + /** * Tests whether escapes are being processed. * @@ -2510,6 +2537,9 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi // encapsulate if we end in anything less than ' ' if (isTrimChar(c)) { quote = true; + } else if (endsWithDelimiterPrefix(charSeq, delim, delimLength)) { + // A trailing partial multi-character delimiter would merge with the following delimiter on read. + quote = true; } } } diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java index 9ae80c1e5..e3f26d2b5 100644 --- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java @@ -1908,6 +1908,28 @@ void testQuoteCommentMarkerFirstChar() throws IOException { } } + @Test + void testQuoteValueEndingWithMultiCharacterDelimiterPrefix() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("||").get(); + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + // "a|" ends with the delimiter's first char; unquoted output "a|||b" would split one char early on read. + printer.printRecord("a|", "b"); + // "a|b" does not end with a delimiter prefix, so it stays unquoted. + printer.printRecord("a|b", "c"); + } + final String string = sw.toString(); + assertEquals("\"a|\"||b" + RECORD_SEPARATOR + "a|b||c" + RECORD_SEPARATOR, string); + try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + assertEquals("a|", records.get(0).get(0)); + assertEquals("b", records.get(0).get(1)); + assertEquals("a|b", records.get(1).get(0)); + assertEquals("c", records.get(1).get(1)); + } + } + @Test void testQuoteNonNumeric() throws IOException { final StringWriter sw = new StringWriter();