Skip to content

Commit f0a2acd

Browse files
authored
Merge pull request #614 from rootvector2/escape-comment-marker-first-char
Escape leading comment marker in printWithEscapes
2 parents caa1c8d + 61aa055 commit f0a2acd

2 files changed

Lines changed: 63 additions & 2 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2329,12 +2329,16 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen
23292329
final char escape = getEscapeChar();
23302330
final boolean quoteSet = isQuoteCharacterSet();
23312331
final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
2332+
final boolean commentMarkerSet = isCommentMarkerSet();
2333+
final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional
23322334
while (pos < end) {
23332335
char c = charSeq.charAt(pos);
23342336
final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength);
23352337
final boolean isCr = c == Constants.CR;
23362338
final boolean isLf = c == Constants.LF;
2337-
if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) {
2339+
// A leading comment marker would be read back as a comment, so escape it.
2340+
final boolean isComment = commentMarkerSet && pos == 0 && c == commentChar;
2341+
if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) {
23382342
// write out segment up until this char
23392343
if (pos > start) {
23402344
appendable.append(charSeq, start, pos);
@@ -2375,8 +2379,11 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
23752379
final char escape = getEscapeChar();
23762380
final boolean quoteSet = isQuoteCharacterSet();
23772381
final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
2382+
final boolean commentMarkerSet = isCommentMarkerSet();
2383+
final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional
23782384
final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
23792385
int c;
2386+
boolean firstChar = true;
23802387
final char[] lookAheadBuffer = new char[delimLength - 1];
23812388
while (EOF != (c = bufferedReader.read())) {
23822389
builder.append((char) c);
@@ -2386,7 +2393,10 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
23862393
final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength);
23872394
final boolean isCr = c == Constants.CR;
23882395
final boolean isLf = c == Constants.LF;
2389-
if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) {
2396+
// A leading comment marker would be read back as a comment, so escape it.
2397+
final boolean isComment = commentMarkerSet && firstChar && c == commentChar;
2398+
firstChar = false;
2399+
if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) {
23902400
// write out segment up until this char
23912401
if (pos > start) {
23922402
append(builder.substring(start, pos), appendable);

src/test/java/org/apache/commons/csv/CSVPrinterTest.java

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -569,6 +569,57 @@ void testEscapeBackslash5() throws IOException {
569569
assertEquals("\\\\", sw.toString());
570570
}
571571

572+
// Verifies escaping of a leading comment marker when quoting is disabled (quote set to null)
// and an escape character is configured, then round-trips the output through CSVParser.
@Test
573+
void testEscapeCommentMarkerFirstChar() throws IOException {
574+
// No quoting available in escape mode, so a leading comment marker must be escaped or the
575+
// record reads back as a comment and is dropped. Mirrors the quoting fix for QuoteMode.MINIMAL.
576+
final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote(null).setEscape('\\').setCommentMarker(';').get();
577+
final StringWriter sw = new StringWriter();
578+
final String col1 = ";comment-like";
579+
try (CSVPrinter printer = new CSVPrinter(sw, format)) {
580+
// Same values printed twice: first as Strings, then as Readers, so both input kinds are covered.
printer.printRecord(col1, "b");
581+
printer.printRecord(new StringReader(col1), new StringReader("b"));
582+
// The marker past the first character does not start a comment and is left alone.
// Note: the second column ";c" starts with the marker and is escaped too (see expected output below).
583+
printer.printRecord("a;b", ";c");
584+
}
585+
final String string = sw.toString();
586+
// Expected text: one line per printRecord call, with the escape character before each leading ';'.
assertEquals("\\;comment-like,b" + RECORD_SEPARATOR +
587+
"\\;comment-like,b" + RECORD_SEPARATOR +
588+
"a;b,\\;c" + RECORD_SEPARATOR, string);
589+
// The emitted records must read back as the original values, none parsed as a comment.
590+
try (CSVParser parser = CSVParser.parse(string, format)) {
591+
final List<CSVRecord> records = parser.getRecords();
592+
assertEquals(3, records.size());
593+
assertEquals(col1, records.get(0).get(0));
594+
assertEquals("b", records.get(0).get(1));
595+
assertEquals(col1, records.get(1).get(0));
596+
assertEquals("b", records.get(1).get(1));
597+
assertEquals("a;b", records.get(2).get(0));
598+
assertEquals(";c", records.get(2).get(1));
599+
}
600+
}
601+
602+
// Same scenario as a leading comment marker in the first column, but with the format built
// using QuoteMode.NONE plus an escape character instead of a null quote character.
@Test
603+
void testEscapeCommentMarkerFirstCharWithQuoteModeNone() throws IOException {
604+
final CSVFormat format = CSVFormat.DEFAULT.builder().setEscape('\\').setQuoteMode(QuoteMode.NONE).setCommentMarker(';').get();
605+
final StringWriter sw = new StringWriter();
606+
// First column starts with the configured comment marker ';'.
final String col1 = ";bar";
607+
try (CSVPrinter printer = new CSVPrinter(sw, format)) {
608+
// Print the same record once from Strings and once from Readers.
printer.printRecord(col1, "b");
609+
printer.printRecord(new StringReader(col1), new StringReader("b"));
610+
}
611+
final String string = sw.toString();
612+
// Both records are expected to be printed with the leading ';' preceded by the escape character.
assertEquals("\\;bar,b" + RECORD_SEPARATOR + "\\;bar,b" + RECORD_SEPARATOR, string);
613+
// Parsing the printed text with the same format must yield both records with the original values.
try (CSVParser parser = CSVParser.parse(string, format)) {
614+
final List<CSVRecord> records = parser.getRecords();
615+
assertEquals(2, records.size());
616+
for (final CSVRecord record : records) {
617+
assertEquals(col1, record.get(0));
618+
assertEquals("b", record.get(1));
619+
}
620+
}
621+
}
622+
572623
@Test
573624
void testEscapeNull1() throws IOException {
574625
final StringWriter sw = new StringWriter();

0 commit comments

Comments
 (0)