Skip to content

Commit b7a9d40

Browse files
committed
CSV-290 - Fix the wrong assumptions in PostgreSQL formats
CSVFormat.POSTGRESQL_CSV - special characters are not escaped. CSVFormat.POSTGRESQL_TEXT - values are not quoted.
1 parent 048d507 commit b7a9d40

5 files changed

Lines changed: 136 additions & 11 deletions

File tree

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,15 +1047,15 @@ public CSVFormat getFormat() {
10471047
*
10481048
* <p>
10491049
* This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
1050-
* characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
1050+
* characters are not escaped. The default NULL string is {@code ""}.
10511051
* </p>
10521052
*
10531053
* <p>
10541054
* The {@link Builder} settings are:
10551055
* </p>
10561056
* <ul>
10571057
* <li>{@code setDelimiter(',')}</li>
1058-
* <li>{@code setEscape('"')}</li>
1058+
* <li>{@code setEscape(null)}</li>
10591059
* <li>{@code setIgnoreEmptyLines(false)}</li>
10601060
* <li>{@code setQuote('"')}</li>
10611061
* <li>{@code setRecordSeparator('\n')}</li>
@@ -1071,7 +1071,7 @@ public CSVFormat getFormat() {
10711071
// @formatter:off
10721072
public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
10731073
.setDelimiter(COMMA)
1074-
.setEscape(DOUBLE_QUOTE_CHAR)
1074+
.setEscape(null)
10751075
.setIgnoreEmptyLines(false)
10761076
.setQuote(DOUBLE_QUOTE_CHAR)
10771077
.setRecordSeparator(LF)
@@ -1084,8 +1084,8 @@ public CSVFormat getFormat() {
10841084
* Default PostgreSQL text format used by the {@code COPY} operation.
10851085
*
10861086
* <p>
1087-
* This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
1088-
* characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}.
1087+
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
1088+
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
10891089
* </p>
10901090
*
10911091
* <p>
@@ -1095,7 +1095,7 @@ public CSVFormat getFormat() {
10951095
* <li>{@code setDelimiter('\t')}</li>
10961096
* <li>{@code setEscape('\\')}</li>
10971097
* <li>{@code setIgnoreEmptyLines(false)}</li>
1098-
* <li>{@code setQuote('"')}</li>
1098+
* <li>{@code setQuote(null)}</li>
10991099
* <li>{@code setRecordSeparator('\n')}</li>
11001100
* <li>{@code setNullString("\\N")}</li>
11011101
* <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
@@ -1111,7 +1111,7 @@ public CSVFormat getFormat() {
11111111
.setDelimiter(TAB)
11121112
.setEscape(BACKSLASH)
11131113
.setIgnoreEmptyLines(false)
1114-
.setQuote(DOUBLE_QUOTE_CHAR)
1114+
.setQuote(null)
11151115
.setRecordSeparator(LF)
11161116
.setNullString("\\N")
11171117
.setQuoteMode(QuoteMode.ALL_NON_NULL)

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ public void testEqualsWithNull() {
706706
assertFalse(csvFormat.getTrailingDelimiter());
707707
assertFalse(csvFormat.getTrim());
708708

709-
assertTrue(csvFormat.isQuoteCharacterSet());
709+
assertFalse(csvFormat.isQuoteCharacterSet());
710710
assertEquals("\\N", csvFormat.getNullString());
711711

712712
assertFalse(csvFormat.getIgnoreHeaderCase());
@@ -724,7 +724,7 @@ public void testEqualsWithNull() {
724724
assertEquals("\n", csvFormat.getRecordSeparator());
725725
assertFalse(csvFormat.getIgnoreEmptyLines());
726726

727-
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
727+
assertNull(csvFormat.getQuoteCharacter());
728728
assertTrue(csvFormat.isNullStringSet());
729729

730730
assertEquals('\\', (char)csvFormat.getEscapeCharacter());
@@ -733,7 +733,7 @@ public void testEqualsWithNull() {
733733
assertFalse(csvFormat.getTrailingDelimiter());
734734
assertFalse(csvFormat.getTrim());
735735

736-
assertTrue(csvFormat.isQuoteCharacterSet());
736+
assertFalse(csvFormat.isQuoteCharacterSet());
737737
assertEquals("\\N", csvFormat.getNullString());
738738

739739
assertFalse(csvFormat.getIgnoreHeaderCase());
@@ -751,7 +751,7 @@ public void testEqualsWithNull() {
751751
assertEquals("\n", csvFormat.getRecordSeparator());
752752
assertFalse(csvFormat.getIgnoreEmptyLines());
753753

754-
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
754+
assertNull(csvFormat.getQuoteCharacter());
755755
assertTrue(csvFormat.isNullStringSet());
756756

757757
Assertions.assertNotEquals(null, csvFormat);
src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv.issues;
19+
20+
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
21+
import static org.junit.jupiter.api.Assertions.assertEquals;
22+
23+
import java.io.InputStreamReader;
24+
import java.io.StringReader;
25+
import java.io.StringWriter;
26+
import java.util.ArrayList;
27+
import java.util.Iterator;
28+
import java.util.List;
29+
30+
import org.apache.commons.csv.CSVFormat;
31+
import org.apache.commons.csv.CSVParser;
32+
import org.apache.commons.csv.CSVPrinter;
33+
import org.apache.commons.csv.CSVRecord;
34+
import org.junit.jupiter.api.Test;
35+
36+
// psql (14.5 (Homebrew))
37+
//
38+
// create table COMMONS_CSV_PSQL_TEST (ID INTEGER, COL1 VARCHAR, COL2 VARCHAR, COL3 VARCHAR, COL4 VARCHAR);
39+
// insert into COMMONS_CSV_PSQL_TEST select 1, 'abc', 'test line 1' || chr(10) || 'test line 2', null, '';
40+
// insert into COMMONS_CSV_PSQL_TEST select 2, 'xyz', '\b:' || chr(8) || ' \n:' || chr(10) || ' \r:' || chr(13), 'a', 'b';
41+
// insert into COMMONS_CSV_PSQL_TEST values (3, 'a', 'b,c,d', '"quoted"', 'e');
42+
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.csv' WITH (FORMAT CSV);
43+
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.tsv';
44+
//
45+
// cat /tmp/psql.csv
46+
// 1,abc,"test line 1
47+
// test line 2",,""
48+
// 2,xyz,"\b:^H \n:
49+
// \r:^M",a,b
50+
// 3,a,"b,c,d","""quoted""",e
51+
//
52+
// cat /tmp/psql.tsv
53+
// 1 abc test line 1\ntest line 2 \N
54+
// 2 xyz \\b:\b \\n:\n \\r:\r a b
55+
// 3 a b,c,d "quoted" e
56+
//
57+
public class JiraCsv290Test {
58+
private void testHelper(String filename, CSVFormat format) throws Exception {
59+
List<List<String>> content = new ArrayList<>();
60+
try (CSVParser csvParser = CSVParser.parse(new InputStreamReader(
61+
this.getClass().getResourceAsStream("/org/apache/commons/csv/CSV-290/" + filename)), format)) {
62+
for (CSVRecord csvRecord : csvParser) {
63+
List<String> row = new ArrayList<>();
64+
content.add(row);
65+
for (int i = 0; i < csvRecord.size(); i++) {
66+
row.add(csvRecord.get(i));
67+
}
68+
}
69+
}
70+
assertEquals(3, content.size());
71+
72+
assertEquals("1", content.get(0).get(0));
73+
assertEquals("abc", content.get(0).get(1));
74+
assertEquals("test line 1\ntest line 2", content.get(0).get(2)); // new line
75+
assertEquals(null, content.get(0).get(3)); // null
76+
assertEquals("", content.get(0).get(4));
77+
78+
assertEquals("2", content.get(1).get(0));
79+
assertEquals("\\b:\b \\n:\n \\r:\r", content.get(1).get(2)); // \b, \n, \r
80+
81+
assertEquals("3", content.get(2).get(0));
82+
assertEquals("b,c,d", content.get(2).get(2)); // value has comma
83+
assertEquals("\"quoted\"", content.get(2).get(3)); // quoted
84+
}
85+
86+
@Test
87+
public void testPostgresqlCsv() throws Exception {
88+
testHelper("psql.csv", CSVFormat.POSTGRESQL_CSV);
89+
}
90+
91+
@Test
92+
public void testPostgresqlText() throws Exception {
93+
testHelper("psql.tsv", CSVFormat.POSTGRESQL_TEXT);
94+
}
95+
96+
@Test
97+
public void testWriteThenRead() throws Exception {
98+
StringWriter sw = new StringWriter();
99+
100+
CSVPrinter printer = new CSVPrinter(sw,
101+
CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build());
102+
103+
printer.printRecord("column1", "column2");
104+
printer.printRecord("v11", "v12");
105+
printer.printRecord("v21", "v22");
106+
printer.close();
107+
108+
CSVParser parser = new CSVParser(new StringReader(sw.toString()),
109+
CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build());
110+
111+
assertArrayEquals(new Object[] { "column1", "column2" }, parser.getHeaderNames().toArray());
112+
113+
Iterator<CSVRecord> i = parser.iterator();
114+
assertArrayEquals(new String[] { "v11", "v12" }, i.next().toList().toArray());
115+
assertArrayEquals(new String[] { "v21", "v22" }, i.next().toList().toArray());
116+
}
117+
}

src/test/resources/org/apache/commons/csv/CSV-290/psql.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
1,abc,"test line 1
2+
test line 2",,""
3+
2,xyz,"\b: \n:
4+
\r:",a,b
5+
3,a,"b,c,d","""quoted""",e
src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
1 abc test line 1\ntest line 2 \N
2+
2 xyz \\b:\b \\n:\n \\r:\r a b
3+
3 a b,c,d "quoted" e

0 commit comments

Comments
 (0)