Skip to content

Commit 41a063d

Browse files
authored
Merge pull request apache#265 from angusdev/CSV-290
CSV-290 - Fix the wrong assumptions in PostgreSQL formats
2 parents 827a1fc + 1ad309f commit 41a063d

6 files changed

Lines changed: 139 additions & 11 deletions

File tree

pom.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@
260260
<exclude>src/test/resources/org/apache/commons/csv/CSV-259/sample.txt</exclude>
261261
<exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv</exclude>
262262
<exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt</exclude>
263+
<exclude>src/test/resources/org/apache/commons/csv/CSV-290/psql.csv</exclude>
264+
<exclude>src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv</exclude>
263265
</excludes>
264266
</configuration>
265267
</plugin>

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,15 +1045,15 @@ public CSVFormat getFormat() {
10451045
*
10461046
* <p>
10471047
* This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
1048-
* characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
1048+
* characters are not escaped. The default NULL string is {@code ""}.
10491049
* </p>
10501050
*
10511051
* <p>
10521052
* The {@link Builder} settings are:
10531053
* </p>
10541054
* <ul>
10551055
* <li>{@code setDelimiter(',')}</li>
1056-
* <li>{@code setEscape('"')}</li>
1056+
* <li>{@code setEscape(null)}</li>
10571057
* <li>{@code setIgnoreEmptyLines(false)}</li>
10581058
* <li>{@code setQuote('"')}</li>
10591059
* <li>{@code setRecordSeparator('\n')}</li>
@@ -1069,7 +1069,7 @@ public CSVFormat getFormat() {
10691069
// @formatter:off
10701070
public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
10711071
.setDelimiter(COMMA)
1072-
.setEscape(DOUBLE_QUOTE_CHAR)
1072+
.setEscape(null)
10731073
.setIgnoreEmptyLines(false)
10741074
.setQuote(DOUBLE_QUOTE_CHAR)
10751075
.setRecordSeparator(LF)
@@ -1082,8 +1082,8 @@ public CSVFormat getFormat() {
10821082
* Default PostgreSQL text format used by the {@code COPY} operation.
10831083
*
10841084
* <p>
1085-
* This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
1086-
* characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}.
1085+
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
1086+
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
10871087
* </p>
10881088
*
10891089
* <p>
@@ -1093,7 +1093,7 @@ public CSVFormat getFormat() {
10931093
* <li>{@code setDelimiter('\t')}</li>
10941094
* <li>{@code setEscape('\\')}</li>
10951095
* <li>{@code setIgnoreEmptyLines(false)}</li>
1096-
* <li>{@code setQuote('"')}</li>
1096+
* <li>{@code setQuote(null)}</li>
10971097
* <li>{@code setRecordSeparator('\n')}</li>
10981098
* <li>{@code setNullString("\\N")}</li>
10991099
* <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
@@ -1109,7 +1109,7 @@ public CSVFormat getFormat() {
11091109
.setDelimiter(TAB)
11101110
.setEscape(BACKSLASH)
11111111
.setIgnoreEmptyLines(false)
1112-
.setQuote(DOUBLE_QUOTE_CHAR)
1112+
.setQuote(null)
11131113
.setRecordSeparator(LF)
11141114
.setNullString("\\N")
11151115
.setQuoteMode(QuoteMode.ALL_NON_NULL)

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ public void testEqualsWithNull() {
706706
assertFalse(csvFormat.getTrailingDelimiter());
707707
assertFalse(csvFormat.getTrim());
708708

709-
assertTrue(csvFormat.isQuoteCharacterSet());
709+
assertFalse(csvFormat.isQuoteCharacterSet());
710710
assertEquals("\\N", csvFormat.getNullString());
711711

712712
assertFalse(csvFormat.getIgnoreHeaderCase());
@@ -724,7 +724,7 @@ public void testEqualsWithNull() {
724724
assertEquals("\n", csvFormat.getRecordSeparator());
725725
assertFalse(csvFormat.getIgnoreEmptyLines());
726726

727-
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
727+
assertNull(csvFormat.getQuoteCharacter());
728728
assertTrue(csvFormat.isNullStringSet());
729729

730730
assertEquals('\\', (char)csvFormat.getEscapeCharacter());
@@ -733,7 +733,7 @@ public void testEqualsWithNull() {
733733
assertFalse(csvFormat.getTrailingDelimiter());
734734
assertFalse(csvFormat.getTrim());
735735

736-
assertTrue(csvFormat.isQuoteCharacterSet());
736+
assertFalse(csvFormat.isQuoteCharacterSet());
737737
assertEquals("\\N", csvFormat.getNullString());
738738

739739
assertFalse(csvFormat.getIgnoreHeaderCase());
@@ -751,7 +751,7 @@ public void testEqualsWithNull() {
751751
assertEquals("\n", csvFormat.getRecordSeparator());
752752
assertFalse(csvFormat.getIgnoreEmptyLines());
753753

754-
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
754+
assertNull(csvFormat.getQuoteCharacter());
755755
assertTrue(csvFormat.isNullStringSet());
756756

757757
Assertions.assertNotEquals(null, csvFormat);

src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv.issues;
19+
20+
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
21+
import static org.junit.jupiter.api.Assertions.assertEquals;
22+
23+
import java.io.InputStreamReader;
24+
import java.io.StringReader;
25+
import java.io.StringWriter;
26+
import java.util.ArrayList;
27+
import java.util.Iterator;
28+
import java.util.List;
29+
30+
import org.apache.commons.csv.CSVFormat;
31+
import org.apache.commons.csv.CSVParser;
32+
import org.apache.commons.csv.CSVPrinter;
33+
import org.apache.commons.csv.CSVRecord;
34+
import org.junit.jupiter.api.Test;
35+
36+
// psql (14.5 (Homebrew))
37+
//
38+
// create table COMMONS_CSV_PSQL_TEST (ID INTEGER, COL1 VARCHAR, COL2 VARCHAR, COL3 VARCHAR, COL4 VARCHAR);
39+
// insert into COMMONS_CSV_PSQL_TEST select 1, 'abc', 'test line 1' || chr(10) || 'test line 2', null, '';
40+
// insert into COMMONS_CSV_PSQL_TEST select 2, 'xyz', '\b:' || chr(8) || ' \t:' || chr(9) || ' \n:' || chr(10) || ' \r:' || chr(13), 'a', 'b';
41+
// insert into COMMONS_CSV_PSQL_TEST values (3, 'a', 'b,c,d', '"quoted"', 'e');
42+
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.csv' WITH (FORMAT CSV);
43+
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.tsv';
44+
//
45+
// cat /tmp/psql.csv
46+
// 1,abc,"test line 1
47+
// test line 2",,""
48+
// 2,xyz,"\b:^H \t: \n:
49+
// \r:^M",a,b
50+
// 3,a,"b,c,d","""quoted""",e
51+
//
52+
// cat /tmp/psql.tsv
53+
// 1 abc test line 1\ntest line 2 \N
54+
// 2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b
55+
// 3 a b,c,d "quoted" e
56+
//
57+
public class JiraCsv290Test {
58+
private void testHelper(String filename, CSVFormat format) throws Exception {
59+
List<List<String>> content = new ArrayList<>();
60+
try (CSVParser csvParser = CSVParser.parse(new InputStreamReader(
61+
this.getClass().getResourceAsStream("/org/apache/commons/csv/CSV-290/" + filename)), format)) {
62+
for (CSVRecord csvRecord : csvParser) {
63+
List<String> row = new ArrayList<>();
64+
content.add(row);
65+
for (int i = 0; i < csvRecord.size(); i++) {
66+
row.add(csvRecord.get(i));
67+
}
68+
}
69+
}
70+
71+
assertEquals(3, content.size());
72+
73+
assertEquals("1", content.get(0).get(0));
74+
assertEquals("abc", content.get(0).get(1));
75+
assertEquals("test line 1\ntest line 2", content.get(0).get(2)); // new line
76+
assertEquals(null, content.get(0).get(3)); // null
77+
assertEquals("", content.get(0).get(4));
78+
79+
assertEquals("2", content.get(1).get(0));
80+
assertEquals("\\b:\b \\t:\t \\n:\n \\r:\r", content.get(1).get(2)); // \b, \t, \n, \r
81+
82+
assertEquals("3", content.get(2).get(0));
83+
assertEquals("b,c,d", content.get(2).get(2)); // value has comma
84+
assertEquals("\"quoted\"", content.get(2).get(3)); // quoted
85+
}
86+
87+
@Test
88+
public void testPostgresqlCsv() throws Exception {
89+
testHelper("psql.csv", CSVFormat.POSTGRESQL_CSV);
90+
}
91+
92+
@Test
93+
public void testPostgresqlText() throws Exception {
94+
testHelper("psql.tsv", CSVFormat.POSTGRESQL_TEXT);
95+
}
96+
97+
@Test
98+
public void testWriteThenRead() throws Exception {
99+
StringWriter sw = new StringWriter();
100+
101+
CSVPrinter printer = new CSVPrinter(sw,
102+
CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build());
103+
104+
printer.printRecord("column1", "column2");
105+
printer.printRecord("v11", "v12");
106+
printer.printRecord("v21", "v22");
107+
printer.close();
108+
109+
CSVParser parser = new CSVParser(new StringReader(sw.toString()),
110+
CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build());
111+
112+
assertArrayEquals(new Object[] { "column1", "column2" }, parser.getHeaderNames().toArray());
113+
114+
Iterator<CSVRecord> i = parser.iterator();
115+
assertArrayEquals(new String[] { "v11", "v12" }, i.next().toList().toArray());
116+
assertArrayEquals(new String[] { "v21", "v22" }, i.next().toList().toArray());
117+
}
118+
}

src/test/resources/org/apache/commons/csv/CSV-290/psql.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
1,abc,"test line 1
2+
test line 2",,""
3+
2,xyz,"\b: \t: \n:
4+
\r:",a,b
5+
3,a,"b,c,d","""quoted""",e

src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
1 abc test line 1\ntest line 2 \N
2+
2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b
3+
3 a b,c,d "quoted" e

0 commit comments

Comments
 (0)