Skip to content

Commit 316a51f

Browse files
committed
- Add some real world CSV files.
- Add a record API to get column values using an Enum.
- Throw a better exception when a resource is not found in a class loader for the parser.
- Replace some tabs with spaces.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1509068 13f79535-47bb-0310-9956-ffa450edef68
1 parent 816c652 commit 316a51f

6 files changed

Lines changed: 207 additions & 29 deletions

File tree

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ public class CSVParser implements Iterable<CSVRecord>, Closeable {
106106
public static CSVParser parseFile(File file, final CSVFormat format) throws IOException {
107107
return new CSVParser(new FileReader(file), format);
108108
}
109-
109+
110110
/**
111111
* Creates a parser for the given resource.
112112
*
@@ -128,9 +128,38 @@ public static CSVParser parseFile(File file, final CSVFormat format) throws IOEx
128128
*/
129129
public static CSVParser parseResource(String resource, Charset charset, ClassLoader classLoader,
130130
final CSVFormat format) throws IOException {
131-
return parseURL(classLoader.getResource(resource), charset, format);
131+
URL url = classLoader.getResource(resource);
132+
if (url == null) {
133+
throw new IllegalArgumentException("Resource cannot be found: " + resource);
134+
}
135+
return parseURL(url, charset, format);
132136
}
133-
137+
138+
/**
139+
* Creates a parser for the given resource, located via {@link ClassLoader#getSystemResource(String)}.
140+
*
141+
* <p>
142+
* If you do not read all records from the given source, you should call {@link #close()} on the parser.
143+
* </p>
144+
*
145+
* @param resource
146+
* a resource path
147+
* @param charset
148+
* the charset for the resource
149+
* @param format
150+
* the CSVFormat used for CSV parsing
151+
* @return a new parser
152+
* @throws IOException
153+
* If an I/O error occurs
154+
*/
155+
public static CSVParser parseResource(String resource, Charset charset, final CSVFormat format) throws IOException {
156+
URL url = ClassLoader.getSystemResource(resource);
157+
if (url == null) {
158+
throw new IllegalArgumentException("System resource cannot be found: " + resource);
159+
}
160+
return parseURL(url, charset, format);
161+
}
162+
134163
/**
135164
* Creates a parser for the given {@link String} using the default format {@link CSVFormat#DEFAULT}.
136165
*
@@ -201,7 +230,7 @@ public static CSVParser parseURL(URL url, Charset charset, final CSVFormat forma
201230

202231
/**
203232
* CSV parser using the default format {@link CSVFormat#DEFAULT}.
204-
*
233+
*
205234
* <p>
206235
* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
207236
* unless you close the {@code reader}.
@@ -249,25 +278,26 @@ private void addRecordValue() {
249278
this.record.add(input);
250279
} else {
251280
this.record.add(input.equalsIgnoreCase(nullString) ? null : input);
252-
}}
281+
}
282+
}
253283

254284
/**
255285
* Closes resources.
256286
*
257-
* @throws IOException
258-
* If an I/O error occurs
287+
* @throws IOException
288+
* If an I/O error occurs
259289
*/
260-
public void close() throws IOException {
261-
if (this.lexer != null) {
262-
this.lexer.close();
263-
}
264-
}
290+
public void close() throws IOException {
291+
if (this.lexer != null) {
292+
this.lexer.close();
293+
}
294+
}
265295

266296
/**
267297
* Returns the current line number in the input stream.
268298
* <p/>
269299
* ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the record number.
270-
*
300+
*
271301
* @return current line number
272302
*/
273303
public long getCurrentLineNumber() {
@@ -277,9 +307,8 @@ public long getCurrentLineNumber() {
277307
/**
278308
* Returns a copy of the header map that iterates in column order.
279309
* <p>
280-
* The map keys are column names.
281-
* The map values are 0-based indices.
282-
*
310+
* The map keys are column names. The map values are 0-based indices.
311+
*
283312
* @return a copy of the header map that iterates in column order.
284313
*/
285314
public Map<String, Integer> getHeaderMap() {
@@ -290,7 +319,7 @@ public Map<String, Integer> getHeaderMap() {
290319
* Returns the current record number in the input stream.
291320
* <p/>
292321
* ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the line number.
293-
*
322+
*
294323
* @return current record number
295324
*/
296325
public long getRecordNumber() {
@@ -302,7 +331,7 @@ public long getRecordNumber() {
302331
* entries.
303332
* <p/>
304333
* The returned content starts at the current parse-position in the stream.
305-
*
334+
*
306335
* @return list of {@link CSVRecord} entries, may be empty
307336
* @throws IOException
308337
* on parse error or input read-failure
@@ -350,10 +379,10 @@ private Map<String, Integer> initializeHeader() throws IOException {
350379
}
351380

352381
public boolean isClosed() {
353-
return this.lexer.isClosed();
354-
}
382+
return this.lexer.isClosed();
383+
}
355384

356-
/**
385+
/**
357386
* Returns an iterator on the records. IOExceptions occurring during the iteration are wrapped in a
358387
* RuntimeException.
359388
*/
@@ -371,9 +400,9 @@ private CSVRecord getNextRecord() {
371400
}
372401

373402
public boolean hasNext() {
374-
if (CSVParser.this.isClosed()) {
375-
return false;
376-
}
403+
if (CSVParser.this.isClosed()) {
404+
return false;
405+
}
377406
if (this.current == null) {
378407
this.current = this.getNextRecord();
379408
}
@@ -382,9 +411,9 @@ public boolean hasNext() {
382411
}
383412

384413
public CSVRecord next() {
385-
if (CSVParser.this.isClosed()) {
386-
return null;
387-
}
414+
if (CSVParser.this.isClosed()) {
415+
return null;
416+
}
388417
CSVRecord next = this.current;
389418
this.current = null;
390419

@@ -407,7 +436,7 @@ public void remove() {
407436

408437
/**
409438
* Parses the next record from the current point in the stream.
410-
*
439+
*
411440
* @return the record as an array of values, or <tt>null</tt> if the end of the stream has been reached
412441
* @throws IOException
413442
* on parse error or input read-failure
@@ -448,7 +477,8 @@ CSVRecord nextRecord() throws IOException {
448477
if (!this.record.isEmpty()) {
449478
this.recordNumber++;
450479
final String comment = sb == null ? null : sb.toString();
451-
result = new CSVRecord(this.record.toArray(new String[this.record.size()]), this.headerMap, comment, this.recordNumber);
480+
result = new CSVRecord(this.record.toArray(new String[this.record.size()]), this.headerMap, comment,
481+
this.recordNumber);
452482
}
453483
return result;
454484
}

src/main/java/org/apache/commons/csv/CSVRecord.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@ public class CSVRecord implements Serializable, Iterable<String> {
5353
this.comment = comment;
5454
}
5555

56+
/**
57+
* Returns a value by {@link Enum}.
58+
*
59+
* @param e
60+
* an enum
61+
* @return the column value looked up by the name {@code e.toString()}
62+
*/
63+
public String get(Enum<?> e) {
64+
return get(e.toString());
65+
}
66+
5667
/**
5768
* Returns a value by index.
5869
*
@@ -171,4 +182,5 @@ public String toString() {
171182
return Arrays.toString(values);
172183
}
173184

185+
174186
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.commons.csv;
18+
19+
import java.io.IOException;
20+
import java.nio.charset.Charset;
21+
import java.util.List;
22+
23+
import org.junit.Assert;
24+
import org.junit.Test;
25+
26+
/**
27+
* Real world examples from http://www.ferc.gov/docs-filing/eqr/soft-tools/sample-csv.asp
28+
*/
29+
public class FercGovTest {
30+
31+
private enum ContractColumnNames {
32+
contract_id, seller_company_name, customer_company_name, customer_duns_number, contract_affiliate,
33+
FERC_tariff_reference, contract_service_agreement_id, contract_execution_date, contract_commencement_date,
34+
contract_termination_date, actual_termination_date, extension_provision_description, class_name, term_name,
35+
increment_name, increment_peaking_name, product_type_name, product_name, quantity, units_for_contract, rate,
36+
rate_minimum, rate_maximum, rate_description, units_for_rate, point_of_receipt_control_area,
37+
point_of_receipt_specific_location, point_of_delivery_control_area, point_of_delivery_specific_location,
38+
begin_date, end_date, time_zone;
39+
}
40+
41+
private static final Charset US_ASCII = Charset.forName("US-ASCII");
42+
43+
@Test
44+
public void testContractFile() throws IOException {
45+
final CSVParser parser = CSVParser.parseResource("ferc.gov/contract.txt", US_ASCII,
46+
CSVFormat.DEFAULT.withHeader());
47+
try {
48+
final List<CSVRecord> records = parser.getRecords();
49+
CSVRecord record = records.get(0);
50+
Assert.assertEquals(22, records.size());
51+
// first record
52+
Assert.assertEquals("C71", record.get(ContractColumnNames.contract_id));
53+
Assert.assertEquals("The Electric Company", record.get(ContractColumnNames.seller_company_name));
54+
Assert.assertEquals("ES", record.get(ContractColumnNames.time_zone));
55+
// last record
56+
record = records.get(records.size() - 1);
57+
// values of the last record
58+
Assert.assertEquals("C78", record.get(ContractColumnNames.contract_id));
59+
Assert.assertEquals("The Electric Company", record.get(ContractColumnNames.seller_company_name));
60+
Assert.assertEquals("EP", record.get(ContractColumnNames.time_zone));
61+
} finally {
62+
parser.close();
63+
}
64+
}
65+
66+
@Test
67+
public void testTransactionFile() throws IOException {
68+
final CSVParser parser = CSVParser.parseResource("ferc.gov/transaction.txt", US_ASCII,
69+
CSVFormat.DEFAULT.withHeader());
70+
try {
71+
final List<CSVRecord> records = parser.getRecords();
72+
Assert.assertEquals(24, records.size());
73+
CSVRecord record = records.get(0);
74+
// first record
75+
Assert.assertEquals("T1", record.get("transaction_unique_identifier"));
76+
Assert.assertEquals("The Electric Company", record.get("seller_company_name"));
77+
Assert.assertEquals("880386", record.get("transaction_charge"));
78+
// last record
79+
record = records.get(records.size() - 1);
80+
Assert.assertEquals("T15", record.get("transaction_unique_identifier"));
81+
Assert.assertEquals("The Electric Company", record.get("seller_company_name"));
82+
Assert.assertEquals("1800", record.get("transaction_charge"));
83+
} finally {
84+
parser.close();
85+
}
86+
}
87+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
contract_id,seller_company_name,customer_company_name,customer_duns_number,contract_affiliate,FERC_tariff_reference,contract_service_agreement_id,contract_execution_date,contract_commencement_date,contract_termination_date,actual_termination_date,extension_provision_description,class_name,term_name,increment_name,increment_peaking_name,product_type_name,product_name,quantity,units_for_contract,rate,rate_minimum,rate_maximum,rate_description,units_for_rate,point_of_receipt_control_area,point_of_receipt_specific_location,point_of_delivery_control_area,point_of_delivery_specific_location,begin_date,end_date,time_zone
2+
C71,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Original Volume No. 10,2,2/15/2001,2/15/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
3+
C72,The Electric Company,Utility A,38495837,n,FERC Electric Tariff Original Volume No. 10,15,7/25/2001,8/1/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,,,,,,ES
4+
C73,The Electric Company,Utility B,493758794,N,FERC Electric Tariff Original Volume No. 10,7,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
5+
C74,The Electric Company,Utility C,594739573,n,FERC Electric Tariff Original Volume No. 10,25,6/8/2001,7/6/2001,,,Evergreen,N/A,N/A,N/A,N/A,MB,ENERGY,0,, , , ,Market Based,,,, , ,,,ep
6+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,ENERGY,2000,KWh,.1475, , ,Max amount of capacity and energy to be transmitted. Bill based on monthly max delivery to City.,$/KWh,PJM,Point A,PJM,Point B,,,ep
7+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,point-to-point agreement,2000,KW,0.01, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
8+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,network,2000,KW,0.2, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
9+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,BLACK START SERVICE,2000,KW,0.22, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
10+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,CAPACITY,2000,KW,0.04, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
11+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,regulation & frequency response,2000,KW,0.1, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
12+
C75,The Electric Company,The Power Company,456543333,N,FERC Electric Tariff Third Revised Volume No. 7,94,2/13/2001,7/1/2001,12/31/2006,,None,F,LT,M,P,T,real power transmission loss,2000,KW,7, , ,,$/kw-mo,PJM,Point A,PJM,Point B,,,ep
13+
C76,The Electric Company,The Power Company,456534333,N,FERC Electric Tariff Original Volume No. 10,132,12/15/2001,1/1/2002,12/31/2004,12/31/2004,None,F,LT,M,FP,MB,CAPACITY,70,MW,3750, , ,70MW for each and every hour over the term of the agreement (7x24 schedule).,$/MW,,,,,,,ep
14+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,35, , ,,$/MWH,,,PJM,Bus 4321,20020101,20030101,EP
15+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,37, , ,,$/MWH,,,PJM,Bus 4321,20030101,20040101,EP
16+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,39, , ,,$/MWH,,,PJM,Bus 4321,20040101,20050101,EP
17+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,41, , ,,$/MWH,,,PJM,Bus 4321,20050101,20060101,EP
18+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,43, , ,,$/MWH,,,PJM,Bus 4321,20060101,20070101,EP
19+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,45, , ,,$/MWH,,,PJM,Bus 4321,20070101,20080101,EP
20+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,47, , ,,$/MWH,,,PJM,Bus 4321,20080101,20090101,EP
21+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,49, , ,,$/MWH,,,PJM,Bus 4321,20090101,20100101,EP
22+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,51, , ,,$/MWH,,,PJM,Bus 4321,20100101,20110101,EP
23+
C78,The Electric Company,"The Electric Marketing Co., LLC",23456789,Y,FERC Electric Tariff Original Volume No. 2,Service Agreement 1,1/2/1992,1/2/1992,1/1/2012,,Renewable annually by mutual agreement after termination date.,UP,LT,Y,FP,CB,ENERGY,0,MWH,53, , ,,$/MWH,,,PJM,Bus 4321,20110101,20120101,EP
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Real world examples from http://www.ferc.gov/docs-filing/eqr/soft-tools/sample-csv.asp

0 commit comments

Comments
 (0)