Skip to content
This repository was archived by the owner on Sep 14, 2025. It is now read-only.

Commit 788f2aa

Browse files
author
Gary Gregory
committed
[CSV-239] Cannot get headers in column order from CSVRecord.
1 parent d8d5de6 commit 788f2aa

4 files changed

Lines changed: 196 additions & 144 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
<action issue="CSV-234" type="add" dev="ggregory" due-to="Roberto Benedetti, Gary Gregory">Add support for java.sql.Clob.</action>
4646
<action issue="CSV-237" type="update" dev="ggregory" due-to="Gary Gregory">Update to Java 8.</action>
4747
<action issue="CSV-238" type="fix" dev="ggregory" due-to="Stephen Olander-Waters">Escape quotes in CLOBs #39.</action>
48+
<action issue="CSV-239" type="add" dev="ggregory" due-to="Gary Gregory, Dave Moten">Cannot get headers in column order from CSVRecord.</action>
4849
<action type="update" dev="ggregory" due-to="Gary Gregory">Update tests from H2 1.4.198 to 1.4.199.</action>
4950
</release>
5051
<release version="1.6" date="2018-09-22" description="Feature and bug fix release (Java 7)">

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 115 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import java.util.Map;
4040
import java.util.NoSuchElementException;
4141
import java.util.TreeMap;
42+
import java.util.stream.Collectors;
4243

4344
/**
4445
* Parses CSV files according to the specified format.
@@ -133,6 +134,61 @@
133134
*/
134135
public final class CSVParser implements Iterable<CSVRecord>, Closeable {
135136

137+
/**
 * Iterator over the records of the enclosing {@link CSVParser}.
 * <p>
 * Records are read on demand through {@code CSVParser.this.nextRecord()}. At most one record is buffered in
 * {@code current}, so that {@link #hasNext()} can look ahead without losing the record it read.
 * </p>
 */
class CSVRecordIterator implements Iterator<CSVRecord> {
138+
/** Record read by {@link #hasNext()} but not yet handed out by {@link #next()}; null when nothing is buffered. */
private CSVRecord current;
139+
140+
/**
 * Reads the next record from the enclosing parser.
 *
 * @return whatever {@code CSVParser.this.nextRecord()} returns; callers in this class treat null as
 *         "no more records"
 * @throws IllegalStateException wrapping any {@link IOException} raised while reading (the cause is kept)
 */
private CSVRecord getNextRecord() {
141+
try {
142+
return CSVParser.this.nextRecord();
143+
} catch (final IOException e) {
144+
throw new IllegalStateException(
145+
e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
146+
}
147+
}
148+
149+
/**
 * Tells whether another record can be returned.
 *
 * @return false if the parser is closed or no further record could be read, true otherwise
 */
@Override
150+
public boolean hasNext() {
151+
if (CSVParser.this.isClosed()) {
152+
return false;
153+
}
154+
if (this.current == null) { // nothing buffered yet: read ahead and keep the record for next()
155+
this.current = this.getNextRecord();
156+
}
157+
158+
return this.current != null;
159+
}
160+
161+
/**
 * Returns the buffered record if there is one, otherwise reads the next record from the parser.
 *
 * @return the next record, never null
 * @throws NoSuchElementException if the parser is closed or no further record is available
 */
@Override
162+
public CSVRecord next() {
163+
if (CSVParser.this.isClosed()) {
164+
throw new NoSuchElementException("CSVParser has been closed");
165+
}
166+
CSVRecord next = this.current;
167+
this.current = null; // the buffered record is consumed by this call
168+
169+
if (next == null) {
170+
// hasNext() wasn't called before
171+
next = this.getNextRecord();
172+
if (next == null) {
173+
throw new NoSuchElementException("No more CSV records available");
174+
}
175+
}
176+
177+
return next;
178+
}
179+
180+
/**
 * Not supported.
 *
 * @throws UnsupportedOperationException always
 */
@Override
181+
public void remove() {
182+
throw new UnsupportedOperationException();
183+
}
184+
}
185+
186+
/**
 * Creates the list of header names in column order.
 * <p>
 * The entries of the given map are sorted by value (the column index) and their keys (the column names) are
 * collected in that order.
 * </p>
 *
 * @param headerMap a mapping of column names to column indices, may be null
 * @return the column names sorted by column index, or null if {@code headerMap} is null
 */
static List<String> createHeaderNames(final Map<String, Integer> headerMap) {
187+
return headerMap == null ? null
188+
: headerMap.entrySet().stream().sorted(Map.Entry.comparingByValue()).map(Map.Entry::getKey)
189+
.collect(Collectors.toList());
190+
}
191+
136192
/**
137193
* Creates a parser for the given {@link File}.
138194
*
@@ -229,6 +285,8 @@ public static CSVParser parse(final Reader reader, final CSVFormat format) throw
229285
return new CSVParser(reader, format);
230286
}
231287

288+
// the following objects are shared to reduce garbage
289+
232290
/**
233291
* Creates a parser for the given {@link String}.
234292
*
@@ -277,13 +335,14 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
277335
return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
278336
}
279337

280-
// the following objects are shared to reduce garbage
281-
282338
private final CSVFormat format;
283339

284340
/** A mapping of column names to column indices */
285341
private final Map<String, Integer> headerMap;
286342

343+
/** Header names in column order, kept so the order does not have to be re-computed. */
344+
private final List<String> headerNames;
345+
287346
private final Lexer lexer;
288347

289348
private final CSVRecordIterator csvRecordIterator;
@@ -349,14 +408,15 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
349408
*/
350409
@SuppressWarnings("resource")
351410
public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
352-
throws IOException {
411+
throws IOException {
353412
Assertions.notNull(reader, "reader");
354413
Assertions.notNull(format, "format");
355414

356415
this.format = format;
357416
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
358417
this.csvRecordIterator = new CSVRecordIterator();
359-
this.headerMap = this.createHeaderMap();
418+
this.headerMap = createHeaderMap(); // 1st
419+
this.headerNames = createHeaderNames(this.headerMap); // 2nd
360420
this.characterOffset = characterOffset;
361421
this.recordNumber = recordNumber - 1;
362422
}
@@ -384,6 +444,53 @@ public void close() throws IOException {
384444
}
385445
}
386446

447+
/**
448+
* Creates the name to index mapping if the format defines a header.
449+
*
450+
* @return null if the format has no header.
451+
* @throws IOException if there is a problem reading the header or skipping the first record
452+
*/
453+
private Map<String, Integer> createHeaderMap() throws IOException {
454+
Map<String, Integer> hdrMap = null; // stays null when the format defines no header
455+
final String[] formatHeader = this.format.getHeader();
456+
if (formatHeader != null) {
457+
// TreeMap iterates in sorted name order, not column order; names compare case-insensitively
// when the format ignores header case
hdrMap = this.format.getIgnoreHeaderCase() ?
458+
new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
459+
new TreeMap<>();
460+
461+
String[] headerRecord = null;
462+
if (formatHeader.length == 0) { // empty header array: take the names from the input itself
463+
// read the header from the first line of the file
464+
final CSVRecord nextRecord = this.nextRecord();
465+
if (nextRecord != null) { // a null record leaves headerRecord null, so the map stays empty
466+
headerRecord = nextRecord.values();
467+
}
468+
} else { // the names are supplied by the format
469+
if (this.format.getSkipHeaderRecord()) {
470+
this.nextRecord(); // read and discard the first record
471+
}
472+
headerRecord = formatHeader;
473+
}
474+
475+
// build the name to index mappings
476+
if (headerRecord != null) {
477+
for (int i = 0; i < headerRecord.length; i++) {
478+
final String header = headerRecord[i];
479+
final boolean containsHeader = header == null ? false : hdrMap.containsKey(header);
480+
final boolean emptyHeader = header == null || header.trim().isEmpty();
481+
// a repeated name is an error, unless it is blank and the format allows missing column names
if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) {
482+
throw new IllegalArgumentException("The header contains a duplicate name: \"" + header
483+
+ "\" in " + Arrays.toString(headerRecord));
484+
}
485+
if (header != null) { // null names get no entry
486+
hdrMap.put(header, Integer.valueOf(i)); // an allowed repeated blank name replaces the earlier index
487+
}
488+
}
489+
}
490+
}
491+
return hdrMap;
492+
}
493+
387494
/**
388495
* Returns the current line number in the input stream.
389496
*
@@ -409,11 +516,11 @@ public String getFirstEndOfLine() {
409516
}
410517

411518
/**
412-
* Returns a copy of the header map that iterates in column order.
519+
* Returns a copy of the header map.
413520
* <p>
414521
* The map keys are column names. The map values are 0-based indices.
415522
* </p>
416-
* @return a copy of the header map that iterates in column order.
523+
* @return a copy of the header map.
417524
*/
418525
public Map<String, Integer> getHeaderMap() {
419526
return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap);
@@ -454,53 +561,6 @@ public List<CSVRecord> getRecords() throws IOException {
454561
return records;
455562
}
456563

457-
/**
458-
* Creates the name to index mapping if the format defines a header.
459-
*
460-
* @return null if the format has no header.
461-
* @throws IOException if there is a problem reading the header or skipping the first record
462-
*/
463-
private Map<String, Integer> createHeaderMap() throws IOException {
464-
Map<String, Integer> hdrMap = null; // stays null when the format defines no header
465-
final String[] formatHeader = this.format.getHeader();
466-
if (formatHeader != null) {
467-
// TreeMap iterates in sorted name order, not column order; names compare case-insensitively
// when the format ignores header case
hdrMap = this.format.getIgnoreHeaderCase() ?
468-
new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
469-
new TreeMap<>();
470-
471-
String[] headerRecord = null;
472-
if (formatHeader.length == 0) { // empty header array: take the names from the input itself
473-
// read the header from the first line of the file
474-
final CSVRecord nextRecord = this.nextRecord();
475-
if (nextRecord != null) { // a null record leaves headerRecord null, so the map stays empty
476-
headerRecord = nextRecord.values();
477-
}
478-
} else { // the names are supplied by the format
479-
if (this.format.getSkipHeaderRecord()) {
480-
this.nextRecord(); // read and discard the first record
481-
}
482-
headerRecord = formatHeader;
483-
}
484-
485-
// build the name to index mappings
486-
if (headerRecord != null) {
487-
for (int i = 0; i < headerRecord.length; i++) {
488-
final String header = headerRecord[i];
489-
final boolean containsHeader = header == null ? false : hdrMap.containsKey(header);
490-
final boolean emptyHeader = header == null || header.trim().isEmpty();
491-
// a repeated name is an error, unless it is blank and the format allows missing column names
if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) {
492-
throw new IllegalArgumentException("The header contains a duplicate name: \"" + header
493-
+ "\" in " + Arrays.toString(headerRecord));
494-
}
495-
if (header != null) { // null names get no entry
496-
hdrMap.put(header, Integer.valueOf(i)); // an allowed repeated blank name replaces the earlier index
497-
}
498-
}
499-
}
500-
}
501-
return hdrMap;
502-
}
503-
504564
/**
505565
* Gets whether this parser is closed.
506566
*
@@ -527,55 +587,6 @@ public Iterator<CSVRecord> iterator() {
527587
return csvRecordIterator;
528588
}
529589

530-
/**
 * Iterator over the records of the enclosing {@link CSVParser}.
 * <p>
 * Records are read on demand through {@code CSVParser.this.nextRecord()}. At most one record is buffered in
 * {@code current}, so that {@link #hasNext()} can look ahead without losing the record it read.
 * </p>
 */
class CSVRecordIterator implements Iterator<CSVRecord> {
531-
/** Record read by {@link #hasNext()} but not yet handed out by {@link #next()}; null when nothing is buffered. */
private CSVRecord current;
532-
533-
/**
 * Reads the next record from the enclosing parser.
 *
 * @return whatever {@code CSVParser.this.nextRecord()} returns; callers in this class treat null as
 *         "no more records"
 * @throws IllegalStateException wrapping any {@link IOException} raised while reading (the cause is kept)
 */
private CSVRecord getNextRecord() {
534-
try {
535-
return CSVParser.this.nextRecord();
536-
} catch (final IOException e) {
537-
throw new IllegalStateException(
538-
e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
539-
}
540-
}
541-
542-
/**
 * Tells whether another record can be returned.
 *
 * @return false if the parser is closed or no further record could be read, true otherwise
 */
@Override
543-
public boolean hasNext() {
544-
if (CSVParser.this.isClosed()) {
545-
return false;
546-
}
547-
if (this.current == null) { // nothing buffered yet: read ahead and keep the record for next()
548-
this.current = this.getNextRecord();
549-
}
550-
551-
return this.current != null;
552-
}
553-
554-
/**
 * Returns the buffered record if there is one, otherwise reads the next record from the parser.
 *
 * @return the next record, never null
 * @throws NoSuchElementException if the parser is closed or no further record is available
 */
@Override
555-
public CSVRecord next() {
556-
if (CSVParser.this.isClosed()) {
557-
throw new NoSuchElementException("CSVParser has been closed");
558-
}
559-
CSVRecord next = this.current;
560-
this.current = null; // the buffered record is consumed by this call
561-
562-
if (next == null) {
563-
// hasNext() wasn't called before
564-
next = this.getNextRecord();
565-
if (next == null) {
566-
throw new NoSuchElementException("No more CSV records available");
567-
}
568-
}
569-
570-
return next;
571-
}
572-
573-
/**
 * Not supported.
 *
 * @throws UnsupportedOperationException always
 */
@Override
574-
public void remove() {
575-
throw new UnsupportedOperationException();
576-
}
577-
}
578-
579590
/**
580591
* Parses the next record from the current point in the stream.
581592
*
@@ -622,8 +633,8 @@ CSVRecord nextRecord() throws IOException {
622633
if (!this.recordList.isEmpty()) {
623634
this.recordNumber++;
624635
final String comment = sb == null ? null : sb.toString();
625-
result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, comment,
626-
this.recordNumber, startCharPosition);
636+
result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap,
637+
this.headerNames, comment, this.recordNumber, startCharPosition);
627638
}
628639
return result;
629640
}

0 commit comments

Comments
 (0)