3939import java .util .Map ;
4040import java .util .NoSuchElementException ;
4141import java .util .TreeMap ;
42+ import java .util .stream .Collectors ;
4243
4344/**
4445 * Parses CSV files according to the specified format.
133134 */
134135public final class CSVParser implements Iterable <CSVRecord >, Closeable {
135136
137+ class CSVRecordIterator implements Iterator <CSVRecord > {
138+ private CSVRecord current ;
139+
140+ private CSVRecord getNextRecord () {
141+ try {
142+ return CSVParser .this .nextRecord ();
143+ } catch (final IOException e ) {
144+ throw new IllegalStateException (
145+ e .getClass ().getSimpleName () + " reading next record: " + e .toString (), e );
146+ }
147+ }
148+
149+ @ Override
150+ public boolean hasNext () {
151+ if (CSVParser .this .isClosed ()) {
152+ return false ;
153+ }
154+ if (this .current == null ) {
155+ this .current = this .getNextRecord ();
156+ }
157+
158+ return this .current != null ;
159+ }
160+
161+ @ Override
162+ public CSVRecord next () {
163+ if (CSVParser .this .isClosed ()) {
164+ throw new NoSuchElementException ("CSVParser has been closed" );
165+ }
166+ CSVRecord next = this .current ;
167+ this .current = null ;
168+
169+ if (next == null ) {
170+ // hasNext() wasn't called before
171+ next = this .getNextRecord ();
172+ if (next == null ) {
173+ throw new NoSuchElementException ("No more CSV records available" );
174+ }
175+ }
176+
177+ return next ;
178+ }
179+
180+ @ Override
181+ public void remove () {
182+ throw new UnsupportedOperationException ();
183+ }
184+ }
185+
186+ static List <String > createHeaderNames (final Map <String , Integer > headerMap ) {
187+ return headerMap == null ? null
188+ : headerMap .entrySet ().stream ().sorted (Map .Entry .comparingByValue ()).map (Map .Entry ::getKey )
189+ .collect (Collectors .toList ());
190+ }
191+
136192 /**
137193 * Creates a parser for the given {@link File}.
138194 *
@@ -229,6 +285,8 @@ public static CSVParser parse(final Reader reader, final CSVFormat format) throw
229285 return new CSVParser (reader , format );
230286 }
231287
288+ // the following objects are shared to reduce garbage
289+
232290 /**
233291 * Creates a parser for the given {@link String}.
234292 *
@@ -277,13 +335,14 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
277335 return new CSVParser (new InputStreamReader (url .openStream (), charset ), format );
278336 }
279337
280- // the following objects are shared to reduce garbage
281-
282338 private final CSVFormat format ;
283339
284340 /** A mapping of column names to column indices */
285341 private final Map <String , Integer > headerMap ;
286342
343+ /** Preserve the column order to avoid re-computing it. */
344+ private final List <String > headerNames ;
345+
287346 private final Lexer lexer ;
288347
289348 private final CSVRecordIterator csvRecordIterator ;
@@ -349,14 +408,15 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
349408 */
350409 @ SuppressWarnings ("resource" )
351410 public CSVParser (final Reader reader , final CSVFormat format , final long characterOffset , final long recordNumber )
352- throws IOException {
411+ throws IOException {
// Both arguments are mandatory; they are checked before any state is touched.
353412 Assertions .notNull (reader , "reader" );
354413 Assertions .notNull (format , "format" );
355414
356415 this .format = format ;
// The reader is wrapped in an ExtendedBufferedReader before being handed to the Lexer.
357416 this .lexer = new Lexer (format , new ExtendedBufferedReader (reader ));
358417 this .csvRecordIterator = new CSVRecordIterator ();
// Order matters from here on: createHeaderMap() may call nextRecord(), which increments recordNumber,
// so the header handling has to run before recordNumber is assigned below.
359- this .headerMap = this .createHeaderMap ();
418+ this .headerMap = createHeaderMap (); // 1st: may consume a record through nextRecord()
419+ this .headerNames = createHeaderNames (this .headerMap ); // 2nd: derived from headerMap, so it must follow it
360420 this .characterOffset = characterOffset ;
// Stored one lower than requested because nextRecord() increments the counter before numbering a record.
361421 this .recordNumber = recordNumber - 1 ;
362422 }
@@ -384,6 +444,53 @@ public void close() throws IOException {
384444 }
385445 }
386446
447+ /**
448+ * Creates the name to index mapping if the format defines a header.
449+ *
450+ * @return null if the format has no header.
451+ * @throws IOException if there is a problem reading the header or skipping the first record
452+ */
453+ private Map <String , Integer > createHeaderMap () throws IOException {
454+ Map <String , Integer > hdrMap = null ;
455+ final String [] formatHeader = this .format .getHeader ();
456+ if (formatHeader != null ) {
457+ hdrMap = this .format .getIgnoreHeaderCase () ?
458+ new TreeMap <>(String .CASE_INSENSITIVE_ORDER ) :
459+ new TreeMap <>();
460+
461+ String [] headerRecord = null ;
462+ if (formatHeader .length == 0 ) {
463+ // read the header from the first line of the file
464+ final CSVRecord nextRecord = this .nextRecord ();
465+ if (nextRecord != null ) {
466+ headerRecord = nextRecord .values ();
467+ }
468+ } else {
469+ if (this .format .getSkipHeaderRecord ()) {
470+ this .nextRecord ();
471+ }
472+ headerRecord = formatHeader ;
473+ }
474+
475+ // build the name to index mappings
476+ if (headerRecord != null ) {
477+ for (int i = 0 ; i < headerRecord .length ; i ++) {
478+ final String header = headerRecord [i ];
479+ final boolean containsHeader = header == null ? false : hdrMap .containsKey (header );
480+ final boolean emptyHeader = header == null || header .trim ().isEmpty ();
481+ if (containsHeader && (!emptyHeader || !this .format .getAllowMissingColumnNames ())) {
482+ throw new IllegalArgumentException ("The header contains a duplicate name: \" " + header
483+ + "\" in " + Arrays .toString (headerRecord ));
484+ }
485+ if (header != null ) {
486+ hdrMap .put (header , Integer .valueOf (i ));
487+ }
488+ }
489+ }
490+ }
491+ return hdrMap ;
492+ }
493+
387494 /**
388495 * Returns the current line number in the input stream.
389496 *
@@ -409,11 +516,11 @@ public String getFirstEndOfLine() {
409516 }
410517
411518 /**
412- * Returns a copy of the header map that iterates in column order .
519+ * Returns a copy of the header map.
413520 * <p>
414521 * The map keys are column names. The map values are 0-based indices.
415522 * </p>
416- * @return a copy of the header map that iterates in column order .
523+ * @return a copy of the header map.
417524 */
418525 public Map <String , Integer > getHeaderMap () {
419526 return this .headerMap == null ? null : new LinkedHashMap <>(this .headerMap );
@@ -454,53 +561,6 @@ public List<CSVRecord> getRecords() throws IOException {
454561 return records ;
455562 }
456563
// Deletion side of the diff: every line below carries the '-' marker. The same method text also appears
// earlier in this listing on the addition side, i.e. the method was moved rather than changed.
457- /**
458- * Creates the name to index mapping if the format defines a header.
459- *
460- * @return null if the format has no header.
461- * @throws IOException if there is a problem reading the header or skipping the first record
462- */
463- private Map <String , Integer > createHeaderMap () throws IOException {
464- Map <String , Integer > hdrMap = null ;
465- final String [] formatHeader = this .format .getHeader ();
466- if (formatHeader != null ) {
// Case-insensitive key ordering is used only when the format asks to ignore header case.
467- hdrMap = this .format .getIgnoreHeaderCase () ?
468- new TreeMap <>(String .CASE_INSENSITIVE_ORDER ) :
469- new TreeMap <>();
470-
471- String [] headerRecord = null ;
472- if (formatHeader .length == 0 ) {
473- // read the header from the first line of the file
474- final CSVRecord nextRecord = this .nextRecord ();
475- if (nextRecord != null ) {
476- headerRecord = nextRecord .values ();
477- }
478- } else {
// Names come from the format; one input record is consumed only when skipping was requested.
479- if (this .format .getSkipHeaderRecord ()) {
480- this .nextRecord ();
481- }
482- headerRecord = formatHeader ;
483- }
484-
485- // build the name to index mappings
486- if (headerRecord != null ) {
487- for (int i = 0 ; i < headerRecord .length ; i ++) {
488- final String header = headerRecord [i ];
// null names are never looked up or stored in the map
489- final boolean containsHeader = header == null ? false : hdrMap .containsKey (header );
490- final boolean emptyHeader = header == null || header .trim ().isEmpty ();
// A repeated name is rejected unless it is blank and the format allows missing column names.
491- if (containsHeader && (!emptyHeader || !this .format .getAllowMissingColumnNames ())) {
492- throw new IllegalArgumentException ("The header contains a duplicate name: \" " + header
493- + "\" in " + Arrays .toString (headerRecord ));
494- }
495- if (header != null ) {
496- hdrMap .put (header , Integer .valueOf (i ));
497- }
498- }
499- }
500- }
501- return hdrMap ;
502- }
503-
504564 /**
505565 * Gets whether this parser is closed.
506566 *
@@ -527,55 +587,6 @@ public Iterator<CSVRecord> iterator() {
527587 return csvRecordIterator ;
528588 }
529589
// Deletion side of the diff: every line below carries the '-' marker. The same class text also appears
// earlier in this listing on the addition side, i.e. the class was moved rather than changed.
530- class CSVRecordIterator implements Iterator <CSVRecord > {
// Record read ahead by hasNext() and not yet returned by next(); null when nothing is buffered.
531- private CSVRecord current ;
532-
// Reads the next record from the enclosing parser, converting IOException into IllegalStateException.
533- private CSVRecord getNextRecord () {
534- try {
535- return CSVParser .this .nextRecord ();
536- } catch (final IOException e ) {
537- throw new IllegalStateException (
538- e .getClass ().getSimpleName () + " reading next record: " + e .toString (), e );
539- }
540- }
541-
542- @ Override
543- public boolean hasNext () {
// A closed parser reports no further records.
544- if (CSVParser .this .isClosed ()) {
545- return false ;
546- }
// Read ahead only when nothing is buffered, so repeated calls do not consume extra records.
547- if (this .current == null ) {
548- this .current = this .getNextRecord ();
549- }
550-
551- return this .current != null ;
552- }
553-
554- @ Override
555- public CSVRecord next () {
556- if (CSVParser .this .isClosed ()) {
557- throw new NoSuchElementException ("CSVParser has been closed" );
558- }
// Take over the buffered record, if any, and clear the buffer.
559- CSVRecord next = this .current ;
560- this .current = null ;
561-
562- if (next == null ) {
563- // hasNext() wasn't called before
564- next = this .getNextRecord ();
565- if (next == null ) {
566- throw new NoSuchElementException ("No more CSV records available" );
567- }
568- }
569-
570- return next ;
571- }
572-
// Removal is not supported by this iterator.
573- @ Override
574- public void remove () {
575- throw new UnsupportedOperationException ();
576- }
577- }
578-
579590 /**
580591 * Parses the next record from the current point in the stream.
581592 *
@@ -622,8 +633,8 @@ CSVRecord nextRecord() throws IOException {
622633 if (!this .recordList .isEmpty ()) {
623634 this .recordNumber ++;
624635 final String comment = sb == null ? null : sb .toString ();
625- result = new CSVRecord (this .recordList .toArray (new String [this .recordList .size ()]), this .headerMap , comment ,
626- this .recordNumber , startCharPosition );
636+ result = new CSVRecord (this .recordList .toArray (new String [this .recordList .size ()]), this .headerMap ,
637+ this . headerNames , comment , this .recordNumber , startCharPosition );
627638 }
628639 return result ;
629640 }
0 commit comments