2222import java .util .ArrayList ;
2323import java .util .List ;
2424
25+ import static org .apache .commons .csv .CSVParser .Token .Type .*;
2526
2627/**
2728 * Parses CSV files according to the specified configuration.
@@ -54,19 +55,6 @@ public class CSVParser {
5455 /** length of the initial token (content-)buffer */
5556 private static final int INITIAL_TOKEN_LENGTH = 50 ;
5657
57- // the token types
58- /** Token has no valid content, i.e. is in its initialized state. */
59- static final int TT_INVALID = -1 ;
60-
61- /** Token with content, at beginning or in the middle of a line. */
62- static final int TT_TOKEN = 0 ;
63-
64- /** Token (which can have content) when end of file is reached. */
65- static final int TT_EOF = 1 ;
66-
67- /** Token with content when end of a line is reached. */
68- static final int TT_EORECORD = 2 ;
69-
7058 /** Immutable empty String array. */
7159 private static final String [] EMPTY_STRING_ARRAY = new String [0 ];
7260
@@ -91,22 +79,33 @@ public class CSVParser {
9179 * It is used as contract between the lexer and the parser.
9280 */
9381 static class Token {
94- /**
95- * Token type, see TT_xxx constants.
96- */
97- int type = TT_INVALID ;
98- /**
99- * The content buffer.
100- */
82+
83+ enum Type {
84+ /** Token has no valid content, i.e. is in its initialized state: the value of a new Token and the one restored by reset(); getLine() rejects it with an IOException. */
85+ INVALID ,
86+
87+ /** Token with content, at beginning or in the middle of a line; assigned by the lexers when a delimiter terminates the token, so getLine() keeps reading the same record. */
88+ TOKEN ,
89+
90+ /** Token (which can have content) when end of file is reached; getLine() adds its content to the record only if the token's isReady flag is set. */
91+ EOF ,
92+
93+ /** Token with content when end of a line is reached; getLine() adds its content and then stops reading the current record. */
94+ EORECORD
95+ }
96+
97+ /** Token type */
98+ Type type = INVALID ;
99+
100+ /** The content buffer. */
101101 CharBuffer content = new CharBuffer (INITIAL_TOKEN_LENGTH );
102- /**
103- * Token ready flag: indicates a valid token with content (ready for the parser).
104- */
102+
103+ /** Token ready flag: indicates a valid token with content (ready for the parser). */
105104 boolean isReady ;
106105
107106 Token reset () {
108107 content .clear ();
109- type = TT_INVALID ;
108+ type = INVALID ;
110109 isReady = false ;
111110 return this ;
112111 }
@@ -180,26 +179,26 @@ public String[] getLine() throws IOException {
180179 reusableToken .reset ();
181180 nextToken (reusableToken );
182181 switch (reusableToken .type ) {
183- case TT_TOKEN :
182+ case TOKEN :
184183 record .add (reusableToken .content .toString ());
185184 break ;
186- case TT_EORECORD :
185+ case EORECORD :
187186 record .add (reusableToken .content .toString ());
188187 break ;
189- case TT_EOF :
188+ case EOF :
190189 if (reusableToken .isReady ) {
191190 record .add (reusableToken .content .toString ());
192191 } else {
193192 ret = null ;
194193 }
195194 break ;
196- case TT_INVALID :
195+ case INVALID :
197196 default :
198197 // error: throw IOException
199198 throw new IOException ("(line " + getLineNumber () + ") invalid parse sequence" );
200199 // unreachable: break;
201200 }
202- if (reusableToken .type != TT_TOKEN ) {
201+ if (reusableToken .type != TOKEN ) {
203202 break ;
204203 }
205204 }
@@ -272,19 +271,19 @@ Token nextToken(Token tkn) throws IOException {
272271 c = in .readAgain ();
273272 // reached end of file without any content (empty line at the end)
274273 if (isEndOfFile (c )) {
275- tkn .type = TT_EOF ;
274+ tkn .type = EOF ;
276275 return tkn ;
277276 }
278277 }
279278
280- // did we reach eof during the last iteration already ? TT_EOF
279+ // did we reach eof during the last iteration already ? EOF
281280 if (isEndOfFile (lastChar ) || (lastChar != format .getDelimiter () && isEndOfFile (c ))) {
282- tkn .type = TT_EOF ;
281+ tkn .type = EOF ;
283282 return tkn ;
284283 }
285284
286285 // important: make sure a new char gets consumed in each iteration
287- while (!tkn .isReady && tkn .type != TT_EOF ) {
286+ while (!tkn .isReady && tkn .type != EOF ) {
288287 // ignore whitespaces at beginning of a token
289288 while (format .isLeadingSpacesIgnored () && isWhitespace (c ) && !eol ) {
290289 wsBuf .append ((char ) c );
@@ -297,21 +296,21 @@ Token nextToken(Token tkn) throws IOException {
297296 in .readLine ();
298297 tkn = nextToken (tkn .reset ());
299298 } else if (c == format .getDelimiter ()) {
300- // empty token return TT_TOKEN ("")
301- tkn .type = TT_TOKEN ;
299+ // empty token return TOKEN ("")
300+ tkn .type = TOKEN ;
302301 tkn .isReady = true ;
303302 } else if (eol ) {
304- // empty token return TT_EORECORD ("")
303+ // empty token return EORECORD ("")
305304 //noop: tkn.content.append("");
306- tkn .type = TT_EORECORD ;
305+ tkn .type = EORECORD ;
307306 tkn .isReady = true ;
308307 } else if (c == format .getEncapsulator ()) {
309308 // consume encapsulated token
310309 encapsulatedTokenLexer (tkn , c );
311310 } else if (isEndOfFile (c )) {
312- // end of file return TT_EOF ()
311+ // end of file return EOF ()
313312 //noop: tkn.content.append("");
314- tkn .type = TT_EOF ;
313+ tkn .type = EOF ;
315314 tkn .isReady = true ;
316315 } else {
317316 // next token must be a simple token
@@ -332,9 +331,9 @@ Token nextToken(Token tkn) throws IOException {
332331 * A simple token might contain escaped delimiters (as \, or \;). The
333332 * token is finished when one of the following conditions become true:
334333 * <ul>
335- * <li>end of line has been reached (TT_EORECORD )</li>
336- * <li>end of stream has been reached (TT_EOF )</li>
337- * <li>an unescaped delimiter has been reached (TT_TOKEN )</li>
334+ * <li>end of line has been reached (EORECORD )</li>
335+ * <li>end of stream has been reached (EOF )</li>
336+ * <li>an unescaped delimiter has been reached (TOKEN )</li>
338337 * </ul>
339338 *
340339 * @param tkn the current token
@@ -346,17 +345,17 @@ private Token simpleTokenLexer(Token tkn, int c) throws IOException {
346345 for (; ;) {
347346 if (isEndOfLine (c )) {
348347 // end of record
349- tkn .type = TT_EORECORD ;
348+ tkn .type = EORECORD ;
350349 tkn .isReady = true ;
351350 break ;
352351 } else if (isEndOfFile (c )) {
353352 // end of file
354- tkn .type = TT_EOF ;
353+ tkn .type = EOF ;
355354 tkn .isReady = true ;
356355 break ;
357356 } else if (c == format .getDelimiter ()) {
358357 // end of token
359- tkn .type = TT_TOKEN ;
358+ tkn .type = TOKEN ;
360359 tkn .isReady = true ;
361360 break ;
362361 } else if (c == '\\' && format .isUnicodeEscapesInterpreted () && in .lookAhead () == 'u' ) {
@@ -414,16 +413,16 @@ private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
414413 for (; ;) {
415414 c = in .read ();
416415 if (c == format .getDelimiter ()) {
417- tkn .type = TT_TOKEN ;
416+ tkn .type = TOKEN ;
418417 tkn .isReady = true ;
419418 return tkn ;
420419 } else if (isEndOfFile (c )) {
421- tkn .type = TT_EOF ;
420+ tkn .type = EOF ;
422421 tkn .isReady = true ;
423422 return tkn ;
424423 } else if (isEndOfLine (c )) {
425424 // ok eo token reached
426- tkn .type = TT_EORECORD ;
425+ tkn .type = EORECORD ;
427426 tkn .isReady = true ;
428427 return tkn ;
429428 } else if (!isWhitespace (c )) {
0 commit comments