Skip to content

Commit 16bfec0

Browse files
committed
Turned the token types into an Enum
git-svn-id: https://svn.apache.org/repos/asf/commons/sandbox/csv/trunk@1199872 13f79535-47bb-0310-9956-ffa450edef68
1 parent cbcfb72 commit 16bfec0

2 files changed

Lines changed: 113 additions & 114 deletions

File tree

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 47 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.ArrayList;
2323
import java.util.List;
2424

25+
import static org.apache.commons.csv.CSVParser.Token.Type.*;
2526

2627
/**
2728
* Parses CSV files according to the specified configuration.
@@ -54,19 +55,6 @@ public class CSVParser {
5455
/** length of the initial token (content-)buffer */
5556
private static final int INITIAL_TOKEN_LENGTH = 50;
5657

57-
// the token types
58-
/** Token has no valid content, i.e. is in its initialized state. */
59-
static final int TT_INVALID = -1;
60-
61-
/** Token with content, at beginning or in the middle of a line. */
62-
static final int TT_TOKEN = 0;
63-
64-
/** Token (which can have content) when end of file is reached. */
65-
static final int TT_EOF = 1;
66-
67-
/** Token with content when end of a line is reached. */
68-
static final int TT_EORECORD = 2;
69-
7058
/** Immutable empty String array. */
7159
private static final String[] EMPTY_STRING_ARRAY = new String[0];
7260

@@ -91,22 +79,33 @@ public class CSVParser {
9179
* It is used as contract between the lexer and the parser.
9280
*/
9381
static class Token {
94-
/**
95-
* Token type, see TT_xxx constants.
96-
*/
97-
int type = TT_INVALID;
98-
/**
99-
* The content buffer.
100-
*/
82+
83+
enum Type {
84+
/** Token has no valid content, i.e. is in its initialized state. */
85+
INVALID,
86+
87+
/** Token with content, at beginning or in the middle of a line. */
88+
TOKEN,
89+
90+
/** Token (which can have content) when end of file is reached. */
91+
EOF,
92+
93+
/** Token with content when end of a line is reached. */
94+
EORECORD
95+
}
96+
97+
/** Token type */
98+
Type type = INVALID;
99+
100+
/** The content buffer. */
101101
CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH);
102-
/**
103-
* Token ready flag: indicates a valid token with content (ready for the parser).
104-
*/
102+
103+
/** Token ready flag: indicates a valid token with content (ready for the parser). */
105104
boolean isReady;
106105

107106
Token reset() {
108107
content.clear();
109-
type = TT_INVALID;
108+
type = INVALID;
110109
isReady = false;
111110
return this;
112111
}
@@ -180,26 +179,26 @@ public String[] getLine() throws IOException {
180179
reusableToken.reset();
181180
nextToken(reusableToken);
182181
switch (reusableToken.type) {
183-
case TT_TOKEN:
182+
case TOKEN:
184183
record.add(reusableToken.content.toString());
185184
break;
186-
case TT_EORECORD:
185+
case EORECORD:
187186
record.add(reusableToken.content.toString());
188187
break;
189-
case TT_EOF:
188+
case EOF:
190189
if (reusableToken.isReady) {
191190
record.add(reusableToken.content.toString());
192191
} else {
193192
ret = null;
194193
}
195194
break;
196-
case TT_INVALID:
195+
case INVALID:
197196
default:
198197
// error: throw IOException
199198
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
200199
// unreachable: break;
201200
}
202-
if (reusableToken.type != TT_TOKEN) {
201+
if (reusableToken.type != TOKEN) {
203202
break;
204203
}
205204
}
@@ -272,19 +271,19 @@ Token nextToken(Token tkn) throws IOException {
272271
c = in.readAgain();
273272
// reached end of file without any content (empty line at the end)
274273
if (isEndOfFile(c)) {
275-
tkn.type = TT_EOF;
274+
tkn.type = EOF;
276275
return tkn;
277276
}
278277
}
279278

280-
// did we reach eof during the last iteration already ? TT_EOF
279+
// did we already reach end of file during the last iteration? If so, return an EOF token
281280
if (isEndOfFile(lastChar) || (lastChar != format.getDelimiter() && isEndOfFile(c))) {
282-
tkn.type = TT_EOF;
281+
tkn.type = EOF;
283282
return tkn;
284283
}
285284

286285
// important: make sure a new char gets consumed in each iteration
287-
while (!tkn.isReady && tkn.type != TT_EOF) {
286+
while (!tkn.isReady && tkn.type != EOF) {
288287
// ignore whitespace at the beginning of a token
289288
while (format.isLeadingSpacesIgnored() && isWhitespace(c) && !eol) {
290289
wsBuf.append((char) c);
@@ -297,21 +296,21 @@ Token nextToken(Token tkn) throws IOException {
297296
in.readLine();
298297
tkn = nextToken(tkn.reset());
299298
} else if (c == format.getDelimiter()) {
300-
// empty token return TT_TOKEN("")
301-
tkn.type = TT_TOKEN;
299+
// empty token return TOKEN("")
300+
tkn.type = TOKEN;
302301
tkn.isReady = true;
303302
} else if (eol) {
304-
// empty token return TT_EORECORD("")
303+
// empty token return EORECORD("")
305304
//noop: tkn.content.append("");
306-
tkn.type = TT_EORECORD;
305+
tkn.type = EORECORD;
307306
tkn.isReady = true;
308307
} else if (c == format.getEncapsulator()) {
309308
// consume encapsulated token
310309
encapsulatedTokenLexer(tkn, c);
311310
} else if (isEndOfFile(c)) {
312-
// end of file return TT_EOF()
311+
// end of file return EOF()
313312
//noop: tkn.content.append("");
314-
tkn.type = TT_EOF;
313+
tkn.type = EOF;
315314
tkn.isReady = true;
316315
} else {
317316
// next token must be a simple token
@@ -332,9 +331,9 @@ Token nextToken(Token tkn) throws IOException {
332331
* A simple token might contain escaped delimiters (as \, or \;). The
333332
* token is finished when one of the following conditions becomes true:
334333
* <ul>
335-
* <li>end of line has been reached (TT_EORECORD)</li>
336-
* <li>end of stream has been reached (TT_EOF)</li>
337-
* <li>an unescaped delimiter has been reached (TT_TOKEN)</li>
334+
* <li>end of line has been reached (EORECORD)</li>
335+
* <li>end of stream has been reached (EOF)</li>
336+
* <li>an unescaped delimiter has been reached (TOKEN)</li>
338337
* </ul>
339338
*
340339
* @param tkn the current token
@@ -346,17 +345,17 @@ private Token simpleTokenLexer(Token tkn, int c) throws IOException {
346345
for (; ;) {
347346
if (isEndOfLine(c)) {
348347
// end of record
349-
tkn.type = TT_EORECORD;
348+
tkn.type = EORECORD;
350349
tkn.isReady = true;
351350
break;
352351
} else if (isEndOfFile(c)) {
353352
// end of file
354-
tkn.type = TT_EOF;
353+
tkn.type = EOF;
355354
tkn.isReady = true;
356355
break;
357356
} else if (c == format.getDelimiter()) {
358357
// end of token
359-
tkn.type = TT_TOKEN;
358+
tkn.type = TOKEN;
360359
tkn.isReady = true;
361360
break;
362361
} else if (c == '\\' && format.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
@@ -414,16 +413,16 @@ private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
414413
for (; ;) {
415414
c = in.read();
416415
if (c == format.getDelimiter()) {
417-
tkn.type = TT_TOKEN;
416+
tkn.type = TOKEN;
418417
tkn.isReady = true;
419418
return tkn;
420419
} else if (isEndOfFile(c)) {
421-
tkn.type = TT_EOF;
420+
tkn.type = EOF;
422421
tkn.isReady = true;
423422
return tkn;
424423
} else if (isEndOfLine(c)) {
425424
// ok, end of token reached
426-
tkn.type = TT_EORECORD;
425+
tkn.type = EORECORD;
427426
tkn.isReady = true;
428427
return tkn;
429428
} else if (!isWhitespace(c)) {

0 commit comments

Comments
 (0)