Skip to content

Commit e86202f

Browse files
Refactor CSVFormatTest to use consistent enum naming for predefined formats
1 parent 7da3eb4 commit e86202f

2 files changed

Lines changed: 63 additions & 44 deletions

File tree

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 62 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -233,79 +233,98 @@ boolean isStartOfLine(final int ch) {
233233
* @throws CSVException Thrown on invalid input.
234234
*/
235235
Token nextToken(final Token token) throws IOException {
236-
// Get the last read char (required for empty line detection)
237236
int lastChar = reader.getLastChar();
238-
// read the next char and set eol
239237
int c = reader.read();
240-
// Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF - they are equivalent here.
241238
boolean eol = readEndOfLine(c);
242-
// empty line detection: eol AND (last char was EOL or beginning)
239+
243240
if (ignoreEmptyLines) {
244-
while (eol && isStartOfLine(lastChar)) {
245-
// Go on char ahead ...
246-
lastChar = c;
247-
c = reader.read();
248-
eol = readEndOfLine(c);
249-
// reached the end of the file without any content (empty line at the end)
250-
if (isEndOfFile(c)) {
251-
token.type = Token.Type.EOF;
252-
// don't set token.isReady here because no content
253-
return token;
254-
}
241+
if (skipEmptyLines(token, lastChar, c, eol)) {
242+
return token;
255243
}
244+
// update c and eol after skipping
245+
lastChar = reader.getLastChar();
246+
c = reader.read();
247+
eol = readEndOfLine(c);
256248
}
257-
// Did we reach EOF during the last iteration already? EOF
258-
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
259-
token.type = Token.Type.EOF;
260-
// don't set token.isReady here because no content
249+
250+
if (isEndOfFile(lastChar) || (!isLastTokenDelimiter && isEndOfFile(c))) {
251+
setTokenType(token, Token.Type.EOF);
261252
return token;
262253
}
254+
263255
if (isStartOfLine(lastChar) && isCommentStart(c)) {
264-
final String line = reader.readLine();
265-
if (line == null) {
266-
token.type = Token.Type.EOF;
267-
// don't set token.isReady here because no content
256+
if (handleComment(token)) {
268257
return token;
269258
}
270-
final String comment = line.trim();
271-
token.content.append(comment);
272-
token.type = Token.Type.COMMENT;
273-
return token;
274259
}
275-
// Important: make sure a new char gets consumed in each iteration
260+
261+
processToken(token, c, eol);
262+
return token;
263+
}
264+
265+
// Helper to skip empty lines
266+
private boolean skipEmptyLines(Token token, int lastChar, int c, boolean eol) throws IOException {
267+
while (eol && isStartOfLine(lastChar)) {
268+
lastChar = c;
269+
c = reader.read();
270+
eol = readEndOfLine(c);
271+
if (isEndOfFile(c)) {
272+
setTokenType(token, Token.Type.EOF);
273+
return true;
274+
}
275+
}
276+
return false;
277+
}
278+
279+
// Helper to set token type
280+
private void setTokenType(Token token, Token.Type type) {
281+
token.type = type;
282+
// don't set token.isReady here because no content
283+
}
284+
285+
// Helper to handle comments
286+
private boolean handleComment(Token token) throws IOException {
287+
final String line = reader.readLine();
288+
if (line == null) {
289+
setTokenType(token, Token.Type.EOF);
290+
return true;
291+
}
292+
final String comment = line.trim();
293+
token.content.append(comment);
294+
token.type = Token.Type.COMMENT;
295+
return true;
296+
}
297+
298+
// Helper to process the main token logic
299+
private void processToken(Token token, int c, boolean eol) throws IOException {
276300
while (token.type == Token.Type.INVALID) {
277-
// ignore whitespaces at beginning of a token
278301
if (ignoreSurroundingSpaces) {
279-
while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) {
280-
c = reader.read();
281-
eol = readEndOfLine(c);
282-
}
302+
c = skipLeadingWhitespace(c, eol);
303+
eol = readEndOfLine(c);
283304
}
284-
// ok, start of token reached: encapsulated, or token
285305
if (isDelimiter(c)) {
286-
// empty token return TOKEN("")
287306
token.type = Token.Type.TOKEN;
288307
} else if (eol) {
289-
// empty token return EORECORD("")
290-
// noop: token.content.append("");
291308
token.type = Token.Type.EORECORD;
292309
} else if (isQuoteChar(c)) {
293-
// consume encapsulated token
294310
parseEncapsulatedToken(token);
295311
} else if (isEndOfFile(c)) {
296-
// end of file return EOF()
297-
// noop: token.content.append("");
298312
token.type = Token.Type.EOF;
299313
token.isReady = true; // there is data at EOF
300314
} else {
301-
// next token must be a simple token
302-
// add removed blanks when not ignoring whitespace chars...
303315
parseSimpleToken(token, c);
304316
}
305317
}
306-
return token;
307318
}
308319

320+
// Helper to skip leading whitespace
321+
private int skipLeadingWhitespace(int c, boolean eol) throws IOException {
322+
while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) {
323+
c = reader.read();
324+
eol = readEndOfLine(c);
325+
}
326+
return c;
327+
}
309328
private int nullToDisabled(final Character c) {
310329
return c == null ? Constants.UNDEFINED : c.charValue(); // Explicit unboxing
311330
}

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1111,7 +1111,7 @@ public void testToString() {
11111111
@Test
11121112
public void testToStringAndWithCommentMarkerTakingCharacter() {
11131113

1114-
final CSVFormat.Predefined csvFormatPredefined = CSVFormat.Predefined.Default;
1114+
final CSVFormat.Predefined csvFormatPredefined = CSVFormat.Predefined.DEFAULT;
11151115
final CSVFormat csvFormat = csvFormatPredefined.getFormat();
11161116

11171117
assertNull(csvFormat.getEscapeCharacter());

0 commit comments

Comments
 (0)