@@ -233,79 +233,98 @@ boolean isStartOfLine(final int ch) {
233233 * @throws CSVException Thrown on invalid input.
234234 */
235235 Token nextToken (final Token token ) throws IOException {
236- // Get the last read char (required for empty line detection)
237236 int lastChar = reader .getLastChar ();
238- // read the next char and set eol
239237 int c = reader .read ();
240- // Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF - they are equivalent here.
241238 boolean eol = readEndOfLine (c );
242- // empty line detection: eol AND (last char was EOL or beginning)
239+
243240 if (ignoreEmptyLines ) {
244- while (eol && isStartOfLine (lastChar )) {
245- // Go on char ahead ...
246- lastChar = c ;
247- c = reader .read ();
248- eol = readEndOfLine (c );
249- // reached the end of the file without any content (empty line at the end)
250- if (isEndOfFile (c )) {
251- token .type = Token .Type .EOF ;
252- // don't set token.isReady here because no content
253- return token ;
254- }
241+ if (skipEmptyLines (token , lastChar , c , eol )) { // true: end of file was hit while skipping and token.type is already EOF
242+ return token ;
255243 }
244+ // Refresh lastChar, c and eol after skipping. NOTE(review): skipEmptyLines works on by-value copies, and this block reads one more char even when nothing was skipped, so the c read above is overwritten (the removed loop kept it) - confirm this is intended.
245+ lastChar = reader .getLastChar ();
246+ c = reader .read ();
247+ eol = readEndOfLine (c );
256248 }
257- // Did we reach EOF during the last iteration already? EOF
258- if (isEndOfFile (lastChar ) || !isLastTokenDelimiter && isEndOfFile (c )) {
259- token .type = Token .Type .EOF ;
260- // don't set token.isReady here because no content
249+
250+ if (isEndOfFile (lastChar ) || (!isLastTokenDelimiter && isEndOfFile (c ))) { // same condition as the removed line; the parentheses only make the && grouping explicit
251+ setTokenType (token , Token .Type .EOF ); // isReady is not set on this path
261252 return token ;
262253 }
254+
263255 if (isStartOfLine (lastChar ) && isCommentStart (c )) {
264- final String line = reader .readLine ();
265- if (line == null ) {
266- token .type = Token .Type .EOF ;
267- // don't set token.isReady here because no content
256+ if (handleComment (token )) { // NOTE(review): handleComment returns true on every path shown, so this branch always returns
268257 return token ;
269258 }
270- final String comment = line .trim ();
271- token .content .append (comment );
272- token .type = Token .Type .COMMENT ;
273- return token ;
274259 }
275- // Important: make sure a new char gets consumed in each iteration
260+
261+ processToken (token , c , eol ); // loops until token.type is no longer INVALID
262+ return token ;
263+ }
264+
265+ // Reads ahead while eol is set and isStartOfLine(lastChar) holds; returns true (with token.type set to EOF) if end of file is hit while doing so, false otherwise.
266+ private boolean skipEmptyLines (Token token , int lastChar , int c , boolean eol ) throws IOException { // NOTE(review): lastChar, c and eol are by-value copies; the values updated below are not handed back to the caller
267+ while (eol && isStartOfLine (lastChar )) {
268+ lastChar = c ;
269+ c = reader .read ();
270+ eol = readEndOfLine (c );
271+ if (isEndOfFile (c )) { // end of file reached while skipping: report EOF without marking the token ready
272+ setTokenType (token , Token .Type .EOF );
273+ return true ;
274+ }
275+ }
276+ return false ;
277+ }
278+
279+ // Assigns the given type to token.type and changes no other field of the token.
280+ private void setTokenType (Token token , Token .Type type ) {
281+ token .type = type ;
282+ // token.isReady is deliberately left unset: the callers shown pass only Token.Type.EOF, for input that carries no content
283+ }
284+
285+ // Reads a line via reader.readLine(); sets token.type to EOF when that returns null, otherwise appends the trimmed line to token.content and sets token.type to COMMENT.
286+ private boolean handleComment (Token token ) throws IOException { // NOTE(review): returns true on both paths, so the boolean result carries no information
287+ final String line = reader .readLine ();
288+ if (line == null ) { // readLine() returned null: report EOF, token.content stays untouched
289+ setTokenType (token , Token .Type .EOF );
290+ return true ;
291+ }
292+ final String comment = line .trim ();
293+ token .content .append (comment );
294+ token .type = Token .Type .COMMENT ;
295+ return true ;
296+ }
297+
298+ // Loops until token.type leaves INVALID, dispatching on c: delimiter -> TOKEN, eol -> EORECORD, quote char -> parseEncapsulatedToken, end of file -> EOF with isReady set, anything else -> parseSimpleToken.
299+ private void processToken (Token token , int c , boolean eol ) throws IOException {
276300 while (token .type == Token .Type .INVALID ) {
277- // ignore whitespaces at beginning of a token
278301 if (ignoreSurroundingSpaces ) {
279- while (Character .isWhitespace ((char ) c ) && !isDelimiter (c ) && !eol ) {
280- c = reader .read ();
281- eol = readEndOfLine (c );
282- }
302+ c = skipLeadingWhitespace (c , eol ); // only c is returned; the helper's updated eol is not
303+ eol = readEndOfLine (c ); // NOTE(review): when the helper read any char it already called readEndOfLine on this c; if that call consumes input (the removed note in nextToken says it swallows LF after CR), calling it again may not be harmless - confirm
283304 }
284- // ok, start of token reached: encapsulated, or token
285305 if (isDelimiter (c )) {
286- // empty token return TOKEN("")
287306 token .type = Token .Type .TOKEN ;
288307 } else if (eol ) {
289- // empty token return EORECORD("")
290- // noop: token.content.append("");
291308 token .type = Token .Type .EORECORD ;
292309 } else if (isQuoteChar (c )) {
293- // consume encapsulated token
294310 parseEncapsulatedToken (token );
295311 } else if (isEndOfFile (c )) {
296- // end of file return EOF()
297- // noop: token.content.append("");
298312 token .type = Token .Type .EOF ;
299313 token .isReady = true ; // there is data at EOF
300314 } else {
301- // next token must be a simple token
302- // add removed blanks when not ignoring whitespace chars...
303315 parseSimpleToken (token , c );
304316 }
305317 }
306- return token ;
307318 }
308319
320+ // Reads ahead while c is whitespace, is not a delimiter and eol is not set; returns the first char for which that condition fails.
321+ private int skipLeadingWhitespace (int c , boolean eol ) throws IOException { // NOTE(review): eol is a by-value copy; the value updated in the loop is not visible to the caller
322+ while (Character .isWhitespace ((char ) c ) && !isDelimiter (c ) && !eol ) {
323+ c = reader .read ();
324+ eol = readEndOfLine (c );
325+ }
326+ return c ;
327+ }
309328 private int nullToDisabled (final Character c ) {
310329 return c == null ? Constants .UNDEFINED : c .charValue (); // null maps to Constants.UNDEFINED; otherwise explicit unboxing of the Character
311330 }
0 commit comments