File tree Expand file tree Collapse file tree
main/java/org/apache/commons/csv
test/java/org/apache/commons/csv Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -277,15 +277,22 @@ Token nextToken(final Token token) throws IOException {
277277 }
278278 // Important: make sure a new char gets consumed in each iteration
279279 while (token .type == Token .Type .INVALID ) {
280+ // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must
281+ // only be evaluated once per character. Remember a match found while skipping whitespace below.
282+ boolean delimiter = false ;
280283 // ignore whitespaces at beginning of a token
281284 if (ignoreSurroundingSpaces ) {
282- while (Character .isWhitespace ((char ) c ) && !isDelimiter (c ) && !eol ) {
285+ while (Character .isWhitespace ((char ) c ) && !eol ) {
286+ if (isDelimiter (c )) {
287+ delimiter = true ;
288+ break ;
289+ }
283290 c = reader .read ();
284291 eol = readEndOfLine (c );
285292 }
286293 }
287294 // ok, start of token reached: encapsulated, or token
288- if (isDelimiter (c )) {
295+ if (delimiter || isDelimiter (c )) {
289296 // empty token return TOKEN("")
290297 token .type = Token .Type .TOKEN ;
291298 } else if (eol ) {
Original file line number Diff line number Diff line change @@ -447,6 +447,25 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
447447 }
448448 }
449449
450+ /**
451+ * Verifies that, with {@code ignoreSurroundingSpaces} enabled, an empty field in front of a multi-character delimiter
452+ * whose first character is whitespace is still reported. {@link Lexer#isDelimiter(int)} consumes the rest of a matched
453+ * delimiter as a side effect, so evaluating it twice for the same character would lose the delimiter and the empty token.
454+ */
455+ @ Test
456+ void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter () throws IOException {
457+ final CSVFormat format = CSVFormat .DEFAULT .builder ().setDelimiter (" |" ).setIgnoreSurroundingSpaces (true ).get ();
458+ try (Lexer lexer = createLexer (" |a" , format )) {
459+ assertNextToken (TOKEN , "" , lexer );
460+ assertNextToken (EOF , "a" , lexer );
461+ }
462+ try (Lexer lexer = createLexer ("a | |b" , format )) {
463+ assertNextToken (TOKEN , "a" , lexer );
464+ assertNextToken (TOKEN , "" , lexer );
465+ assertNextToken (EOF , "b" , lexer );
466+ }
467+ }
468+
450469 @ Test
451470 void testReadEscapeBackspace () throws IOException {
452471 try (Lexer lexer = createLexer ("b" , CSVFormat .DEFAULT .withEscape ('\b' ))) {
You can’t perform that action at this time.
0 commit comments