Skip to content

Commit 71c69df

Browse files
committed
CSV-98 Line number counting is confusing
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1479936 13f79535-47bb-0310-9956-ffa450edef68
1 parent 5b2e5f8 commit 71c69df

10 files changed

Lines changed: 79 additions & 68 deletions

File tree

src/main/java/org/apache/commons/csv/CSVLexer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ private Token parseSimpleToken(final Token tkn, int c) throws IOException {
202202
*/
203203
private Token parseEncapsulatedToken(final Token tkn) throws IOException {
204204
// save current line number in case needed for IOE
205-
final long startLineNumber = getLineNumber();
205+
final long startLineNumber = getCurrentLineNumber();
206206
int c;
207207
while (true) {
208208
c = in.read();
@@ -235,7 +235,7 @@ private Token parseEncapsulatedToken(final Token tkn) throws IOException {
235235
return tkn;
236236
} else if (!isWhitespace(c)) {
237237
// error invalid char between token and next delimiter
238-
throw new IOException("(line " + getLineNumber() +
238+
throw new IOException("(line " + getCurrentLineNumber() +
239239
") invalid char between encapsulated token and delimiter");
240240
}
241241
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@ public Map<String, Integer> getHeaderMap() {
158158
*
159159
* @return current line number
160160
*/
161-
public long getLineNumber() {
162-
return lexer.getLineNumber();
161+
public long getCurrentLineNumber() {
162+
return lexer.getCurrentLineNumber();
163163
}
164164

165165
/**
@@ -200,7 +200,7 @@ CSVRecord nextRecord() throws IOException {
200200
}
201201
break;
202202
case INVALID:
203-
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
203+
throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence");
204204
case COMMENT: // Ignored currently
205205
if (sb == null) { // first comment for this record
206206
sb = new StringBuilder();

src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ final class ExtendedBufferedReader extends BufferedReader {
3939
/** The last char returned */
4040
private int lastChar = UNDEFINED;
4141

42-
/** The line counter */
43-
private long lineCounter;
42+
/** The count of EOLs (CR/LF/CRLF) seen so far */
43+
private long eolCounter = 0;
4444

4545
/**
4646
* Creates an extended buffered reader using the default buffer size
@@ -53,7 +53,7 @@ final class ExtendedBufferedReader extends BufferedReader {
5353
public int read() throws IOException {
5454
final int current = super.read();
5555
if (current == CR || (current == LF && lastChar != CR)) {
56-
lineCounter++;
56+
eolCounter++;
5757
}
5858
lastChar = current;
5959
return lastChar;
@@ -85,10 +85,10 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
8585
final char ch = buf[i];
8686
if (ch == LF) {
8787
if (CR != (i > 0 ? buf[i - 1] : lastChar)) {
88-
lineCounter++;
88+
eolCounter++;
8989
}
9090
} else if (ch == CR) {
91-
lineCounter++;
91+
eolCounter++;
9292
}
9393
}
9494

@@ -105,7 +105,7 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
105105
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
106106
* when processing a comment, otherwise information can be lost.
107107
* <p>
108-
* Increments {@link #lineCounter}
108+
* Increments {@link #eolCounter}
109109
* <p>
110110
* Sets {@link #lastChar} to {@link #END_OF_STREAM} at EOF, otherwise to LF
111111
*
@@ -117,7 +117,7 @@ public String readLine() throws IOException {
117117

118118
if (line != null) {
119119
lastChar = LF; // needed for detecting start of line
120-
lineCounter++;
120+
eolCounter++;
121121
} else {
122122
lastChar = END_OF_STREAM;
123123
}
@@ -127,7 +127,7 @@ public String readLine() throws IOException {
127127

128128
/**
129129
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
130-
* still return this value.
130+
* still return this value. Does not affect line number or last character.
131131
*
132132
* @return the next character
133133
*
@@ -143,11 +143,15 @@ int lookAhead() throws IOException {
143143
}
144144

145145
/**
146-
* Returns the number of lines read
146+
* Returns the current line number
147147
*
148-
* @return the number of EOLs seen so far
148+
* @return the current line number
149149
*/
150-
long getLineNumber() {
151-
return lineCounter;
150+
long getCurrentLineNumber() {
151+
// Check if we are at EOL or EOF or just starting
152+
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
153+
return eolCounter; // counter is accurate
154+
}
155+
return eolCounter + 1; // Allow for counter being incremented only at EOL
152156
}
153157
}

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,12 @@ private final char mapNullToDisabled(final Character c) {
7070
}
7171

7272
/**
73-
* Returns the number of lines read
73+
* Returns the current line number
7474
*
75-
* @return the number of EOLs seen so far
75+
* @return the current line number
7676
*/
77-
long getLineNumber() {
78-
return in.getLineNumber();
77+
long getCurrentLineNumber() {
78+
return in.getCurrentLineNumber();
7979
}
8080

8181
// TODO escape handling needs more work

src/test/java/org/apache/commons/csv/CSVLexer1.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ private Token simpleTokenLexer(final Token tkn, int c) throws IOException {
198198
*/
199199
private Token encapsulatedTokenLexer(final Token tkn, int c) throws IOException {
200200
// save current line
201-
final long startLineNumber = getLineNumber();
201+
final long startLineNumber = getCurrentLineNumber();
202202
// ignore the given delimiter
203203
// assert c == delimiter;
204204
while (true) {
@@ -230,7 +230,7 @@ private Token encapsulatedTokenLexer(final Token tkn, int c) throws IOException
230230
return tkn;
231231
} else if (!isWhitespace(c)) {
232232
// error invalid char between token and next delimiter
233-
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
233+
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
234234
}
235235
}
236236
}

src/test/java/org/apache/commons/csv/CSVLexer1306663.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ private Token simpleTokenLexer(final Token tkn, int c) throws IOException {
187187
*/
188188
private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
189189
// save current line
190-
final long startLineNumber = getLineNumber();
190+
final long startLineNumber = getCurrentLineNumber();
191191
// ignore the given delimiter
192192
// assert c == delimiter;
193193
int c;
@@ -218,7 +218,7 @@ private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
218218
return tkn;
219219
} else if (!isWhitespace(c)) {
220220
// error invalid char between token and next delimiter
221-
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
221+
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
222222
}
223223
}
224224
}

src/test/java/org/apache/commons/csv/CSVLexer1306667.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ private Token simpleTokenLexer(final Token tkn, int c) throws IOException {
187187
*/
188188
private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
189189
// save current line
190-
final long startLineNumber = getLineNumber();
190+
final long startLineNumber = getCurrentLineNumber();
191191
// ignore the given delimiter
192192
// assert c == delimiter;
193193
int c;
@@ -218,7 +218,7 @@ private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
218218
return tkn;
219219
} else if (!isWhitespace(c)) {
220220
// error invalid char between token and next delimiter
221-
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
221+
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
222222
}
223223
}
224224
}

src/test/java/org/apache/commons/csv/CSVLexer3.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ Token nextToken(final Token tkn) throws IOException {
170170
state = State.ESCAPE_QUOTE;
171171
break;
172172
case EOFCHAR:
173-
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in quoted string");
173+
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in quoted string");
174174
default:
175175
tkn.content.append((char) intch);
176176
break;
@@ -194,15 +194,15 @@ Token nextToken(final Token tkn) throws IOException {
194194
case WHITESPACE: // trailing whitespace may be allowed
195195
if (!ignoreSurroundingSpaces) {
196196
// error invalid char between token and next delimiter
197-
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
197+
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
198198
}
199199
break;
200200
// Everything else is invalid
201201
case ESCAPE:
202202
case OTHER:
203203
case COMMENT_START:
204204
// error invalid char between token and next delimiter
205-
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
205+
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
206206
}
207207
break;
208208
case ESCAPE_PLAIN:
@@ -221,7 +221,7 @@ Token nextToken(final Token tkn) throws IOException {
221221
tkn.content.append((char) intch);
222222
break;
223223
case EOFCHAR:
224-
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in escape sequence");
224+
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
225225
}
226226
break;
227227
case ESCAPE_QUOTE:
@@ -239,7 +239,7 @@ Token nextToken(final Token tkn) throws IOException {
239239
tkn.content.append((char) intch);
240240
break;
241241
case EOFCHAR:
242-
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in escape sequence");
242+
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
243243
}
244244
break;
245245
default:

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -629,21 +629,21 @@ public void testGetRecordWithMultiiLineValues() throws Exception {
629629
CSVFormat.newBuilder().withRecordSeparator(CRLF).build());
630630
CSVRecord record;
631631
assertEquals(0, parser.getRecordNumber());
632-
assertEquals(0, parser.getLineNumber());
632+
assertEquals(0, parser.getCurrentLineNumber());
633633
assertNotNull(record = parser.nextRecord());
634-
assertEquals(3, parser.getLineNumber());
634+
assertEquals(3, parser.getCurrentLineNumber());
635635
assertEquals(1, record.getRecordNumber());
636636
assertEquals(1, parser.getRecordNumber());
637637
assertNotNull(record = parser.nextRecord());
638-
assertEquals(6, parser.getLineNumber());
638+
assertEquals(6, parser.getCurrentLineNumber());
639639
assertEquals(2, record.getRecordNumber());
640640
assertEquals(2, parser.getRecordNumber());
641641
assertNotNull(record = parser.nextRecord());
642-
assertEquals(8, parser.getLineNumber());
642+
assertEquals(8, parser.getCurrentLineNumber());
643643
assertEquals(3, record.getRecordNumber());
644644
assertEquals(3, parser.getRecordNumber());
645645
assertNull(record = parser.nextRecord());
646-
assertEquals(8, parser.getLineNumber());
646+
assertEquals(8, parser.getCurrentLineNumber());
647647
assertEquals(3, parser.getRecordNumber());
648648
}
649649

@@ -676,17 +676,17 @@ private void validateRecordNumbers(final String lineSeparator) throws IOExceptio
676676

677677
private void validateLineNumbers(final String lineSeparator) throws IOException {
678678
final CSVParser parser = new CSVParser("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.newBuilder().withRecordSeparator(lineSeparator).build());
679-
assertEquals(0, parser.getLineNumber());
679+
assertEquals(0, parser.getCurrentLineNumber());
680680
assertNotNull(parser.nextRecord());
681-
assertEquals(1, parser.getLineNumber());
681+
assertEquals(1, parser.getCurrentLineNumber());
682682
assertNotNull(parser.nextRecord());
683-
assertEquals(2, parser.getLineNumber());
683+
assertEquals(2, parser.getCurrentLineNumber());
684684
assertNotNull(parser.nextRecord());
685685
// Still 2 because the last line does not have EOL chars
686-
assertEquals(2, parser.getLineNumber());
686+
assertEquals(2, parser.getCurrentLineNumber());
687687
assertNull(parser.nextRecord());
688688
// Still 2 because the last line does not have EOL chars
689-
assertEquals(2, parser.getLineNumber());
689+
assertEquals(2, parser.getCurrentLineNumber());
690690
}
691691

692692
}

0 commit comments

Comments
 (0)