Skip to content

Commit a4c6037

Browse files
king-tylersvrnggarydgregory
authored
[CSV-265] Update buffer position when reading line comment (apache#120)
* [CSV-265] Add JiraCsv265Test
* [CSV-265] Update buffer position when reading line comment
* Update JiraCsv265Test.java: file should end in a new line.
Co-authored-by: Tyler King <tylerking001@hotmail.com>
Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
1 parent 399204c commit a4c6037

2 files changed

Lines changed: 109 additions & 13 deletions

File tree

src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,29 +198,37 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
198198
}
199199

200200
/**
201-
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
202-
* when processing a comment, otherwise information can be lost.
201+
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a
202+
* comment, otherwise information can be lost.
203203
* <p>
204-
* Increments {@link #eolCounter}.
204+
* Increments {@link #eolCounter} and updates {@link #position}.
205205
* </p>
206206
* <p>
207-
* Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF.
207+
* Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to the last EOL character.
208208
* </p>
209209
*
210210
* @return the line that was read, or null if reached EOF.
211211
*/
212212
@Override
213213
public String readLine() throws IOException {
214-
// Removed path (gutter lines ending in "-"): delegated to super.readLine() and then
// set lastChar and eolCounter by hand.
final String line = super.readLine();
215-
216-
if (line != null) {
217-
lastChar = LF; // needed for detecting start of line
218-
eolCounter++;
219-
} else {
220-
lastChar = END_OF_STREAM;
214+
// Added path (gutter lines ending in "+"): builds the line through this class's own
// lookAhead()/read() instead of super.readLine(). Per the commit title and the updated
// Javadoc, the intent is that the reader's position is updated while a comment line is
// consumed; the bodies of read()/lookAhead() are not visible in this diff.
// NOTE(review): the Javadoc says lastChar is set to END_OF_STREAM at EOF, but this early
// return assigns nothing itself - confirm that lookAhead() (or a prior read()) does so.
if (lookAhead() == END_OF_STREAM) {
215+
return null;
221216
}
222-
223-
return line;
217+
// Accumulates the characters of the line, excluding the terminator.
final StringBuilder buffer = new StringBuilder();
218+
while (true) {
219+
final int current = read();
220+
// A CR immediately followed by LF is one terminator: consume the LF as well so it is
// not left behind as the first character of the next line.
if (current == CR) {
221+
final int next = lookAhead();
222+
if (next == LF) {
223+
read();
224+
}
225+
}
226+
// Stop at end of stream or at any terminator (LF, CR, or the CR of a CRLF pair handled
// above); the terminator is never appended to the result.
if (current == END_OF_STREAM || current == LF || current == CR) {
227+
break;
228+
}
229+
buffer.append((char) current);
230+
}
231+
// May be an empty string (e.g. when the first character read is a terminator); null is
// only returned by the look-ahead check at the top.
return buffer.toString();
224232
}
225233

226234
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv.issues;
19+
20+
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
22+
import java.io.IOException;
23+
import java.io.StringReader;
24+
import java.util.Iterator;
25+
26+
import org.apache.commons.csv.CSVFormat;
27+
import org.apache.commons.csv.CSVParser;
28+
import org.apache.commons.csv.CSVRecord;
29+
import org.junit.jupiter.api.Test;
30+
31+
/**
32+
* Tests [CSV-265] {@link CSVRecord#getCharacterPosition()} returns the correct position after encountering a comment.
33+
*/
34+
public class JiraCsv265Test {
35+
36+
/**
 * Parses input in which every non-comment line is preceded by exactly one comment line and checks
 * {@link CSVRecord#getCharacterPosition()} for the first two records returned by the iterator.
 */
@Test
37+
public void testCharacterPositionWithComments() throws IOException {
38+
// @formatter:off
39+
// Layout: comment, header line, comment, values, comment, values, trailing comment.
final String csv = "# Comment1\n"
40+
+ "Header1,Header2\n"
41+
+ "# Comment2\n"
42+
+ "Value1,Value2\n"
43+
+ "# Comment3\n"
44+
+ "Value3,Value4\n"
45+
+ "# Comment4\n";
46+
// '#' starts a comment; the header is taken from the input and the header record is skipped.
final CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
47+
.setCommentMarker('#')
48+
.setHeader()
49+
.setSkipHeaderRecord(true)
50+
.build();
51+
// @formatter:on
52+
try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) {
53+
final Iterator<CSVRecord> itr = parser.iterator();
54+
// Presumably the Value1,Value2 record, since the header record is configured to be skipped.
final CSVRecord record1 = itr.next();
55+
// Expected position is the offset at which the comment line preceding the record starts.
assertEquals(csv.indexOf("# Comment2"), record1.getCharacterPosition());
56+
final CSVRecord record2 = itr.next();
57+
assertEquals(csv.indexOf("# Comment3"), record2.getCharacterPosition());
58+
}
59+
}
60+
61+
/**
 * Same check as above, but with two consecutive comment lines before each non-comment line; the
 * expected position is the offset of the first comment line of each pair.
 */
@Test
62+
public void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException {
63+
// @formatter:off
64+
// Layout: two comments, header line, two comments, values, two comments, values (no final newline).
final String csv = "# Comment1\n"
65+
+ "# Comment2\n"
66+
+ "Header1,Header2\n"
67+
+ "# Comment3\n"
68+
+ "# Comment4\n"
69+
+ "Value1,Value2\n"
70+
+ "# Comment5\n"
71+
+ "# Comment6\n"
72+
+ "Value3,Value4";
73+
// Same format configuration as in the single-comment test.
final CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
74+
.setCommentMarker('#')
75+
.setHeader()
76+
.setSkipHeaderRecord(true)
77+
.build();
78+
// @formatter:on
79+
try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) {
80+
final Iterator<CSVRecord> itr = parser.iterator();
81+
final CSVRecord record1 = itr.next();
82+
// The offset of the first comment of the pair ("# Comment3"), not the second.
assertEquals(csv.indexOf("# Comment3"), record1.getCharacterPosition());
83+
final CSVRecord record2 = itr.next();
84+
assertEquals(csv.indexOf("# Comment5"), record2.getCharacterPosition());
85+
}
86+
}
87+
88+
}

0 commit comments

Comments
 (0)