Skip to content

Commit aae6f90

Browse files
committed
[CSV-214] Adding a placeholder in the Lexer and CSV parser to store the
end-of-line string. I applied the patch in spirit and made changes: there is no need to use a boolean to track the state of the EOL String (set vs. not set). I also allowed for CR to be saved as an EOL string since we allow that already.
1 parent 4d0f226 commit aae6f90

File tree

4 files changed

+62
-0
lines changed

4 files changed

+62
-0
lines changed

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
<action issue="CSV-192" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVParser.parse(Path, Charset, CSVFormat)</action>
5252
<action issue="CSV-205" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVFormat#printer() to print to System.out</action>
5353
<action issue="CSV-207" type="add" dev="ggregory" due-to="Gary Gregory">Provide a CSV Format for printing PostgreSQL CSV and Text formats.</action>
54+
<action issue="CSV-214" type="add" dev="ggregory" due-to="Nitin Mahendru, Gary Gregory">Adding a placeholder in the Lexer and CSV parser to store the end-of-line string.</action>
5455
</release>
5556
<release version="1.4" date="2016-05-28" description="Feature and bug fix release">
5657
<action issue="CSV-181" type="update" dev="ggregory" due-to="Gary Gregory">Make CSVPrinter.print(Object) GC-free.</action>

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,16 @@ public long getCurrentLineNumber() {
396396
return this.lexer.getCurrentLineNumber();
397397
}
398398

399+
/**
400+
* Gets the first end-of-line string encountered.
401+
*
402+
* @return the first end-of-line string ({@code "\r\n"}, {@code "\n"} or {@code "\r"}), or {@code null} if the lexer has not read an end-of-line yet
403+
* @since 1.5
404+
*/
405+
public String getFirstEndOfLine() {
406+
return lexer.getFirstEol();
407+
}
408+
399409
/**
400410
* Returns a copy of the header map that iterates in column order.
401411
* <p>

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@
4040
*/
4141
final class Lexer implements Closeable {
4242

43+
/** String form of the CR character; recorded as the first end-of-line string when that terminator is a CR not followed by LF. */
private static final String CR_STRING = Character.toString(Constants.CR);
44+
/** String form of the LF character; recorded as the first end-of-line string when that terminator is an LF on its own. */
private static final String LF_STRING = Character.toString(Constants.LF);
45+
4346
/**
4447
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
4548
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
@@ -57,7 +60,12 @@ final class Lexer implements Closeable {
5760

5861
/** The input stream */
5962
private final ExtendedBufferedReader reader;
63+
/** The first end-of-line string read by this lexer; stays {@code null} until readEndOfLine records one, and is never overwritten afterwards. */
private String firstEol;
6064

65+
/**
 * Returns the first end-of-line string this lexer has read.
 *
 * @return the recorded end-of-line string (CR LF, LF or CR), or {@code null} if no end-of-line has been read yet
 */
String getFirstEol(){
66+
return firstEol;
67+
}
68+
6169
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
6270
this.reader = reader;
6371
this.delimiter = format.getDelimiter();
@@ -374,7 +382,20 @@ boolean readEndOfLine(int ch) throws IOException {
374382
if (ch == CR && reader.lookAhead() == LF) {
375383
// note: does not change ch outside of this method!
376384
ch = reader.read();
385+
// Save the EOL state
386+
if (firstEol == null) {
387+
this.firstEol = Constants.CRLF;
388+
}
377389
}
390+
// save EOL state here.
391+
if (firstEol == null) {
392+
if (ch == LF) {
393+
this.firstEol = LF_STRING;
394+
} else if (ch == CR) {
395+
this.firstEol = CR_STRING;
396+
}
397+
}
398+
378399
return ch == LF || ch == CR;
379400
}
380401

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,36 @@ public void testCarriageReturnLineFeedEndings() throws IOException {
234234
assertEquals(4, records.size());
235235
}
236236
}
237+
238+
@Test
239+
public void testFirstEndOfLineCrLf() throws IOException {
240+
final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; // four records separated by CR LF, no trailing terminator
241+
try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
242+
final List<CSVRecord> records = parser.getRecords();
243+
assertEquals(4, records.size());
244+
assertEquals("\r\n", parser.getFirstEndOfLine()); // queried only after getRecords() has returned
245+
}
246+
}
247+
248+
@Test
249+
public void testFirstEndOfLineLf() throws IOException {
250+
final String data = "foo\nbaar,\nhello,world\n,kanu"; // four records separated by a bare LF, no trailing terminator
251+
try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
252+
final List<CSVRecord> records = parser.getRecords();
253+
assertEquals(4, records.size());
254+
assertEquals("\n", parser.getFirstEndOfLine()); // queried only after getRecords() has returned
255+
}
256+
}
257+
258+
@Test
259+
public void testFirstEndOfLineCr() throws IOException {
260+
final String data = "foo\rbaar,\rhello,world\r,kanu"; // four records separated by a bare CR, no trailing terminator
261+
try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
262+
final List<CSVRecord> records = parser.getRecords();
263+
assertEquals(4, records.size());
264+
assertEquals("\r", parser.getFirstEndOfLine()); // queried only after getRecords() has returned
265+
}
266+
}
237267

238268
@Test(expected = NoSuchElementException.class)
239269
public void testClose() throws Exception {

0 commit comments

Comments
 (0)