Skip to content

Commit 9f4bf36

Browse files
committed
Improve parser performance by up to 20%, YMMV
1 parent da934d0 commit 9f4bf36

4 files changed

Lines changed: 57 additions & 76 deletions

File tree

src/changes/changes.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@
5151
<action type="fix" dev="ggregory" due-to="Dávid Szigecsán">Fix documentation for CSVFormat private constructor #466.</action>
5252
<action type="fix" issue="CSV-294" dev="ggregory" due-to="Joern Huxhorn, Gary Gregory">CSVFormat does not support explicit " as escape char.</action>
5353
<action type="fix" issue="CSV-150" dev="ggregory" due-to="dota17, Gary Gregory, Jörn Huxhorn">Escaping is not disableable.</action>
54-
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix Javadoc warnings on Java 23.</action>
54+
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix Javadoc warnings on Java 23.</action>
55+
<action type="fix" dev="ggregory" due-to="Gary Gregory">Improve parser performance by up to 20%; actual gains vary with input and environment.</action>
5556
<!-- UPDATE -->
5657
<action type="update" dev="ggregory" due-to="Dependabot">Bump commons-codec:commons-codec from 1.16.1 to 1.17.1 #422, #449.</action>
5758
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 69 to 75 #435, #452, #465, #468, #475.</action>

src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java

Lines changed: 22 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
import static org.apache.commons.csv.Constants.UNDEFINED;
2323
import static org.apache.commons.io.IOUtils.EOF;
2424

25-
import java.io.BufferedReader;
2625
import java.io.IOException;
2726
import java.io.Reader;
2827

2928
import org.apache.commons.io.IOUtils;
29+
import org.apache.commons.io.input.UnsynchronizedBufferedReader;
3030

3131
/**
3232
* A special buffered reader which supports sophisticated read access.
@@ -35,18 +35,19 @@
3535
* {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
3636
* </p>
3737
*/
38-
final class ExtendedBufferedReader extends BufferedReader {
38+
final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
3939

4040
/** The last char returned */
4141
private int lastChar = UNDEFINED;
42+
private int lastCharMark = UNDEFINED;
4243

4344
/** The count of EOLs (CR/LF/CRLF) seen so far */
4445
private long lineNumber;
46+
private long lineNumberMark;
4547

4648
/** The position, which is the number of characters read so far */
4749
private long position;
48-
49-
private boolean closed;
50+
private long positionMark;
5051

5152
/**
5253
* Constructs a new instance using the default buffer size.
@@ -55,6 +56,22 @@ final class ExtendedBufferedReader extends BufferedReader {
5556
super(reader);
5657
}
5758

59+
@Override
60+
public void mark(int readAheadLimit) throws IOException {
61+
lineNumberMark = lineNumber;
62+
lastCharMark = lastChar;
63+
positionMark = position;
64+
super.mark(readAheadLimit);
65+
}
66+
67+
@Override
68+
public void reset() throws IOException {
69+
lineNumber = lineNumberMark;
70+
lastChar = lastCharMark;
71+
position = positionMark;
72+
super.reset();
73+
}
74+
5875
/**
5976
* Closes the stream.
6077
*
@@ -64,7 +81,6 @@ final class ExtendedBufferedReader extends BufferedReader {
6481
@Override
6582
public void close() throws IOException {
6683
// Set instance variables before calling super.close(), in case super.close() throws an IOException.
67-
closed = true;
6884
lastChar = EOF;
6985
super.close();
7086
}
@@ -74,7 +90,7 @@ public void close() throws IOException {
7490
*
7591
* @return the current line number
7692
*/
77-
long getCurrentLineNumber() {
93+
long getLineNumber() {
7894
// Check if we are at EOL or EOF or just starting
7995
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == EOF) {
8096
return lineNumber; // counter is accurate
@@ -103,42 +119,6 @@ long getPosition() {
103119
return this.position;
104120
}
105121

106-
public boolean isClosed() {
107-
return closed;
108-
}
109-
110-
/**
111-
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
112-
* still return this value. Does not affect the line number or the last character.
113-
*
114-
* @return the next character
115-
*
116-
* @throws IOException
117-
* If an I/O error occurs
118-
*/
119-
int peek() throws IOException {
120-
super.mark(1);
121-
final int c = super.read();
122-
super.reset();
123-
return c;
124-
}
125-
126-
/**
127-
* Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will
128-
* still return the next value. This doesn't affect the line number or the last character.
129-
*
130-
* @param buf the buffer to fill for the look ahead.
131-
* @return The number of characters peeked, or -1 if the end of the stream has been reached.
132-
* @throws IOException If an I/O error occurs
133-
*/
134-
int peek(final char[] buf) throws IOException {
135-
final int n = buf.length;
136-
super.mark(n);
137-
final int c = super.read(buf, 0, n);
138-
super.reset();
139-
return c;
140-
}
141-
142122
@Override
143123
public int read() throws IOException {
144124
final int current = super.read();

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ long getCharacterPosition() {
8989
* @return the current line number
9090
*/
9191
long getCurrentLineNumber() {
92-
return reader.getCurrentLineNumber();
92+
return reader.getLineNumber();
9393
}
9494

9595
String getFirstEol() {

src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -61,32 +61,32 @@ public void testReadChar() throws Exception {
6161
final int EOLeolct = 9;
6262

6363
try (final ExtendedBufferedReader br = createBufferedReader(test)) {
64-
assertEquals(0, br.getCurrentLineNumber());
64+
assertEquals(0, br.getLineNumber());
6565
int lineCount = 0;
6666
while (br.readLine() != null) {
6767
// consume all
6868
lineCount++;
6969
}
70-
assertEquals(EOLeolct, br.getCurrentLineNumber());
71-
assertEquals(lineCount, br.getCurrentLineNumber());
70+
assertEquals(EOLeolct, br.getLineNumber());
71+
assertEquals(lineCount, br.getLineNumber());
7272
}
7373
try (final ExtendedBufferedReader br = createBufferedReader(test)) {
74-
assertEquals(0, br.getCurrentLineNumber());
74+
assertEquals(0, br.getLineNumber());
7575
int readCount = 0;
7676
while (br.read() != EOF) {
7777
// consume all
7878
readCount++;
7979
}
80-
assertEquals(EOLeolct, br.getCurrentLineNumber());
80+
assertEquals(EOLeolct, br.getLineNumber());
8181
assertEquals(readCount, test.length());
8282
}
8383
try (final ExtendedBufferedReader br = createBufferedReader(test)) {
84-
assertEquals(0, br.getCurrentLineNumber());
84+
assertEquals(0, br.getLineNumber());
8585
final char[] buff = new char[10];
8686
while (br.read(buff, 0, 3) != EOF) {
8787
// consume all
8888
}
89-
assertEquals(EOLeolct, br.getCurrentLineNumber());
89+
assertEquals(EOLeolct, br.getLineNumber());
9090
}
9191
}
9292

@@ -96,7 +96,7 @@ public void testReadingInDifferentBuffer() throws Exception {
9696
try (ExtendedBufferedReader reader = createBufferedReader("1\r\n2\r\n")) {
9797
reader.read(tmp1, 0, 2);
9898
reader.read(tmp2, 2, 2);
99-
assertEquals(2, reader.getCurrentLineNumber());
99+
assertEquals(2, reader.getLineNumber());
100100
}
101101
}
102102

@@ -110,28 +110,28 @@ public void testReadLine() throws Exception {
110110
assertNull(br.readLine());
111111
}
112112
try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) {
113-
assertEquals(0, br.getCurrentLineNumber());
113+
assertEquals(0, br.getLineNumber());
114114
assertEquals("foo", br.readLine());
115-
assertEquals(1, br.getCurrentLineNumber());
115+
assertEquals(1, br.getLineNumber());
116116
assertEquals("", br.readLine());
117-
assertEquals(2, br.getCurrentLineNumber());
117+
assertEquals(2, br.getLineNumber());
118118
assertEquals("hello", br.readLine());
119-
assertEquals(3, br.getCurrentLineNumber());
119+
assertEquals(3, br.getLineNumber());
120120
assertNull(br.readLine());
121-
assertEquals(3, br.getCurrentLineNumber());
121+
assertEquals(3, br.getLineNumber());
122122
}
123123
try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) {
124124
assertEquals('f', br.read());
125125
assertEquals('o', br.peek());
126126
assertEquals("oo", br.readLine());
127-
assertEquals(1, br.getCurrentLineNumber());
127+
assertEquals(1, br.getLineNumber());
128128
assertEquals('\n', br.peek());
129129
assertEquals("", br.readLine());
130-
assertEquals(2, br.getCurrentLineNumber());
130+
assertEquals(2, br.getLineNumber());
131131
assertEquals('h', br.peek());
132132
assertEquals("hello", br.readLine());
133133
assertNull(br.readLine());
134-
assertEquals(3, br.getCurrentLineNumber());
134+
assertEquals(3, br.getLineNumber());
135135
}
136136
try (final ExtendedBufferedReader br = createBufferedReader("foo\rbaar\r\nfoo")) {
137137
assertEquals("foo", br.readLine());
@@ -146,58 +146,58 @@ public void testReadLine() throws Exception {
146146
@Test
147147
public void testReadLookahead1() throws Exception {
148148
try (final ExtendedBufferedReader br = createBufferedReader("1\n2\r3\n")) {
149-
assertEquals(0, br.getCurrentLineNumber());
149+
assertEquals(0, br.getLineNumber());
150150
assertEquals('1', br.peek());
151151
assertEquals(UNDEFINED, br.getLastChar());
152-
assertEquals(0, br.getCurrentLineNumber());
152+
assertEquals(0, br.getLineNumber());
153153
assertEquals('1', br.read()); // Start line 1
154154
assertEquals('1', br.getLastChar());
155155

156-
assertEquals(1, br.getCurrentLineNumber());
156+
assertEquals(1, br.getLineNumber());
157157
assertEquals('\n', br.peek());
158-
assertEquals(1, br.getCurrentLineNumber());
158+
assertEquals(1, br.getLineNumber());
159159
assertEquals('1', br.getLastChar());
160160
assertEquals('\n', br.read());
161-
assertEquals(1, br.getCurrentLineNumber());
161+
assertEquals(1, br.getLineNumber());
162162
assertEquals('\n', br.getLastChar());
163-
assertEquals(1, br.getCurrentLineNumber());
163+
assertEquals(1, br.getLineNumber());
164164

165165
assertEquals('2', br.peek());
166-
assertEquals(1, br.getCurrentLineNumber());
166+
assertEquals(1, br.getLineNumber());
167167
assertEquals('\n', br.getLastChar());
168-
assertEquals(1, br.getCurrentLineNumber());
168+
assertEquals(1, br.getLineNumber());
169169
assertEquals('2', br.read()); // Start line 2
170-
assertEquals(2, br.getCurrentLineNumber());
170+
assertEquals(2, br.getLineNumber());
171171
assertEquals('2', br.getLastChar());
172172

173173
assertEquals('\r', br.peek());
174-
assertEquals(2, br.getCurrentLineNumber());
174+
assertEquals(2, br.getLineNumber());
175175
assertEquals('2', br.getLastChar());
176176
assertEquals('\r', br.read());
177177
assertEquals('\r', br.getLastChar());
178-
assertEquals(2, br.getCurrentLineNumber());
178+
assertEquals(2, br.getLineNumber());
179179

180180
assertEquals('3', br.peek());
181181
assertEquals('\r', br.getLastChar());
182182
assertEquals('3', br.read()); // Start line 3
183183
assertEquals('3', br.getLastChar());
184-
assertEquals(3, br.getCurrentLineNumber());
184+
assertEquals(3, br.getLineNumber());
185185

186186
assertEquals('\n', br.peek());
187-
assertEquals(3, br.getCurrentLineNumber());
187+
assertEquals(3, br.getLineNumber());
188188
assertEquals('3', br.getLastChar());
189189
assertEquals('\n', br.read());
190-
assertEquals(3, br.getCurrentLineNumber());
190+
assertEquals(3, br.getLineNumber());
191191
assertEquals('\n', br.getLastChar());
192-
assertEquals(3, br.getCurrentLineNumber());
192+
assertEquals(3, br.getLineNumber());
193193

194194
assertEquals(EOF, br.peek());
195195
assertEquals('\n', br.getLastChar());
196196
assertEquals(EOF, br.read());
197197
assertEquals(EOF, br.getLastChar());
198198
assertEquals(EOF, br.read());
199199
assertEquals(EOF, br.peek());
200-
assertEquals(3, br.getCurrentLineNumber());
200+
assertEquals(3, br.getLineNumber());
201201

202202
}
203203
}

0 commit comments

Comments
 (0)