Skip to content

Commit afc831b

Browse files
committed
Optimized ExtendedBufferedReader as suggested by Bob Smith in CSV-42 (improves the performance by 30%)
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1299618 13f79535-47bb-0310-9956-ffa450edef68
1 parent 12b600c commit afc831b

1 file changed

Lines changed: 42 additions & 130 deletions

File tree

src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java

Lines changed: 42 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -39,193 +39,105 @@ class ExtendedBufferedReader extends BufferedReader {
3939
/** Undefined state for the lookahead char */
4040
static final int UNDEFINED = -2;
4141

42-
/** The lookahead chars */
43-
private int lookaheadChar = UNDEFINED;
44-
4542
/** The last char returned */
4643
private int lastChar = UNDEFINED;
4744

4845
/** The line counter */
4946
private int lineCounter = 0;
5047

51-
private CharBuffer line = new CharBuffer();
52-
5348
/**
5449
* Creates an extended buffered reader using the default buffer size.
5550
*/
5651
ExtendedBufferedReader(Reader r) {
5752
super(r);
58-
/* note uh: do not fetch the first char here,
59-
* because this might block the method!
60-
*/
6153
}
6254

63-
/**
64-
* Reads the next char from the input stream.
65-
*
66-
* @return the next char or END_OF_STREAM if end of stream has been reached.
67-
*/
6855
@Override
6956
public int read() throws IOException {
70-
// initialize the lookahead
71-
if (lookaheadChar == UNDEFINED) {
72-
lookaheadChar = super.read();
73-
}
74-
lastChar = lookaheadChar;
75-
if (super.ready()) {
76-
lookaheadChar = super.read();
77-
} else {
78-
lookaheadChar = UNDEFINED;
79-
}
57+
lastChar = super.read();
58+
8059
if (lastChar == '\n') {
8160
lineCounter++;
8261
}
8362
return lastChar;
8463
}
8564

8665
/**
87-
* Returns the last read character again.
88-
*
89-
* @return the last read char or UNDEFINED
66+
* Returns the last character that was read as an integer (0 to 65535). This
67+
* will be the last character returned by any of the read methods. This will
68+
* not include a character read using the {@link #lookAhead()} method. If no
69+
* character has been read then this will return {@link #UNDEFINED}. If the
70+
* end of the stream was reached on the last read then this will return
71+
* {@link #END_OF_STREAM}.
72+
*
73+
* @return the last character that was read
9074
*/
9175
int readAgain() {
9276
return lastChar;
9377
}
9478

95-
/**
96-
* Non-blocking reading of len chars into buffer buf starting
97-
* at bufferposition off.
98-
* <p/>
99-
* performs an iterative read on the underlying stream
100-
* as long as the following conditions hold:
101-
* - less than len chars have been read
102-
* - end of stream has not been reached
103-
* - next read is not blocking
104-
*
105-
* @return nof chars actually read or END_OF_STREAM
106-
*/
10779
@Override
10880
public int read(char[] buf, int off, int len) throws IOException {
109-
// do not claim if len == 0
11081
if (len == 0) {
11182
return 0;
11283
}
113-
114-
// init lookahead, but do not block !!
115-
if (lookaheadChar == UNDEFINED) {
116-
if (ready()) {
117-
lookaheadChar = super.read();
118-
} else {
119-
return -1;
120-
}
121-
}
122-
// 'first read of underlying stream'
123-
if (lookaheadChar == -1) {
124-
return -1;
125-
}
126-
// continue until the lookaheadChar would block
127-
int cOff = off;
128-
while (len > 0 && ready()) {
129-
if (lookaheadChar == -1) {
130-
// eof stream reached, do not continue
131-
return cOff - off;
132-
} else {
133-
buf[cOff++] = (char) lookaheadChar;
134-
if (lookaheadChar == '\n') {
84+
85+
int l = super.read(buf, off, len);
86+
87+
if (l > 0) {
88+
lastChar = buf[off + l - 1];
89+
90+
for (int i = off; i < off + l; i++) {
91+
if (buf[i] == '\n') {
13592
lineCounter++;
13693
}
137-
lastChar = lookaheadChar;
138-
lookaheadChar = super.read();
139-
len--;
14094
}
95+
96+
} else if (l == -1) {
97+
lastChar = END_OF_STREAM;
14198
}
142-
return cOff - off;
99+
100+
return l;
143101
}
144102

145-
/**
146-
* @return A String containing the contents of the line, not
147-
* including any line-termination characters, or null
148-
* if the end of the stream has been reached
149-
*/
150103
@Override
151104
public String readLine() throws IOException {
105+
String line = super.readLine();
152106

153-
if (lookaheadChar == UNDEFINED) {
154-
lookaheadChar = super.read();
155-
}
156-
157-
line.clear(); //reuse
158-
159-
// return null if end of stream has been reached
160-
if (lookaheadChar == END_OF_STREAM) {
161-
return null;
162-
}
163-
// do we have a line termination already
164-
char laChar = (char) lookaheadChar;
165-
if (laChar == '\n' || laChar == '\r') {
166-
lastChar = lookaheadChar;
167-
lookaheadChar = super.read();
168-
// ignore '\r\n' as well
169-
if ((char) lookaheadChar == '\n') {
170-
lastChar = lookaheadChar;
171-
lookaheadChar = super.read();
107+
if (line != null) {
108+
if (line.length() > 0) {
109+
lastChar = line.charAt(line.length() - 1);
172110
}
173111
lineCounter++;
174-
return line.toString();
175-
}
176-
177-
// create the rest-of-line return and update the lookahead
178-
line.append(laChar);
179-
String restOfLine = super.readLine(); // TODO involves copying
180-
lastChar = lookaheadChar;
181-
lookaheadChar = super.read();
182-
if (restOfLine != null) {
183-
line.append(restOfLine);
112+
} else {
113+
lastChar = END_OF_STREAM;
184114
}
185-
lineCounter++;
186-
return line.toString();
187-
}
188115

189-
/**
190-
* Unsupported
191-
*/
192-
@Override
193-
public long skip(long n) throws IllegalArgumentException, IOException {
194-
throw new UnsupportedOperationException("CSV has no reason to implement this");
116+
return line;
195117
}
196118

197119
/**
198-
* Returns the next char in the stream without consuming it.
199-
*
200-
* Remember the next char read by read(..) will always be
201-
* identical to lookAhead().
202-
*
203-
* @return the next char (without consuming it) or END_OF_STREAM
120+
* Returns the next character in the current reader without consuming it. So
121+
* the next call to {@link #read()} will still return this value.
122+
*
123+
* @return the next character
124+
*
125+
* @throws IOException if there is an error in reading
204126
*/
205127
int lookAhead() throws IOException {
206-
if (lookaheadChar == UNDEFINED) {
207-
lookaheadChar = super.read();
208-
}
209-
return lookaheadChar;
210-
}
128+
super.mark(1);
129+
int c = super.read();
130+
super.reset();
211131

132+
return c;
133+
}
212134

213135
/**
214136
* Returns the number of lines read so far.
215137
*
216138
* @return the current line number
217139
*/
218140
int getLineNumber() {
219-
return lineCounter > -1 ? lineCounter : -1;
220-
}
221-
222-
/**
223-
* Unsupported.
224-
* @throws UnsupportedOperationException if invoked
225-
*/
226-
@Override
227-
public boolean markSupported() {
228-
throw new UnsupportedOperationException("CSV has no reason to implement this");
141+
return lineCounter;
229142
}
230-
231143
}

0 commit comments

Comments
 (0)