Skip to content

Commit 3ac702b

Browse files
authored
CSV-278: Reuse Buffers in Lexer for Delimiter Detection (apache#162)
* CSV-278: Reuse Buffers in Lexer for Delimiter Detection
* Remove erroneous tab character
* Reduce change set with fewer formatting changes
* Reduce change set with fewer formatting changes
1 parent a4e005f commit 3ac702b

2 files changed

Lines changed: 29 additions & 12 deletions

File tree

src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,21 @@ int lookAhead() throws IOException {
132132
*/
133133
char[] lookAhead(final int n) throws IOException {
134134
final char[] buf = new char[n];
135+
return lookAhead(buf);
136+
}
137+
138+
/**
139+
* Populates the buffer with the next {@code buf.length} characters in the
140+
* current reader without consuming them. The next call to {@link #read()} will
141+
* still return the next value. This doesn't affect line number or last
142+
* character.
143+
*
144+
* @param buf the buffer to fill for the look ahead.
145+
* @return the buffer itself
146+
* @throws IOException If an I/O error occurs
147+
*/
148+
char[] lookAhead(final char[] buf) throws IOException {
149+
final int n = buf.length;
135150
super.mark(n);
136151
super.read(buf, 0, n);
137152
super.reset();

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ final class Lexer implements Closeable {
4949
private static final char DISABLED = '\ufffe';
5050

5151
private final char[] delimiter;
52+
private final char[] delimiterBuf;
53+
private final char[] escapeDelimiterBuf;
5254
private final char escape;
5355
private final char quoteChar;
5456
private final char commentStart;
@@ -68,6 +70,8 @@ final class Lexer implements Closeable {
6870
this.commentStart = mapNullToDisabled(format.getCommentMarker());
6971
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
7072
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
73+
this.delimiterBuf = new char[delimiter.length - 1];
74+
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
7175
}
7276

7377
/**
@@ -112,7 +116,7 @@ boolean isCommentStart(final int ch) {
112116
}
113117

114118
/**
115-
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(int)}
119+
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
116120
*
117121
* @param ch
118122
* the current character.
@@ -126,14 +130,13 @@ boolean isDelimiter(final int ch) throws IOException {
126130
if (delimiter.length == 1) {
127131
return true;
128132
}
129-
final int len = delimiter.length - 1;
130-
final char[] buf = reader.lookAhead(len);
131-
for (int i = 0; i < len; i++) {
132-
if (buf[i] != delimiter[i+1]) {
133+
reader.lookAhead(delimiterBuf);
134+
for (int i = 0; i < delimiterBuf.length; i++) {
135+
if (delimiterBuf[i] != delimiter[i+1]) {
133136
return false;
134137
}
135138
}
136-
final int count = reader.read(buf, 0, len);
139+
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
137140
return count != END_OF_STREAM;
138141
}
139142

@@ -156,25 +159,24 @@ boolean isEscape(final int ch) {
156159
}
157160

158161
/**
159-
* Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(int)}.
162+
* Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
160163
*
161164
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
162165
*
163166
* @return true if the next characters constitute an escape delimiter.
164167
* @throws IOException If an I/O error occurs.
165168
*/
166169
boolean isEscapeDelimiter() throws IOException {
167-
final int len = 2 * delimiter.length - 1;
168-
final char[] buf = reader.lookAhead(len);
169-
if (buf[0] != delimiter[0]) {
170+
reader.lookAhead(escapeDelimiterBuf);
171+
if (escapeDelimiterBuf[0] != delimiter[0]) {
170172
return false;
171173
}
172174
for (int i = 1; i < delimiter.length; i++) {
173-
if (buf[2 * i] != delimiter[i] || buf[2 * i - 1] != escape) {
175+
if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) {
174176
return false;
175177
}
176178
}
177-
final int count = reader.read(buf, 0, len);
179+
final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length);
178180
return count != END_OF_STREAM;
179181
}
180182

0 commit comments

Comments
 (0)