Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -1946,17 +1946,17 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
int start = 0;
int pos = 0;

@SuppressWarnings("resource") // Temp reader on input reader.
final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader);
final char[] delim = getDelimiterString().toCharArray();
final int delimLength = delim.length;
@SuppressWarnings("resource") // Temp reader on input reader.
final ExtendedPushbackReader bufferedReader = ExtendedPushbackReader.create(reader, delimLength);
final char escape = getEscapeCharacter().charValue();
final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);

int c;
while (-1 != (c = bufferedReader.read())) {
builder.append((char) c);
final boolean isDelimiterStart = isDelimiter((char) c, builder.toString() + new String(bufferedReader.lookAhead(delimLength - 1)), pos, delim,
final boolean isDelimiterStart = isDelimiter((char) c, builder.toString() + new String(bufferedReader.peek(delimLength - 1)), pos, delim,
delimLength);
if (c == CR || c == LF || c == escape || isDelimiterStart) {
// write out segment up until this char
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -276,7 +277,7 @@ public static CSVParser parse(final InputStream inputStream, final Charset chars
public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
Objects.requireNonNull(path, "path");
Objects.requireNonNull(format, "format");
return parse(Files.newInputStream(path), charset, format);
return parse(new BufferedInputStream(Files.newInputStream(path)), charset, format);
}

/**
Expand Down Expand Up @@ -427,7 +428,7 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
Objects.requireNonNull(format, "format");

this.format = format.copy();
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
this.lexer = new Lexer(format, reader);
this.csvRecordIterator = new CSVRecordIterator();
this.headers = createHeaders();
this.characterOffset = characterOffset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.UNDEFINED;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;

/**
* A special buffered reader which supports sophisticated read access.
Expand All @@ -33,7 +35,7 @@
* {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
* </p>
*/
final class ExtendedBufferedReader extends BufferedReader {
final class ExtendedPushbackReader extends PushbackReader {

/** The last char returned */
private int lastChar = UNDEFINED;
Expand All @@ -49,8 +51,8 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Creates an extended pushback reader whose pushback buffer holds {@code max(1, 2 * delimiterSize)} characters.
*/
ExtendedBufferedReader(final Reader reader) {
super(reader);
private ExtendedPushbackReader(final Reader reader, final int delimiterSize) {
super(reader, Math.max(1, 2 * delimiterSize));
}

/**
Expand Down Expand Up @@ -105,55 +107,6 @@ public boolean isClosed() {
return closed;
}

/**
 * Peeks at the next character of the underlying buffered reader. The character is not consumed, so a subsequent
 * {@link #read()} returns the same value. Line number and last character are left untouched.
 *
 * @return the next character
 *
 * @throws IOException
 *             If an I/O error occurs
 */
int lookAhead() throws IOException {
    // Mark with a read-ahead limit of one char, read it, then rewind to the mark.
    super.mark(1);
    final int next = super.read();
    super.reset();
    return next;
}

/**
 * Peeks at the next {@code n} characters without consuming them, so the next call to {@link #read()} still returns
 * the next value. Line number and last character are not affected.
 *
 * @param n how many characters to look ahead.
 * @return a newly allocated array of length {@code n} filled by {@link #lookAhead(char[])}.
 * @throws IOException If an I/O error occurs
 */
char[] lookAhead(final int n) throws IOException {
    return lookAhead(new char[n]);
}

/**
 * Fills {@code buf} with the upcoming {@code buf.length} characters of the current reader without consuming
 * them; the next call to {@link #read()} still returns the next value. Line number and last character are not
 * affected.
 *
 * @param buf the buffer that receives the look-ahead characters.
 * @return the same {@code buf} instance that was passed in
 * @throws IOException If an I/O error occurs
 */
char[] lookAhead(final char[] buf) throws IOException {
    // Mark far enough ahead to cover the whole buffer, read into it, then rewind.
    super.mark(buf.length);
    super.read(buf, 0, buf.length);
    super.reset();
    return buf;
}

@Override
public int read() throws IOException {
final int current = super.read();
Expand All @@ -166,6 +119,21 @@ public int read() throws IOException {
return lastChar;
}

/**
 * Returns the next character without consuming it, so the next call to {@link #read()} still returns it.
 * <p>
 * The character is read through the superclass and, unless the end of the stream was reached, pushed straight
 * back.
 * </p>
 *
 * @return the next character, or {@code END_OF_STREAM} if there is none.
 * @throws IOException If an I/O error occurs
 */
int peek() throws IOException {
    final int next = super.read();
    if (next == END_OF_STREAM) {
        // Nothing was read, so there is nothing to push back.
        return next;
    }
    super.unread(next);
    return next;
}

/**
 * Returns up to the next {@code n} characters without consuming them, so the next call to {@link #read()} still
 * returns the first of them.
 * <p>
 * If fewer than {@code n} characters remain before the end of the stream, the returned array is shorter than
 * {@code n}; at the end of the stream it is empty.
 * </p>
 *
 * @param n the maximum number of characters to peek at.
 * @return a new array holding the characters that were available, at most {@code n} long.
 * @throws IOException If an I/O error occurs, including a pushback buffer too small to take the characters back.
 */
char[] peek(int n) throws IOException {
    final char[] buf = new char[n];
    int count = 0;
    // Read through PushbackReader.read(char[], int, int) explicitly: Reader.read(char[]) would dispatch to this
    // class's overriding read(char[], int, int) instead of performing a plain read.
    while (count < n) {
        final int got = super.read(buf, count, n - count);
        if (got <= 0) {
            // End of stream (or no progress): keep what has been collected so far.
            break;
        }
        count += got;
    }
    if (count > 0) {
        // Push back only what was actually read; never call unread with a negative length.
        super.unread(buf, 0, count);
    }
    return count == n ? buf : Arrays.copyOf(buf, count);
}

@Override
public int read(final char[] buf, final int offset, final int length) throws IOException {
if (length == 0) {
Expand Down Expand Up @@ -198,7 +166,7 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
}

/**
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
* Reads the next line of input, dropping the line terminator(s). This method should only be called
* when processing a comment, otherwise information can be lost.
* <p>
* Increments {@link #eolCounter}.
Expand All @@ -209,18 +177,60 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
*
* @return the line that was read, or null if reached EOF.
*/
@Override
public String readLine() throws IOException {
final String line = super.readLine();
StringBuilder sb = new StringBuilder(64);
final long startEolCounter = eolCounter;

if (line != null) {
lastChar = LF; // needed for detecting start of line
eolCounter++;
} else {
lastChar = END_OF_STREAM;
int c = this.read();

// First read was EOS
if (c == END_OF_STREAM) {
return null;
}
// First read was EOL
if (eolCounter != startEolCounter) {
if (c == CR && peek() == LF) {
this.read();
}
return "";
}
// Read until new line is hit
do {
sb.append((char)c);
c = this.read();
}
while (eolCounter == startEolCounter);

// If the line is terminated with CR+LF, trim the LF
if (c == CR && peek() == LF) {
this.read();
}

return line;
return sb.toString();
}

/**
 * Factory for an ExtendedPushbackReader wrapping {@code reader}, sized for a delimiter of
 * {@code delimiterSize} characters.
 *
 * @param reader the reader to wrap.
 * @param delimiterSize the number of characters in the delimiter.
 * @return a new ExtendedPushbackReader.
 */
static ExtendedPushbackReader create(Reader reader, int delimiterSize) {
    final ExtendedPushbackReader pushbackReader = new ExtendedPushbackReader(reader, delimiterSize);
    return pushbackReader;
}

/**
 * Factory for an ExtendedPushbackReader that reads from {@code string}, sized for a delimiter of
 * {@code delimiterSize} characters.
 *
 * @param string the text to read.
 * @param delimiterSize the number of characters in the delimiter.
 * @return a new ExtendedPushbackReader over a {@link StringReader} for {@code string}.
 */
static ExtendedPushbackReader create(String string, int delimiterSize) {
    final StringReader source = new StringReader(string);
    return create(source, delimiterSize);
}

/**
 * Factory for an ExtendedPushbackReader that reads from {@code string}, sized for a single-character
 * delimiter.
 *
 * @param string the text to read.
 * @return a new ExtendedPushbackReader created with a delimiter size of 1.
 */
static ExtendedPushbackReader create(String string) {
    final int singleCharDelimiter = 1;
    return create(string, singleCharDelimiter);
}

}
50 changes: 36 additions & 14 deletions src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;

/**
* Lexical analyzer.
Expand Down Expand Up @@ -59,11 +60,10 @@ final class Lexer implements Closeable {
private final boolean ignoreEmptyLines;

/** The input stream */
private final ExtendedBufferedReader reader;
private final ExtendedPushbackReader reader;
private String firstEol;

Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
this.reader = reader;
Lexer(final CSVFormat format, final Reader reader) {
this.delimiter = format.getDelimiterString().toCharArray();
this.escape = mapNullToDisabled(format.getEscapeCharacter());
this.quoteChar = mapNullToDisabled(format.getQuoteCharacter());
Expand All @@ -72,6 +72,7 @@ final class Lexer implements Closeable {
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
this.delimiterBuf = new char[delimiter.length - 1];
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
this.reader = ExtendedPushbackReader.create(reader, this.delimiter.length);
}

/**
Expand Down Expand Up @@ -116,7 +117,7 @@ boolean isCommentStart(final int ch) {
}

/**
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
* Determine whether the next characters constitute a delimiter.
*
* @param ch
* the current character.
Expand All @@ -130,14 +131,22 @@ boolean isDelimiter(final int ch) throws IOException {
if (delimiter.length == 1) {
return true;
}
reader.lookAhead(delimiterBuf);

final int count = reader.read(delimiterBuf);

if (count < delimiterBuf.length) {
reader.unread(delimiterBuf, 0, count);
return false;
}

for (int i = 0; i < delimiterBuf.length; i++) {
if (delimiterBuf[i] != delimiter[i+1]) {
reader.unread(delimiterBuf);
return false;
}
}
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
return count != END_OF_STREAM;

return true;
}

/**
Expand All @@ -159,25 +168,38 @@ boolean isEscape(final int ch) {
}

/**
* Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
* Tests if the next characters constitute an escape delimiter.
*
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
*
* @return true if the next characters constitute an escape delimiter.
* @throws IOException If an I/O error occurs.
*/
boolean isEscapeDelimiter() throws IOException {
reader.lookAhead(escapeDelimiterBuf);
final int len = escapeDelimiterBuf.length;
final int count = reader.read(escapeDelimiterBuf);

if (count < len) {
if (count > 0) {
// incomplete read, put back what was read
reader.unread(escapeDelimiterBuf, 0, count);
}
return false;
}

if (escapeDelimiterBuf[0] != delimiter[0]) {
reader.unread(escapeDelimiterBuf);
return false;
}

for (int i = 1; i < delimiter.length; i++) {
if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) {
reader.unread(escapeDelimiterBuf);
return false;
}
}
final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length);
return count != END_OF_STREAM;

return true;
}

private boolean isMetaChar(final int ch) {
Expand Down Expand Up @@ -338,7 +360,7 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
}
}
} else if (isQuoteChar(c)) {
if (isQuoteChar(reader.lookAhead())) {
if (isQuoteChar(reader.peek())) {
// double or escaped encapsulator -> add single encapsulator to token
c = reader.read();
token.content.append((char) c);
Expand Down Expand Up @@ -445,7 +467,7 @@ private Token parseSimpleToken(final Token token, int ch) throws IOException {
*/
boolean readEndOfLine(int ch) throws IOException {
// check if we have \r\n...
if (ch == CR && reader.lookAhead() == LF) {
if (ch == CR && reader.peek() == LF) {
// note: does not change ch outside of this method!
ch = reader.read();
// Save the EOL state
Expand All @@ -469,7 +491,7 @@ boolean readEndOfLine(int ch) throws IOException {
/**
* Handle an escape sequence.
* The current character must be the escape character.
* On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
* On return, the next character is available by calling {@link ExtendedPushbackReader#getLastChar()}
* on the input stream.
*
* @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if char following the escape is
Expand Down
Loading