apache · belugabehr · Jul 13, 2021
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -1946,17 +1946,17 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
         int start = 0;
         int pos = 0;
 
-        @SuppressWarnings("resource") // Temp reader on input reader.
-        final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader);
         final char[] delim = getDelimiterString().toCharArray();
         final int delimLength = delim.length;
+        @SuppressWarnings("resource") // Temp reader on input reader.
+        final ExtendedPushbackReader bufferedReader = ExtendedPushbackReader.create(reader, delimLength);
         final char escape = getEscapeCharacter().charValue();
         final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
 
         int c;
         while (-1 != (c = bufferedReader.read())) {
             builder.append((char) c);
-            final boolean isDelimiterStart = isDelimiter((char) c, builder.toString() + new String(bufferedReader.lookAhead(delimLength - 1)), pos, delim,
+            final boolean isDelimiterStart = isDelimiter((char) c, builder.toString() + new String(bufferedReader.peek(delimLength - 1)), pos, delim,
                     delimLength);
             if (c == CR || c == LF || c == escape || isDelimiterStart) {
                 // write out segment up until this char

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -19,6 +19,7 @@
 
 import static org.apache.commons.csv.Token.Type.TOKEN;
 
+import java.io.BufferedInputStream;
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
@@ -276,7 +277,7 @@ public static CSVParser parse(final InputStream inputStream, final Charset chars
     public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
         Objects.requireNonNull(path, "path");
         Objects.requireNonNull(format, "format");
-        return parse(Files.newInputStream(path), charset, format);
+        return parse(new BufferedInputStream(Files.newInputStream(path)), charset, format);
     }
 
     /**
@@ -427,7 +428,7 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
         Objects.requireNonNull(format, "format");
 
         this.format = format.copy();
-        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
+        this.lexer = new Lexer(format, reader);
         this.csvRecordIterator = new CSVRecordIterator();
         this.headers = createHeaders();
         this.characterOffset = characterOffset;

diff --git a/...e/commons/csv/ExtendedBufferedReader.java → ...e/commons/csv/ExtendedPushbackReader.java b/...e/commons/csv/ExtendedBufferedReader.java → ...e/commons/csv/ExtendedPushbackReader.java
@@ -22,9 +22,11 @@
 import static org.apache.commons.csv.Constants.LF;
 import static org.apache.commons.csv.Constants.UNDEFINED;
 
-import java.io.BufferedReader;
 import java.io.IOException;
+import java.io.PushbackReader;
 import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
 
 /**
  * A special buffered reader which supports sophisticated read access.
@@ -33,7 +35,7 @@
  * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
  * </p>
  */
-final class ExtendedBufferedReader extends BufferedReader {
+final class ExtendedPushbackReader extends PushbackReader {
 
     /** The last char returned */
     private int lastChar = UNDEFINED;
@@ -49,8 +51,8 @@ final class ExtendedBufferedReader extends BufferedReader {
     /**
      * Created extended buffered reader using default buffer-size
      */
-    ExtendedBufferedReader(final Reader reader) {
-        super(reader);
+    private ExtendedPushbackReader(final Reader reader, final int delimiterSize) {
+        super(reader, Math.max(1, 2 * delimiterSize));
     }
 
     /**
@@ -105,55 +107,6 @@ public boolean isClosed() {
         return closed;
     }
 
-    /**
-     * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
-     * still return this value. Does not affect line number or last character.
-     *
-     * @return the next character
-     *
-     * @throws IOException
-     *             If an I/O error occurs
-     */
-    int lookAhead() throws IOException {
-        super.mark(1);
-        final int c = super.read();
-        super.reset();
-
-        return c;
-    }
-
-    /**
-     * Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This
-     * doesn't affect line number or last character.
-     *
-     * @param n the number characters look ahead.
-     * @return the next n characters.
-     * @throws IOException If an I/O error occurs
-     */
-    char[] lookAhead(final int n) throws IOException {
-        final char[] buf = new char[n];
-        return lookAhead(buf);
-    }
-
-    /**
-     * Populates the buffer with the next {@code buf.length} characters in the
-     * current reader without consuming them. The next call to {@link #read()} will
-     * still return the next value. This doesn't affect line number or last
-     * character.
-     *
-     * @param buf the buffer to fill for the look ahead.
-     * @return the buffer itself
-     * @throws IOException If an I/O error occurs
-     */
-    char[] lookAhead(final char[] buf) throws IOException {
-        final int n = buf.length;
-        super.mark(n);
-        super.read(buf, 0, n);
-        super.reset();
-
-        return buf;
-    }
-
     @Override
     public int read() throws IOException {
         final int current = super.read();
@@ -166,6 +119,48 @@ public int read() throws IOException {
         return lastChar;
     }
 
+    /**
+     * Returns the next character without consuming it; the next call to {@link #read()} still returns it.
+     * The character is fetched with {@link PushbackReader#read()} and pushed straight back, bypassing the
+     * {@code read()} override of this class.
+     *
+     * @return the next character, or {@code END_OF_STREAM} if the end of the stream has been reached.
+     * @throws IOException If an I/O error occurs
+     */
+    int peek() throws IOException {
+        final int current = super.read();
+        if (current != END_OF_STREAM) {
+            super.unread(current);
+        }
+        return current;
+    }
+
+    /**
+     * Returns up to the next {@code n} characters without consuming them; the next call to {@link #read()}
+     * still returns the first of them. Fewer than {@code n} characters (possibly none) are returned when the
+     * stream ends first.
+     *
+     * @param n the number of characters to look ahead; must fit in the pushback buffer sized by the constructor.
+     * @return a new array holding the characters that were available, at most {@code n} long.
+     * @throws IOException If an I/O error occurs
+     */
+    char[] peek(final int n) throws IOException {
+        final char[] buf = new char[n];
+        int count = 0;
+        // Call PushbackReader's bulk read directly: Reader.read(char[]) would dispatch to this class's
+        // read(char[], int, int) override, which is meant for consuming reads. Loop because one read may be short.
+        while (count < n) {
+            final int len = super.read(buf, count, n - count);
+            if (len <= 0) {
+                break; // end of stream: keep what was read so far
+            }
+            count += len;
+        }
+        // count is never negative here, so unread is never handed the -1 end-of-stream marker.
+        super.unread(buf, 0, count);
+        return count == n ? buf : Arrays.copyOf(buf, count);
+    }
+
     @Override
     public int read(final char[] buf, final int offset, final int length) throws IOException {
         if (length == 0) {
@@ -198,7 +166,7 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
     }
 
     /**
-     * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
+     * Reads the next line of input, dropping the line terminator(s). This method should only be called
      * when processing a comment, otherwise information can be lost.
      * <p>
      * Increments {@link #eolCounter}.
@@ -209,18 +177,59 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
      *
      * @return the line that was read, or null if reached EOF.
      */
-    @Override
     public String readLine() throws IOException {
-        final String line = super.readLine();
+        final StringBuilder sb = new StringBuilder(64);
+        // Relies on read() advancing eolCounter when it consumes a line terminator
+        final long startEolCounter = eolCounter;
 
-        if (line != null) {
-            lastChar = LF; // needed for detecting start of line
-            eolCounter++;
-        } else {
-            lastChar = END_OF_STREAM;
+        int c = this.read();
+
+        // Nothing left to read
+        if (c == END_OF_STREAM) {
+            return null;
+        }
+        // Collect characters until a line terminator has been consumed (not entered for an empty line)
+        while (eolCounter == startEolCounter) {
+            sb.append((char) c);
+            c = this.read();
+            if (c == END_OF_STREAM) {
+                // Unterminated last line: return it instead of spinning on EOF, and count it as a
+                // line like the BufferedReader-based implementation being replaced did
+                eolCounter++;
+                return sb.toString();
+            }
+        }
+
+        // If the line is terminated with CR+LF, consume the LF as well
+        if (c == CR && peek() == LF) {
+            this.read();
         }
 
-        return line;
+        return sb.toString();
     }
 
+    /**
+     * Creates an ExtendedPushbackReader for the given {@code Reader} with space
+     * for a delimiter with {@code delimiterSize} characters.
+     */
+    static ExtendedPushbackReader create(Reader reader, int delimiterSize) {
+        return new ExtendedPushbackReader(reader, delimiterSize);
+    }
+
+    /**
+     * Creates an ExtendedPushbackReader for the given string with space for a
+     * delimiter with {@code delimiterSize} characters.
+     */
+    static ExtendedPushbackReader create(String string, int delimiterSize) {
+        return create(new StringReader(string), delimiterSize);
+    }
+
+    /**
+     * Creates an ExtendedPushbackReader for the given string with space for a
+     * single delimiter character.
+     */
+    static ExtendedPushbackReader create(String string) {
+        return create(string, 1);
+    }
+
 }
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -32,6 +32,7 @@
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.io.Reader;
 
 /**
  * Lexical analyzer.
@@ -59,11 +60,10 @@ final class Lexer implements Closeable {
     private final boolean ignoreEmptyLines;
 
     /** The input stream */
-    private final ExtendedBufferedReader reader;
+    private final ExtendedPushbackReader reader;
     private String firstEol;
 
-    Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
-        this.reader = reader;
+    Lexer(final CSVFormat format, final Reader reader) {
         this.delimiter = format.getDelimiterString().toCharArray();
         this.escape = mapNullToDisabled(format.getEscapeCharacter());
         this.quoteChar = mapNullToDisabled(format.getQuoteCharacter());
@@ -72,6 +72,7 @@ final class Lexer implements Closeable {
         this.ignoreEmptyLines = format.getIgnoreEmptyLines();
         this.delimiterBuf = new char[delimiter.length - 1];
         this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
+        this.reader = ExtendedPushbackReader.create(reader, this.delimiter.length);
     }
 
     /**
@@ -116,7 +117,7 @@ boolean isCommentStart(final int ch) {
     }
 
     /**
-     * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
+     * Determine whether the next characters constitute a delimiter.
      *
      * @param ch
      *             the current character.
@@ -130,14 +131,27 @@ boolean isDelimiter(final int ch) throws IOException {
         if (delimiter.length == 1) {
           return true;
         }
-        reader.lookAhead(delimiterBuf);
+
+        // Read the candidate remainder of the delimiter; it is pushed back below unless it matches
+        final int count = reader.read(delimiterBuf);
+
+        if (count < delimiterBuf.length) {
+            if (count > 0) {
+                // incomplete read, put back what was read; at end of stream count is -1 and nothing was read
+                reader.unread(delimiterBuf, 0, count);
+            }
+            return false;
+        }
+
         for (int i = 0; i < delimiterBuf.length; i++) {
             if (delimiterBuf[i] != delimiter[i+1]) {
+                // not the delimiter: give the characters back so they are read again as content
+                reader.unread(delimiterBuf);
                 return false;
             }
         }
-        final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
-        return count != END_OF_STREAM;
+
+        return true;
     }
 
     /**
@@ -159,25 +168,38 @@ boolean isEscape(final int ch) {
     }
 
     /**
-     * Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
+     * Tests if the next characters constitute an escape delimiter.
      *
      * For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
      *
      * @return true if the next characters constitute a escape delimiter.
      * @throws IOException If an I/O error occurs.
      */
     boolean isEscapeDelimiter() throws IOException {
-        reader.lookAhead(escapeDelimiterBuf);
+        final int len = escapeDelimiterBuf.length;
+        final int count = reader.read(escapeDelimiterBuf);
+
+        if (count < len) {
+            if (count > 0) {
+                // incomplete read, put back what was read
+                reader.unread(escapeDelimiterBuf, 0, count);
+            }
+          return false;
+        }
+
         if (escapeDelimiterBuf[0] != delimiter[0]) {
+            reader.unread(escapeDelimiterBuf);
             return false;
         }
+
         for (int i = 1; i < delimiter.length; i++) {
             if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) {
+                reader.unread(escapeDelimiterBuf);
                 return false;
             }
         }
-        final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length);
-        return count != END_OF_STREAM;
+
+        return true;
     }
 
     private boolean isMetaChar(final int ch) {
@@ -338,7 +360,7 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
                     }
                 }
             } else if (isQuoteChar(c)) {
-                if (isQuoteChar(reader.lookAhead())) {
+                if (isQuoteChar(reader.peek())) {
                     // double or escaped encapsulator -> add single encapsulator to token
                     c = reader.read();
                     token.content.append((char) c);
@@ -445,7 +467,7 @@ private Token parseSimpleToken(final Token token, int ch) throws IOException {
      */
     boolean readEndOfLine(int ch) throws IOException {
         // check if we have \r\n...
-        if (ch == CR && reader.lookAhead() == LF) {
+        if (ch == CR && reader.peek() == LF) {
             // note: does not change ch outside of this method!
             ch = reader.read();
             // Save the EOL state
@@ -469,7 +491,7 @@ boolean readEndOfLine(int ch) throws IOException {
     /**
      * Handle an escape sequence.
      * The current character must be the escape character.
-     * On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
+     * On return, the next character is available by calling {@link ExtendedPushbackReader#getLastChar()}
      * on the input stream.
      *
      * @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if char following the escape is