From 7dca28192c48b3b9cb5e27c07215d113811fb401 Mon Sep 17 00:00:00 2001
From: Yuzhan Jiang <36880517+DarrenJAN@users.noreply.github.com>
Date: Tue, 5 Nov 2024 13:57:31 -0500
Subject: [PATCH 1/7] Add support in Commons CSV for tracking byte positions
 during parsing (#9)

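Add support in Commons CSV for tracking byte positions during parsing

When an encoding is supplied, ExtendedBufferedReader uses a CharsetEncoder
to count the encoded bytes of every character returned by read(), treating
a surrogate pair as a single code point. The count is exposed through
Lexer.getBytesRead() and stored on each record as
CSVRecord.getCharacterByte(). The encoding is passed in through a new
CSVParser constructor and a new
CSVFormat.parse(Reader, long, long, String) overload; when no encoding is
given, byte counting is skipped.

Tests cover 3-byte (Japanese) and 4-byte (emoji) UTF-8 input.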
---
 pom.xml                                       |  3 +
 .../org/apache/commons/csv/CSVFormat.java     | 24 ++++++
 .../org/apache/commons/csv/CSVParser.java     | 34 +++++++-
 .../org/apache/commons/csv/CSVRecord.java     | 24 ++++++
 .../commons/csv/ExtendedBufferedReader.java   | 61 +++++++++++++++
 .../java/org/apache/commons/csv/Lexer.java    |  9 +++
 .../org/apache/commons/csv/CSVParserTest.java | 78 +++++++++++++++++++
 .../apache/commons/csv/JiraCsv196Test.java    | 75 ++++++++++++++++++
 .../org/apache/commons/csv/CSV-196/emoji.csv  |  5 ++
 .../apache/commons/csv/CSV-196/japanese.csv   |  4 +
 10 files changed, 315 insertions(+), 2 deletions(-)
 create mode 100644 src/test/java/org/apache/commons/csv/JiraCsv196Test.java
 create mode 100644 src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv
 create mode 100644 src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv
diff --git a/pom.xml b/pom.xml
index da5bc1b4ed..bfdf9e74a7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,6 +28,7 @@
   <url>https://commons.apache.org/proper/commons-csv/</url>
   <inceptionYear>2005</inceptionYear>
   <description>The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types.</description>
+  <packaging>jar</packaging>
 
   <dependencies>
     <dependency>
@@ -231,6 +232,8 @@
               <exclude>src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv</exclude>
               <exclude>src/test/resources/org/apache/commons/csv/csv-167/sample1.csv</exclude>
               <exclude>src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv</exclude>
+              <exclude>src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv</exclude>
+              <exclude>src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv</exclude>
               <exclude>src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv</exclude>
               <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv</exclude>
               <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv</exclude>
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 3d4b43c6ba..9833a26ed1 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2097,6 +2097,30 @@ public CSVParser parse(final Reader reader) throws IOException {
         return new CSVParser(reader, this);
     }
 
+    /**
+     * Parses the specified content.
+     *
+     * <p>
+     * This method provides a way to parse CSV data from an input stream, starting at a specified character offset and record number,
+     * using a specified encoding. It returns a {@link CSVParser} that can be used to iterate over the parsed {@link CSVRecord}s.
+     * </p>
+     *
+     * <p>
+     * For additional parsing options, see the various static parse methods available on {@link CSVParser}.
+     * </p>
+     *
+     * @param reader the input stream
+     * @param characterOffset the character offset to start parsing from
+     * @param recordNumber the initial record number to start counting from
+     * @param encoding the character encoding of the input stream
+     * @return a parser over a stream of {@link CSVRecord}s.
+     * @throws IOException If an I/O error occurs
+     * @throws CSVException Thrown on invalid input.
+     */
+    public CSVParser parse(final Reader reader, final long characterOffset, final long recordNumber, String encoding) throws IOException {
+        return new CSVParser(reader, this, characterOffset, recordNumber, encoding);
+    }
+
     /**
      * Prints to the specified output.
      *
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index f0341cf719..75bf78d20a 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -511,10 +511,39 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
     @SuppressWarnings("resource")
     public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
         throws IOException {
+            this(reader, format, characterOffset, recordNumber, null);
+        }
+
+        /**
+     * Constructs a new instance using the given {@link CSVFormat}
+     *
+     * <p>
+     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
+     * unless you close the {@code reader}.
+     * </p>
+     *
+     * @param reader
+     *            a Reader containing CSV-formatted input. Must not be null.
+     * @param format
+     *            the CSVFormat used for CSV parsing. Must not be null.
+     * @param characterOffset
+     *            Lexer offset when the parser does not start parsing at the beginning of the source.
+     * @param recordNumber
+     *            The next record number to assign
+     * @param encoding
+     *            The encoding to use for the reader
+     * @throws IllegalArgumentException
+     *             If the parameters of the format are inconsistent or if either the reader or format is null.
+     * @throws IOException
+     *             If there is a problem reading the header or skipping the first record
+     * @throws CSVException Thrown on invalid input.
+     */
+    public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
+        String encoding) throws IOException {
         Objects.requireNonNull(reader, "reader");
         Objects.requireNonNull(format, "format");
         this.format = format.copy();
-        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
+        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, encoding));
         this.csvRecordIterator = new CSVRecordIterator();
         this.headers = createHeaders();
         this.characterOffset = characterOffset;
@@ -841,6 +870,7 @@ CSVRecord nextRecord() throws IOException {
         recordList.clear();
         StringBuilder sb = null;
         final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
+        final long startCharByte = lexer.getBytesRead() + this.characterOffset;
         do {
             reusableToken.reset();
             lexer.nextToken(reusableToken);
@@ -878,7 +908,7 @@ CSVRecord nextRecord() throws IOException {
             recordNumber++;
             final String comment = Objects.toString(sb, null);
             result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
-                recordNumber, startCharPosition);
+                recordNumber, startCharPosition, startCharByte);
         }
         return result;
     }
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 1fac65843d..f0a0a6b816 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -48,6 +48,11 @@ public final class CSVRecord implements Serializable, Iterable<String> {
      */
     private final long characterPosition;
 
+    /**
+     * The start byte of this record as a character byte in the source stream.
+     */
+    private final long characterByte;
+
     /** The accumulated comments (if any) */
     private final String comment;
 
@@ -67,8 +72,18 @@ public final class CSVRecord implements Serializable, Iterable<String> {
         this.parser = parser;
         this.comment = comment;
         this.characterPosition = characterPosition;
+        this.characterByte = 0L;
     }
 
+    CSVRecord(final CSVParser parser, final String[] values,  final String comment, final long recordNumber,
+            final long characterPosition, final long characterByte) {
+        this.recordNumber = recordNumber;
+        this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY;
+        this.parser = parser;
+        this.comment = comment;
+        this.characterPosition = characterPosition;
+        this.characterByte = characterByte;
+    }
     /**
      * Returns a value by {@link Enum}.
      *
@@ -144,6 +159,15 @@ public long getCharacterPosition() {
         return characterPosition;
     }
 
+    /**
+     * Returns the start byte of this record as a character byte in the source stream.
+     *
+     * @return the start byte of this record as a character byte in the source stream.
+     */
+    public long getCharacterByte() {
+        return characterByte;
+    }
+
     /**
      * Returns the comment for this record, if any.
      * Note that comments are attached to the following record.
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 18c922a508..2a82d48a5a 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -24,6 +24,10 @@
 
 import java.io.IOException;
 import java.io.Reader;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.input.UnsynchronizedBufferedReader;
@@ -49,6 +53,13 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
     private long position;
     private long positionMark;
 
+    /** The number of bytes read so far */
+    private long bytesRead;
+    private long bytesReadMark;
+
+    /** Encoder used to calculate the bytes of characters */
+    CharsetEncoder encoder;
+
     /**
      * Constructs a new instance using the default buffer size.
      */
@@ -56,6 +67,13 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
         super(reader);
     }
 
+    ExtendedBufferedReader(final Reader reader, String encoding) {
+        super(reader);
+        if (encoding != null) {
+            encoder = Charset.forName(encoding).newEncoder();
+        }
+    }
+
     /**
      * Closes the stream.
      *
@@ -108,6 +126,7 @@ public void mark(final int readAheadLimit) throws IOException {
         lineNumberMark = lineNumber;
         lastCharMark = lastChar;
         positionMark = position;
+        bytesReadMark = bytesRead;
         super.mark(readAheadLimit);
     }
 
@@ -118,11 +137,43 @@ public int read() throws IOException {
             current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
             lineNumber++;
         }
+        if (encoder != null) {
+            this.bytesRead += getCharBytes(current);
+        }
         lastChar = current;
         position++;
         return lastChar;
     }
 
+    /**
+     *  In Java, a char data type are based on the original Unicode
+     *  specification, which defined characters as fixed-width 16-bit entities.
+     *   U+0000 to U+FFFF:
+     *     - BMP, represented using 1 16-bit char
+     *     - Consists of UTF-8 1-byte, 2-byte, some 3-byte chars
+     *   U+10000 to U+10FFFF:
+     *     - Supplementary characters, represented as a pair of characters,
+     *     the first char from the high-surrogates range (\uD800-\uDBFF),
+     *     and the second char from the low-surrogates range (uDC00-\uDFFF).
+     *     - Consists of UTF-8 some 3-byte chars and 4-byte chars
+     */
+    private long getCharBytes(int current) throws CharacterCodingException {
+        char cChar = (char) current;
+        char lChar = (char) lastChar;
+        if (!Character.isSurrogate(cChar)) {
+            return encoder.encode(
+                CharBuffer.wrap(new char[] {cChar})).limit();
+        } else {
+            if (Character.isHighSurrogate(cChar)) {
+                // Move on to the next char (low surrogate)
+                return 0;
+            } else if (Character.isSurrogatePair(lChar, cChar)) {
+                return encoder.encode(
+                    CharBuffer.wrap(new char[] {lChar, cChar})).limit();
+            } else throw new CharacterCodingException();
+        }
+    }
+
     @Override
     public int read(final char[] buf, final int offset, final int length) throws IOException {
         if (length == 0) {
@@ -187,7 +238,17 @@ public void reset() throws IOException {
         lineNumber = lineNumberMark;
         lastChar = lastCharMark;
         position = positionMark;
+        bytesRead = bytesReadMark;
         super.reset();
     }
 
+    /**
+     * Gets the number of bytes read by the reader.
+     *
+     * @return the number of bytes read by the read
+     */
+    long getBytesRead() {
+        return this.bytesRead;
+    }
+
 }
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 6d9c8a4850..afbba4d21d 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -103,6 +103,15 @@ long getCharacterPosition() {
         return reader.getPosition();
     }
 
+    /**
+     * Returns the number of bytes read
+     *
+     * @return the number of bytes read
+     */
+    long getBytesRead() {
+        return reader.getBytesRead();
+    }
+
     /**
      * Returns the current line number
      *
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 8f5d577f66..fd1ecdb021 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -701,6 +701,84 @@ public void testGetHeaderComment_NoComment3() throws IOException {
         }
     }
 
+    @Test
+    public void testGetRecordThreeBytesRead() throws Exception {
+        String code = "id,date,val5,val4\n" +
+            "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" +
+            "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" +
+            "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n";
+        // String code = "'1',4";
+        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
+        final CSVFormat format = CSVFormat.Builder.create()
+                               .setDelimiter(',')
+                               .setQuote('\'')
+                               .build();
+        // CSVParser parser = new CSVParser(new StringReader(code), format, 0L, 1L, "UTF-8");
+        CSVParser parser =  format.parse(new StringReader(code), 0L, 1L, "UTF-8");
+
+        CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
+        assertEquals(0, parser.getRecordNumber());
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(1, record.getRecordNumber());
+        assertEquals(code.indexOf('i'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(2, record.getRecordNumber());
+        assertEquals(code.indexOf('1'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(3, record.getRecordNumber());
+        assertEquals(code.indexOf('2'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), 95);
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(4, record.getRecordNumber());
+        assertEquals(code.indexOf('3'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), 154);
+
+        parser.close();
+
+    }
+
+    @Test
+    public void testGetRecordFourBytesRead() throws Exception {
+        String code = "id,a,b,c\n" +
+            "1,😊,🤔,😂\n" +
+            "2,😊,🤔,😂\n" +
+            "3,😊,🤔,😂\n";
+        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
+        final CSVFormat format = CSVFormat.Builder.create()
+            .setDelimiter(',')
+            .setQuote('\'')
+            .build();
+
+        // CSVParser parser = new CSVParser(new StringReader(code), format, 0L, 1L, "UTF-8");
+        CSVParser parser =  format.parse(new StringReader(code), 0L, 1L, "UTF-8");
+
+        CSVRecord record;
+        assertEquals(0, parser.getRecordNumber());
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(1, record.getRecordNumber());
+        assertEquals(code.indexOf('i'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(2, record.getRecordNumber());
+        assertEquals(code.indexOf('1'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(3, record.getRecordNumber());
+        assertEquals(code.indexOf('2'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), 26);
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(4, record.getRecordNumber());
+        assertEquals(code.indexOf('3'), record.getCharacterPosition());
+        assertEquals(record.getCharacterByte(), 43);
+        parser.close();
+    }
+
     @Test
     public void testGetHeaderMap() throws Exception {
         try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
new file mode 100644
index 0000000000..7dbc23cafa
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.csv;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+
+import org.junit.jupiter.api.Test;
+
+
+public class JiraCsv196Test {
+    @Test
+    public void parseThreeBytes() throws IOException {
+
+        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
+        final CSVFormat format = CSVFormat.Builder.create()
+                               .setDelimiter(',')
+                               .setQuote('\'')
+                               .build();
+        // CSVParser parser = new CSVParser(getTestInput(
+            // "org/apache/commons/csv/CSV-196/japanese.csv"), format, 0L, 1L, "UTF-8");
+        CSVParser parser =  format.parse(getTestInput(
+            "org/apache/commons/csv/CSV-196/japanese.csv"), 0L, 1L, "UTF-8");
+        long[] charByteKey = {0, 89, 242, 395};
+        int idx = 0;
+        for (CSVRecord record : parser) {
+            assertEquals(charByteKey[idx++], record.getCharacterByte());
+        }
+        parser.close();
+    }
+
+
+    @Test
+    public void parseFourBytes() throws IOException {
+        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
+        final CSVFormat format = CSVFormat.Builder.create()
+            .setDelimiter(',')
+            .setQuote('\'')
+            .build();
+
+        CSVParser parser =  format.parse(getTestInput(
+                "org/apache/commons/csv/CSV-196/emoji.csv"), 0L, 1L, "UTF-8");
+
+        long[] charByteKey = {0, 84, 701, 1318, 1935};
+        int idx = 0;
+        for (CSVRecord record : parser) {
+            assertEquals(charByteKey[idx++], record.getCharacterByte());
+        }
+        parser.close();
+    }
+
+
+    private Reader getTestInput(String path) {
+        return new InputStreamReader(
+            ClassLoader.getSystemClassLoader().getResourceAsStream(path));
+    }
+}
diff --git a/src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv b/src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv
new file mode 100644
index 0000000000..0bff7a44f3
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv
@@ -0,0 +1,5 @@
+id,val1,val2,val3,val4,val5,val6,val7,val8,val9,val10,val11,val12,val13,val14,val15
+1,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄
+2,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄
+3,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄
+4,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄,😄😄😄😄😄😄😄😄😄😄
\ No newline at end of file
diff --git a/src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv b/src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv
new file mode 100644
index 0000000000..b06e04bd6a
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv
@@ -0,0 +1,4 @@
+id,date,val1,val2,val3,val4,val5,val6,val7,val8,val9,val10,val11,val12,val13,val14,val15
+00000000000001,2017-01-01,きちんと節分近くには咲いてる。自然の力ってすごいな～,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15
+00000000000002,2017-01-01,きちんと節分近くには咲いてる。自然の力ってすごいな～,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15
+00000000000003,2017-01-01,きちんと節分近くには咲いてる。自然の力ってすごいな～,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15
\ No newline at end of file

From 3599f5bc44b5772b989212101e1b509ac86122ac Mon Sep 17 00:00:00 2001
From: Yuzhan Jiang <36880517+DarrenJAN@users.noreply.github.com>
Date: Tue, 19 Nov 2024 15:21:18 -0500
Subject: [PATCH 2/7] Add support in Commons CSV for tracking byte positions
 during parsing (#12)

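Add support in Commons CSV for tracking byte positions during parsing

Follow-up to the initial byte-position change:
- Configure the encoding with CSVParser.Builder#setCharset(Charset)
  instead of a String encoding name.
- Remove the CSVFormat.parse(Reader, long, long, String) overload.
- Make the five-argument CSVParser constructor private and have it take a
  Charset.
- Make the ExtendedBufferedReader encoder field private and tidy Javadoc.
- Drop the <packaging>jar</packaging> element added to pom.xml.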
---
 pom.xml                                       |  1 -
 .../org/apache/commons/csv/CSVFormat.java     | 24 -----
 .../org/apache/commons/csv/CSVParser.java     | 30 ++++--
 .../commons/csv/ExtendedBufferedReader.java   | 46 +++++----
 .../org/apache/commons/csv/CSVParserTest.java | 99 +++++++++----------
 .../apache/commons/csv/JiraCsv196Test.java    | 32 +++---
 6 files changed, 110 insertions(+), 122 deletions(-)

diff --git a/pom.xml b/pom.xml
index bfdf9e74a7..a03787382e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,7 +28,6 @@
   <url>https://commons.apache.org/proper/commons-csv/</url>
   <inceptionYear>2005</inceptionYear>
   <description>The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types.</description>
-  <packaging>jar</packaging>
 
   <dependencies>
     <dependency>
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index cabcb5135e..8205f4c47e 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2097,30 +2097,6 @@ public CSVParser parse(final Reader reader) throws IOException {
         return CSVParser.builder().setReader(reader).setFormat(this).get();
     }
 
-    /**
-     * Parses the specified content.
-     *
-     * <p>
-     * This method provides a way to parse CSV data from an input stream, starting at a specified character offset and record number,
-     * using a specified encoding. It returns a {@link CSVParser} that can be used to iterate over the parsed {@link CSVRecord}s.
-     * </p>
-     *
-     * <p>
-     * For additional parsing options, see the various static parse methods available on {@link CSVParser}.
-     * </p>
-     *
-     * @param reader the input stream
-     * @param characterOffset the character offset to start parsing from
-     * @param recordNumber the initial record number to start counting from
-     * @param encoding the character encoding of the input stream
-     * @return a parser over a stream of {@link CSVRecord}s.
-     * @throws IOException If an I/O error occurs
-     * @throws CSVException Thrown on invalid input.
-     */
-    public CSVParser parse(final Reader reader, final long characterOffset, final long recordNumber, String encoding) throws IOException {
-        return new CSVParser(reader, this, characterOffset, recordNumber, encoding);
-    }
-
     /**
      * Prints to the specified output.
      *
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index c48e1da096..024dd562d4 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -153,6 +153,7 @@ public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {
         private CSVFormat format;
         private long characterOffset;
         private long recordNumber = 1;
+        private Charset charset;
 
         /**
          * Constructs a new instance.
@@ -164,7 +165,7 @@ protected Builder() {
         @SuppressWarnings("resource")
         @Override
         public CSVParser get() throws IOException {
-            return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber);
+            return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, charset);
         }
 
         /**
@@ -200,6 +201,16 @@ public Builder setRecordNumber(final long recordNumber) {
             return asThis();
         }
 
+        /**
+         * Sets the character encoding to be used for the reader.
+         *
+         * @param charset the character encoding.
+         * @return this instance.
+         */
+        public Builder setCharset(final Charset charset) {
+            this.charset = charset;
+            return asThis();
+        }
     }
 
     final class CSVRecordIterator implements Iterator<CSVRecord> {
@@ -510,7 +521,7 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
             this(reader, format, characterOffset, recordNumber, null);
         }
 
-        /**
+    /**
      * Constructs a new instance using the given {@link CSVFormat}
      *
      * <p>
@@ -525,21 +536,22 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
      * @param characterOffset
      *            Lexer offset when the parser does not start parsing at the beginning of the source.
      * @param recordNumber
-     *            The next record number to assign
-     * @param encoding
-     *            The encoding to use for the reader
+     *            The next record number to assign.
+     * @param charset
+     *            The character encoding to be used for the reader.
      * @throws IllegalArgumentException
      *             If the parameters of the format are inconsistent or if either the reader or format is null.
      * @throws IOException
-     *             If there is a problem reading the header or skipping the first record
+     *             If there is a problem reading the header or skipping the first record.
      * @throws CSVException Thrown on invalid input.
+     * @since 1.13.0.
      */
-    public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
-        String encoding) throws IOException {
+    private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber, final Charset charset)
+        throws IOException {
         Objects.requireNonNull(reader, "reader");
         Objects.requireNonNull(format, "format");
         this.format = format.copy();
-        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, encoding));
+        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset));
         this.csvRecordIterator = new CSVRecordIterator();
         this.headers = createHeaders();
         this.characterOffset = characterOffset;
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 2a82d48a5a..158f90a755 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -53,12 +53,12 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
     private long position;
     private long positionMark;
 
-    /** The number of bytes read so far */
+    /** The number of bytes read so far. */
     private long bytesRead;
     private long bytesReadMark;
 
-    /** Encoder used to calculate the bytes of characters */
-    CharsetEncoder encoder;
+    /** Encoder for calculating the number of bytes for each character read. */
+    private CharsetEncoder encoder;
 
     /**
      * Constructs a new instance using the default buffer size.
@@ -67,10 +67,10 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
         super(reader);
     }
 
-    ExtendedBufferedReader(final Reader reader, String encoding) {
+    ExtendedBufferedReader(final Reader reader, Charset charset) {
         super(reader);
-        if (encoding != null) {
-            encoder = Charset.forName(encoding).newEncoder();
+        if (charset != null) {
+            encoder = charset.newEncoder();
         }
     }
 
@@ -146,20 +146,30 @@ public int read() throws IOException {
     }
 
     /**
-     *  In Java, a char data type are based on the original Unicode
-     *  specification, which defined characters as fixed-width 16-bit entities.
-     *   U+0000 to U+FFFF:
-     *     - BMP, represented using 1 16-bit char
-     *     - Consists of UTF-8 1-byte, 2-byte, some 3-byte chars
-     *   U+10000 to U+10FFFF:
-     *     - Supplementary characters, represented as a pair of characters,
-     *     the first char from the high-surrogates range (\uD800-\uDBFF),
-     *     and the second char from the low-surrogates range (uDC00-\uDFFF).
-     *     - Consists of UTF-8 some 3-byte chars and 4-byte chars
+     * In Java, the {@code char} data type is based on the original Unicode
+     * specification, which defined characters as fixed-width 16-bit entities.
+     * <p>
+     * The Unicode characters are divided into two main ranges:
+     * <ul>
+     *   <li><b>U+0000 to U+FFFF (Basic Multilingual Plane, BMP):</b>
+     *     <ul>
+     *       <li>Represented using a single 16-bit {@code char}.</li>
+     *       <li>Covers every character whose UTF-8 encoding is 1, 2, or 3 bytes long.</li>
+     *     </ul>
+     *   </li>
+     *   <li><b>U+10000 to U+10FFFF (Supplementary Characters):</b>
+     *     <ul>
+     *       <li>Represented as a pair of {@code char}s:</li>
+     *       <li>The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).</li>
+     *       <li>The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).</li>
+     *       <li>Each supplementary character is encoded as 4 bytes in UTF-8.</li>
+     *     </ul>
+     *   </li>
+     * </ul>
      */
     private long getCharBytes(int current) throws CharacterCodingException {
-        char cChar = (char) current;
-        char lChar = (char) lastChar;
+        final char cChar = (char) current;
+        final char lChar = (char) lastChar;
         if (!Character.isSurrogate(cChar)) {
             return encoder.encode(
                 CharBuffer.wrap(new char[] {cChar})).limit();
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index fd1ecdb021..2b68155624 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -707,38 +707,34 @@ public void testGetRecordThreeBytesRead() throws Exception {
             "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" +
             "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" +
             "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n";
-        // String code = "'1',4";
-        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
         final CSVFormat format = CSVFormat.Builder.create()
-                               .setDelimiter(',')
-                               .setQuote('\'')
-                               .build();
-        // CSVParser parser = new CSVParser(new StringReader(code), format, 0L, 1L, "UTF-8");
-        CSVParser parser =  format.parse(new StringReader(code), 0L, 1L, "UTF-8");
-
-        CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
-        assertEquals(0, parser.getRecordNumber());
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(1, record.getRecordNumber());
-        assertEquals(code.indexOf('i'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+            .setDelimiter(',')
+            .setQuote('\'')
+            .get();
+        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).get() ) {
+            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
 
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(2, record.getRecordNumber());
-        assertEquals(code.indexOf('1'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+            assertEquals(0, parser.getRecordNumber());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(1, record.getRecordNumber());
+            assertEquals(code.indexOf('i'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
 
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(3, record.getRecordNumber());
-        assertEquals(code.indexOf('2'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), 95);
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(2, record.getRecordNumber());
+            assertEquals(code.indexOf('1'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
 
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(4, record.getRecordNumber());
-        assertEquals(code.indexOf('3'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), 154);
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(3, record.getRecordNumber());
+            assertEquals(code.indexOf('2'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 95);
 
-        parser.close();
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(4, record.getRecordNumber());
+            assertEquals(code.indexOf('3'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 154);
+        };
 
     }
 
@@ -748,35 +744,32 @@ public void testGetRecordFourBytesRead() throws Exception {
             "1,😊,🤔,😂\n" +
             "2,😊,🤔,😂\n" +
             "3,😊,🤔,😂\n";
-        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
         final CSVFormat format = CSVFormat.Builder.create()
             .setDelimiter(',')
             .setQuote('\'')
-            .build();
-
-        // CSVParser parser = new CSVParser(new StringReader(code), format, 0L, 1L, "UTF-8");
-        CSVParser parser =  format.parse(new StringReader(code), 0L, 1L, "UTF-8");
-
-        CSVRecord record;
-        assertEquals(0, parser.getRecordNumber());
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(1, record.getRecordNumber());
-        assertEquals(code.indexOf('i'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
-
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(2, record.getRecordNumber());
-        assertEquals(code.indexOf('1'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), record.getCharacterPosition());
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(3, record.getRecordNumber());
-        assertEquals(code.indexOf('2'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), 26);
-        assertNotNull(record = parser.nextRecord());
-        assertEquals(4, record.getRecordNumber());
-        assertEquals(code.indexOf('3'), record.getCharacterPosition());
-        assertEquals(record.getCharacterByte(), 43);
-        parser.close();
+            .get();
+        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).get()) {
+            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
+
+            assertEquals(0, parser.getRecordNumber());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(1, record.getRecordNumber());
+            assertEquals(code.indexOf('i'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(2, record.getRecordNumber());
+            assertEquals(code.indexOf('1'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(3, record.getRecordNumber());
+            assertEquals(code.indexOf('2'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 26);
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(4, record.getRecordNumber());
+            assertEquals(code.indexOf('3'), record.getCharacterPosition());
+            assertEquals(record.getCharacterByte(), 43);
+        }
     }
 
     @Test
diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
index 7dbc23cafa..853007f9e5 100644
--- a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
+++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
@@ -21,7 +21,7 @@
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
-
+import java.nio.charset.StandardCharsets;
 
 import org.junit.jupiter.api.Test;
 
@@ -29,16 +29,15 @@
 public class JiraCsv196Test {
     @Test
     public void parseThreeBytes() throws IOException {
-
-        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
         final CSVFormat format = CSVFormat.Builder.create()
-                               .setDelimiter(',')
-                               .setQuote('\'')
-                               .build();
-        // CSVParser parser = new CSVParser(getTestInput(
-            // "org/apache/commons/csv/CSV-196/japanese.csv"), format, 0L, 1L, "UTF-8");
-        CSVParser parser =  format.parse(getTestInput(
-            "org/apache/commons/csv/CSV-196/japanese.csv"), 0L, 1L, "UTF-8");
+            .setDelimiter(',')
+            .setQuote('\'')
+            .get();
+        CSVParser parser = new CSVParser.Builder()
+            .setFormat(format)
+            .setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv"))
+            .setCharset(StandardCharsets.UTF_8)
+            .get();
         long[] charByteKey = {0, 89, 242, 395};
         int idx = 0;
         for (CSVRecord record : parser) {
@@ -50,15 +49,15 @@ public void parseThreeBytes() throws IOException {
 
     @Test
     public void parseFourBytes() throws IOException {
-        // final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
         final CSVFormat format = CSVFormat.Builder.create()
             .setDelimiter(',')
             .setQuote('\'')
-            .build();
-
-        CSVParser parser =  format.parse(getTestInput(
-                "org/apache/commons/csv/CSV-196/emoji.csv"), 0L, 1L, "UTF-8");
-
+            .get();
+        CSVParser parser = new CSVParser.Builder()
+            .setFormat(format)
+            .setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv"))
+            .setCharset(StandardCharsets.UTF_8)
+            .get();
         long[] charByteKey = {0, 84, 701, 1318, 1935};
         int idx = 0;
         for (CSVRecord record : parser) {
@@ -67,7 +66,6 @@ public void parseFourBytes() throws IOException {
         parser.close();
     }
 
-
     private Reader getTestInput(String path) {
         return new InputStreamReader(
             ClassLoader.getSystemClassLoader().getResourceAsStream(path));

From 344f282dbead967c49fd57820fca9d9249cc4ba3 Mon Sep 17 00:00:00 2001
From: Yuzhan Jiang <36880517+DarrenJAN@users.noreply.github.com>
Date: Tue, 19 Nov 2024 17:41:45 -0500
Subject: [PATCH 3/7] CSV-196: Remove duplicated Charset (#13)

---
 src/main/java/org/apache/commons/csv/CSVParser.java | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 024dd562d4..0879cf3bc9 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -153,7 +153,6 @@ public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {
         private CSVFormat format;
         private long characterOffset;
         private long recordNumber = 1;
-        private Charset charset;
 
         /**
          * Constructs a new instance.
@@ -165,7 +164,7 @@ protected Builder() {
         @SuppressWarnings("resource")
         @Override
         public CSVParser get() throws IOException {
-            return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, charset);
+            return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset());
         }
 
         /**
@@ -201,16 +200,6 @@ public Builder setRecordNumber(final long recordNumber) {
             return asThis();
         }
 
-        /**
-         * Sets the character encoding to be used for the reader.
-         *
-         * @param charset the character encoding.
-         * @return this instance.
-         */
-        public Builder setCharset(final Charset charset) {
-            this.charset = charset;
-            return asThis();
-        }
     }
 
     final class CSVRecordIterator implements Iterator<CSVRecord> {

From 27511be186b22755a8b9337f52faa47ce3051ff9 Mon Sep 17 00:00:00 2001
From: Yuzhan Jiang <36880517+DarrenJAN@users.noreply.github.com>
Date: Tue, 3 Dec 2024 15:45:38 -0500
Subject: [PATCH 4/7] Adding a boolean to drive byte tracking opt-in behavior
 (#14)

Adding a boolean to drive byte tracking opt-in behavior
---
 .../java/org/apache/commons/csv/CSVParser.java    | 15 +++++++++++----
 .../commons/csv/ExtendedBufferedReader.java       |  4 ++--
 .../org/apache/commons/csv/CSVParserTest.java     |  4 ++--
 .../org/apache/commons/csv/JiraCsv196Test.java    |  2 ++
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 0879cf3bc9..d3d8c9f3da 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -153,6 +153,7 @@ public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {
         private CSVFormat format;
         private long characterOffset;
         private long recordNumber = 1;
+        private boolean enableByteTracking = false;
 
         /**
          * Constructs a new instance.
@@ -164,7 +165,7 @@ protected Builder() {
         @SuppressWarnings("resource")
         @Override
         public CSVParser get() throws IOException {
-            return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset());
+            return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset(), enableByteTracking);
         }
 
         /**
@@ -200,6 +201,11 @@ public Builder setRecordNumber(final long recordNumber) {
             return asThis();
         }
 
+        public Builder setEnableByteTracking(final boolean enableByteTracking) {
+            this.enableByteTracking = enableByteTracking;
+            return asThis();
+        }
+
     }
 
     final class CSVRecordIterator implements Iterator<CSVRecord> {
@@ -507,7 +513,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
     @SuppressWarnings("resource")
     public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
         throws IOException {
-            this(reader, format, characterOffset, recordNumber, null);
+            this(reader, format, characterOffset, recordNumber, null, false);
         }
 
     /**
@@ -535,12 +541,13 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
      * @throws CSVException Thrown on invalid input.
      * @since 1.13.0.
      */
-    private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber, final Charset charset)
+    private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
+        final Charset charset, final boolean enableByteTracking)
         throws IOException {
         Objects.requireNonNull(reader, "reader");
         Objects.requireNonNull(format, "format");
         this.format = format.copy();
-        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset));
+        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset, enableByteTracking));
         this.csvRecordIterator = new CSVRecordIterator();
         this.headers = createHeaders();
         this.characterOffset = characterOffset;
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 158f90a755..a64868b39b 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -67,9 +67,9 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
         super(reader);
     }
 
-    ExtendedBufferedReader(final Reader reader, Charset charset) {
+    ExtendedBufferedReader(final Reader reader, Charset charset, boolean enableByteTracking) {
         super(reader);
-        if (charset != null) {
+        if (charset != null && enableByteTracking) {
             encoder = charset.newEncoder();
         }
     }
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 2b68155624..219e5e5fa5 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -711,7 +711,7 @@ public void testGetRecordThreeBytesRead() throws Exception {
             .setDelimiter(',')
             .setQuote('\'')
             .get();
-        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).get() ) {
+        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get() ) {
             CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
 
             assertEquals(0, parser.getRecordNumber());
@@ -748,7 +748,7 @@ public void testGetRecordFourBytesRead() throws Exception {
             .setDelimiter(',')
             .setQuote('\'')
             .get();
-        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).get()) {
+        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()) {
             CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
 
             assertEquals(0, parser.getRecordNumber());
diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
index 853007f9e5..a49d934cfc 100644
--- a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
+++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
@@ -37,6 +37,7 @@ public void parseThreeBytes() throws IOException {
             .setFormat(format)
             .setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv"))
             .setCharset(StandardCharsets.UTF_8)
+            .setEnableByteTracking(true)
             .get();
         long[] charByteKey = {0, 89, 242, 395};
         int idx = 0;
@@ -57,6 +58,7 @@ public void parseFourBytes() throws IOException {
             .setFormat(format)
             .setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv"))
             .setCharset(StandardCharsets.UTF_8)
+            .setEnableByteTracking(true)
             .get();
         long[] charByteKey = {0, 84, 701, 1318, 1935};
         int idx = 0;

From 8387f796b89cedbfbd0b5a30266702c682e22371 Mon Sep 17 00:00:00 2001
From: Yuzhan Jiang <36880517+DarrenJAN@users.noreply.github.com>
Date: Fri, 13 Dec 2024 17:05:48 -0500
Subject: [PATCH 5/7] Fix comments (#15)

* Fix comments
---
 .../java/org/apache/commons/csv/CSVParser.java    |  8 +++++++-
 .../java/org/apache/commons/csv/CSVRecord.java    |  2 +-
 .../commons/csv/ExtendedBufferedReader.java       | 15 ++++++++++++++-
 src/main/java/org/apache/commons/csv/Lexer.java   |  2 +-
 .../org/apache/commons/csv/CSVParserTest.java     |  4 ++--
 5 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index d3d8c9f3da..9ff28a96ae 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -153,7 +153,7 @@ public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {
         private CSVFormat format;
         private long characterOffset;
         private long recordNumber = 1;
-        private boolean enableByteTracking = false;
+        private boolean enableByteTracking;
 
         /**
          * Constructs a new instance.
@@ -201,6 +201,12 @@ public Builder setRecordNumber(final long recordNumber) {
             return asThis();
         }
 
+        /**
+         * Sets whether to enable byte tracking for the parser.
+         *
+         * @param enableByteTracking {@code true} to enable byte tracking; {@code false} to disable it.
+         * @return this instance.
+         */
         public Builder setEnableByteTracking(final boolean enableByteTracking) {
             this.enableByteTracking = enableByteTracking;
             return asThis();
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index f0a0a6b816..54c88812f0 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -160,7 +160,7 @@ public long getCharacterPosition() {
     }
 
     /**
-     * Returns the start byte of this record as a character byte in the source stream.
+     * Gets the start byte of this record as a character byte in the source stream.
      *
      * @return the start byte of this record as a character byte in the source stream.
      */
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index a64868b39b..61f6ae2f3e 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -67,6 +67,15 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
         super(reader);
     }
 
+    /**
+     * Constructs a new instance with the specified reader, character set,
+     * and byte tracking option. Initializes an encoder if byte tracking is enabled
+     * and a character set is provided.
+     *
+     * @param reader the underlying reader to wrap with look-ahead support.
+     * @param charset the character set for encoding, or {@code null} if not applicable.
+     * @param enableByteTracking {@code true} to enable byte tracking; {@code false} to disable it.
+     */
     ExtendedBufferedReader(final Reader reader, Charset charset, boolean enableByteTracking) {
         super(reader);
         if (charset != null && enableByteTracking) {
@@ -146,7 +155,7 @@ public int read() throws IOException {
     }
 
     /**
-     * In Java, the {@code char} data type is based on the original Unicode
+     * Gets the byte length of the given character based on the original Unicode
      * specification, which defined characters as fixed-width 16-bit entities.
      * <p>
      * The Unicode characters are divided into two main ranges:
@@ -166,6 +175,10 @@ public int read() throws IOException {
      *     </ul>
      *   </li>
      * </ul>
+     *
+     * @param current the current character to process.
+     * @return the byte length of the character.
+     * @throws CharacterCodingException if the character cannot be encoded.
      */
     private long getCharBytes(int current) throws CharacterCodingException {
         final char cChar = (char) current;
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index afbba4d21d..3f14b2d883 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -104,7 +104,7 @@ long getCharacterPosition() {
     }
 
     /**
-     * Returns the number of bytes read
+     * Gets the number of bytes read.
      *
      * @return the number of bytes read
      */
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 219e5e5fa5..7e3cafa65c 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -703,7 +703,7 @@ public void testGetHeaderComment_NoComment3() throws IOException {
 
     @Test
     public void testGetRecordThreeBytesRead() throws Exception {
-        String code = "id,date,val5,val4\n" +
+        final String code = "id,date,val5,val4\n" +
             "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" +
             "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" +
             "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n";
@@ -740,7 +740,7 @@ public void testGetRecordThreeBytesRead() throws Exception {
 
     @Test
     public void testGetRecordFourBytesRead() throws Exception {
-        String code = "id,a,b,c\n" +
+        final String code = "id,a,b,c\n" +
             "1,😊,🤔,😂\n" +
             "2,😊,🤔,😂\n" +
             "3,😊,🤔,😂\n";

From bdd152f917f22d6dc551f0b841bfd1ee809e95c7 Mon Sep 17 00:00:00 2001
From: Yuzhan Jiang <36880517+DarrenJAN@users.noreply.github.com>
Date: Thu, 26 Dec 2024 19:08:39 -0500
Subject: [PATCH 6/7] CSV-196-master: More changes (#16)

---
 .../org/apache/commons/csv/CSVParser.java     |  5 +++--
 .../org/apache/commons/csv/CSVRecord.java     | 20 +++++--------------
 .../commons/csv/ExtendedBufferedReader.java   |  4 ++--
 .../org/apache/commons/csv/CSVParserTest.java | 16 +++++++--------
 .../org/apache/commons/csv/CSVRecordTest.java |  2 +-
 .../apache/commons/csv/JiraCsv196Test.java    |  4 ++--
 6 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 9ff28a96ae..50230388f8 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -539,13 +539,14 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
      * @param recordNumber
      *            The next record number to assign.
      * @param charset
-     *            The character encoding to be used for the reader.
+     *            The character encoding to be used for the reader when enableByteTracking is true.
+     * @param enableByteTracking
+     *           {@code true} to enable byte tracking for the parser; {@code false} to disable it.
      * @throws IllegalArgumentException
      *             If the parameters of the format are inconsistent or if either the reader or format is null.
      * @throws IOException
      *             If there is a problem reading the header or skipping the first record.
      * @throws CSVException Thrown on invalid input.
-     * @since 1.13.0.
      */
     private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
         final Charset charset, final boolean enableByteTracking)
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 54c88812f0..386a25c852 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -51,7 +51,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
     /**
      * The start byte of this record as a character byte in the source stream.
      */
-    private final long characterByte;
+    private final long bytePosition;
 
     /** The accumulated comments (if any) */
     private final String comment;
@@ -65,24 +65,14 @@ public final class CSVRecord implements Serializable, Iterable<String> {
     /** The parser that originates this record. This is not serialized. */
     private final transient CSVParser parser;
 
-    CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber,
-            final long characterPosition) {
-        this.recordNumber = recordNumber;
-        this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY;
-        this.parser = parser;
-        this.comment = comment;
-        this.characterPosition = characterPosition;
-        this.characterByte = 0L;
-    }
-
     CSVRecord(final CSVParser parser, final String[] values,  final String comment, final long recordNumber,
-            final long characterPosition, final long characterByte) {
+            final long characterPosition, final long bytePosition) {
         this.recordNumber = recordNumber;
         this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY;
         this.parser = parser;
         this.comment = comment;
         this.characterPosition = characterPosition;
-        this.characterByte = characterByte;
+        this.bytePosition = bytePosition;
     }
     /**
      * Returns a value by {@link Enum}.
@@ -164,8 +154,8 @@ public long getCharacterPosition() {
      *
      * @return the start byte of this record as a character byte in the source stream.
      */
-    public long getCharacterByte() {
-        return characterByte;
+    public long getBytePosition() {
+        return bytePosition;
     }
 
     /**
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 61f6ae2f3e..24044966d1 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -147,7 +147,7 @@ public int read() throws IOException {
             lineNumber++;
         }
         if (encoder != null) {
-            this.bytesRead += getCharBytes(current);
+            this.bytesRead += getEncodedCharLength(current);
         }
         lastChar = current;
         position++;
@@ -180,7 +180,7 @@ public int read() throws IOException {
      * @return the byte length of the character.
      * @throws CharacterCodingException if the character cannot be encoded.
      */
-    private long getCharBytes(int current) throws CharacterCodingException {
+    private int getEncodedCharLength(int current) throws CharacterCodingException {
         final char cChar = (char) current;
         final char lChar = (char) lastChar;
         if (!Character.isSurrogate(cChar)) {
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 7e3cafa65c..ac3708a52a 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -718,22 +718,22 @@ public void testGetRecordThreeBytesRead() throws Exception {
             assertNotNull(record = parser.nextRecord());
             assertEquals(1, record.getRecordNumber());
             assertEquals(code.indexOf('i'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
 
             assertNotNull(record = parser.nextRecord());
             assertEquals(2, record.getRecordNumber());
             assertEquals(code.indexOf('1'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
 
             assertNotNull(record = parser.nextRecord());
             assertEquals(3, record.getRecordNumber());
             assertEquals(code.indexOf('2'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), 95);
+            assertEquals(record.getBytePosition(), 95);
 
             assertNotNull(record = parser.nextRecord());
             assertEquals(4, record.getRecordNumber());
             assertEquals(code.indexOf('3'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), 154);
+            assertEquals(record.getBytePosition(), 154);
         };
 
     }
@@ -755,20 +755,20 @@ public void testGetRecordFourBytesRead() throws Exception {
             assertNotNull(record = parser.nextRecord());
             assertEquals(1, record.getRecordNumber());
             assertEquals(code.indexOf('i'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
 
             assertNotNull(record = parser.nextRecord());
             assertEquals(2, record.getRecordNumber());
             assertEquals(code.indexOf('1'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
             assertNotNull(record = parser.nextRecord());
             assertEquals(3, record.getRecordNumber());
             assertEquals(code.indexOf('2'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), 26);
+            assertEquals(record.getBytePosition(), 26);
             assertNotNull(record = parser.nextRecord());
             assertEquals(4, record.getRecordNumber());
             assertEquals(code.indexOf('3'), record.getCharacterPosition());
-            assertEquals(record.getCharacterByte(), 43);
+            assertEquals(record.getBytePosition(), 43);
         }
     }
 
diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
index 5b0c5d812c..40c057e9b8 100644
--- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
@@ -85,7 +85,7 @@ record = parser.iterator().next();
     @Test
     public void testCSVRecordNULLValues() throws IOException {
         try (CSVParser parser = CSVParser.parse("A,B\r\nONE,TWO", CSVFormat.DEFAULT.withHeader())) {
-            final CSVRecord csvRecord = new CSVRecord(parser, null, null, 0L, 0L);
+            final CSVRecord csvRecord = new CSVRecord(parser, null, null, 0L, 0L, 0L);
             assertEquals(0, csvRecord.size());
             assertThrows(IllegalArgumentException.class, () -> csvRecord.get("B"));
         }
diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
index a49d934cfc..150a5f7f13 100644
--- a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
+++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
@@ -42,7 +42,7 @@ public void parseThreeBytes() throws IOException {
         long[] charByteKey = {0, 89, 242, 395};
         int idx = 0;
         for (CSVRecord record : parser) {
-            assertEquals(charByteKey[idx++], record.getCharacterByte());
+            assertEquals(charByteKey[idx++], record.getBytePosition());
         }
         parser.close();
     }
@@ -63,7 +63,7 @@ public void parseFourBytes() throws IOException {
         long[] charByteKey = {0, 84, 701, 1318, 1935};
         int idx = 0;
         for (CSVRecord record : parser) {
-            assertEquals(charByteKey[idx++], record.getCharacterByte());
+            assertEquals(charByteKey[idx++], record.getBytePosition());
         }
         parser.close();
     }

From d403084ddaf83992123035b7dd2876d0dcb083e8 Mon Sep 17 00:00:00 2001
From: Yuzhan Jiang <36880517+DarrenJAN@users.noreply.github.com>
Date: Tue, 31 Dec 2024 17:10:07 -0500
Subject: [PATCH 7/7] CSV-196: Comments changes on Dec30 (#17)

---
 .../org/apache/commons/csv/CSVParser.java     |  5 +--
 .../org/apache/commons/csv/CSVRecord.java     |  7 ++--
 .../commons/csv/ExtendedBufferedReader.java   |  4 ++-
 .../org/apache/commons/csv/CSVParserTest.java |  3 +-
 .../apache/commons/csv/JiraCsv196Test.java    | 34 ++++++++++---------
 5 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 95dd282aea..d9bb01fcff 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -208,6 +208,7 @@ public Builder setRecordNumber(final long recordNumber) {
          *
          * @param enableByteTracking {@code true} to enable byte tracking; {@code false} to disable it.
          * @return this instance.
+         * @since 1.13.0
          */
         public Builder setEnableByteTracking(final boolean enableByteTracking) {
             this.enableByteTracking = enableByteTracking;
@@ -885,7 +886,7 @@ CSVRecord nextRecord() throws IOException {
         recordList.clear();
         StringBuilder sb = null;
         final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
-        final long startCharByte = lexer.getBytesRead() + this.characterOffset;
+        final long startBytePosition = lexer.getBytesRead() + this.characterOffset;
         do {
             reusableToken.reset();
             lexer.nextToken(reusableToken);
@@ -923,7 +924,7 @@ CSVRecord nextRecord() throws IOException {
             recordNumber++;
             final String comment = Objects.toString(sb, null);
             result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
-                recordNumber, startCharPosition, startCharByte);
+                recordNumber, startCharPosition, startBytePosition);
         }
         return result;
     }
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 0da013458b..284220c38f 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -51,7 +51,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
     private final long characterPosition;
 
     /**
-     * The start byte of this record as a character byte in the source stream.
+     * The starting position of this record in the source stream, measured in bytes.
      */
     private final long bytePosition;
 
@@ -152,9 +152,10 @@ public long getCharacterPosition() {
     }
 
     /**
-     * Gets the start byte of this record as a character byte in the source stream
+     * Returns the starting position of this record in the source stream, measured in bytes.
      *
-     * @return the start byte of this record as a character byte in the source stream.
+     * @return the byte position of this record in the source stream.
+     * @since 1.13.0
      */
     public long getBytePosition() {
         return bytePosition;
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index f4a093f94c..6043ccaf08 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -195,7 +195,9 @@ private int getEncodedCharLength(int current) throws CharacterCodingException {
             } else if (Character.isSurrogatePair(lChar, cChar)) {
                 return encoder.encode(
                     CharBuffer.wrap(new char[] {lChar, cChar})).limit();
-            } else throw new CharacterCodingException();
+            } else {
+                throw new CharacterCodingException();
+            }
         }
     }
 
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 1e4a099a14..c42a3c25ab 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -736,8 +736,7 @@ public void testGetRecordThreeBytesRead() throws Exception {
             assertEquals(4, record.getRecordNumber());
             assertEquals(code.indexOf('3'), record.getCharacterPosition());
             assertEquals(record.getBytePosition(), 154);
-        };
-
+        }
     }
 
     @Test
diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
index 150a5f7f13..ab7af819e7 100644
--- a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
+++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java
@@ -1,18 +1,20 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ *   https://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 package org.apache.commons.csv;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -33,13 +35,13 @@ public void parseThreeBytes() throws IOException {
             .setDelimiter(',')
             .setQuote('\'')
             .get();
-        CSVParser parser = new CSVParser.Builder()
+        final CSVParser parser = new CSVParser.Builder()
             .setFormat(format)
             .setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv"))
             .setCharset(StandardCharsets.UTF_8)
             .setEnableByteTracking(true)
             .get();
-        long[] charByteKey = {0, 89, 242, 395};
+        final long[] charByteKey = {0, 89, 242, 395};
         int idx = 0;
         for (CSVRecord record : parser) {
             assertEquals(charByteKey[idx++], record.getBytePosition());
@@ -54,13 +56,13 @@ public void parseFourBytes() throws IOException {
             .setDelimiter(',')
             .setQuote('\'')
             .get();
-        CSVParser parser = new CSVParser.Builder()
+        final CSVParser parser = new CSVParser.Builder()
             .setFormat(format)
             .setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv"))
             .setCharset(StandardCharsets.UTF_8)
             .setEnableByteTracking(true)
             .get();
-        long[] charByteKey = {0, 84, 701, 1318, 1935};
+        final long[] charByteKey = {0, 84, 701, 1318, 1935};
         int idx = 0;
         for (CSVRecord record : parser) {
             assertEquals(charByteKey[idx++], record.getBytePosition());