+ * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, + * unless you close the {@code reader}. + *
+ * + * @param reader + * a Reader containing CSV-formatted input. Must not be null. + * @param format + * the CSVFormat used for CSV parsing. Must not be null. + * @param characterOffset + * Lexer offset when the parser does not start parsing at the beginning of the source. + * @param recordNumber + * The next record number to assign. + * @param charset + * The character encoding to be used for the reader when enableByteTracking is true. + * @param enableByteTracking + * {@code true} to enable byte tracking for the parser; {@code false} to disable it. + * @throws IllegalArgumentException + * If the parameters of the format are inconsistent or if either the reader or format is null. + * @throws IOException + * If there is a problem reading the header or skipping the first record. + * @throws CSVException Thrown on invalid input. + */ + private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber, + final Charset charset, final boolean enableByteTracking) throws IOException { Objects.requireNonNull(reader, "reader"); Objects.requireNonNull(format, "format"); this.format = format.copy(); - this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); + this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset, enableByteTracking)); this.csvRecordIterator = new CSVRecordIterator(); this.headers = createHeaders(); this.characterOffset = characterOffset; @@ -841,6 +886,7 @@ CSVRecord nextRecord() throws IOException { recordList.clear(); StringBuilder sb = null; final long startCharPosition = lexer.getCharacterPosition() + characterOffset; + final long startBytePosition = lexer.getBytesRead() + this.characterOffset; do { reusableToken.reset(); lexer.nextToken(reusableToken); @@ -878,7 +924,7 @@ CSVRecord nextRecord() throws IOException { recordNumber++; final String comment = Objects.toString(sb, null); result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment, - recordNumber, startCharPosition); + recordNumber, startCharPosition, startBytePosition); } return result; } diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 948edbe77f..284220c38f 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -50,6 +50,11 @@ public final class CSVRecord implements Serializable, Iterable+ * The Unicode characters are divided into two main ranges: + *