diff --git a/.asf.yaml b/.asf.yaml
index c56b33138c..cdb8cd101b 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -5,7 +5,7 @@
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -23,7 +23,8 @@ notifications:
pullrequests: issues@commons.apache.org
jira_options: link label
jobs: notifications@commons.apache.org
- issues_bot_dependabot: notifications@commons.apache.org
- pullrequests_bot_dependabot: notifications@commons.apache.org
+ # commits_bot_dependabot: dependabot@commons.apache.org
+ issues_bot_dependabot: dependabot@commons.apache.org
+ pullrequests_bot_dependabot: dependabot@commons.apache.org
issues_bot_codecov-commenter: notifications@commons.apache.org
pullrequests_bot_codecov-commenter: notifications@commons.apache.org
diff --git a/.gitattributes b/.gitattributes
index bec231c194..f42866e4bd 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -5,7 +5,7 @@
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/.github/GH-ROBOTS.txt b/.github/GH-ROBOTS.txt
index e3329e55fb..64a88674fe 100644
--- a/.github/GH-ROBOTS.txt
+++ b/.github/GH-ROBOTS.txt
@@ -5,7 +5,7 @@
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 9ebcd0ebb1..90ec55f742 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,7 +5,7 @@
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -18,10 +18,8 @@ updates:
- package-ecosystem: "maven"
directory: "/"
schedule:
- interval: "weekly"
- day: "friday"
+ interval: "quarterly"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
- interval: "weekly"
- day: "friday"
+ interval: "quarterly"
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index d126a970ce..9ff35c83e7 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -7,7 +7,7 @@
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
@@ -22,7 +22,9 @@ Thanks for your contribution to [Apache Commons](https://commons.apache.org/)! Y
Before you push a pull request, review this list:
- [ ] Read the [contribution guidelines](CONTRIBUTING.md) for this project.
+- [ ] Read the [ASF Generative Tooling Guidance](https://www.apache.org/legal/generative-tooling.html) if you use Artificial Intelligence (AI).
+- [ ] I used AI to create any part of, or all of, this pull request. Which AI tool was used to create this pull request, and to what extent did it contribute?
- [ ] Run a successful build using the default [Maven](https://maven.apache.org/) goal with `mvn`; that's `mvn` on the command line by itself.
-- [ ] Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible but is a best-practice.
+- [ ] Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible, but it is a best practice.
- [ ] Write a pull request description that is detailed enough to understand what the pull request does, how, and why.
-- [ ] Each commit in the pull request should have a meaningful subject line and body. Note that commits might be squashed by a maintainer on merge.
+- [ ] Each commit in the pull request should have a meaningful subject line and body. Note that a maintainer may squash commits during the merge process.
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 1f42db3b1a..cca38e5121 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -5,7 +5,7 @@
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -37,6 +37,7 @@ jobs:
security-events: write
strategy:
+ max-parallel: 20
fail-fast: false
matrix:
language: [ 'java' ]
@@ -45,10 +46,10 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+ - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 # v6.1.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
@@ -57,7 +58,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1
+ uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -68,7 +69,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1
+ uses: github/codeql-action/autobuild@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -82,4 +83,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1
+ uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index c6ece650e2..7bc02bdd23 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -6,7 +6,7 @@
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
@@ -26,6 +26,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: 'Checkout Repository'
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
- name: 'Dependency Review PR'
- uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0
+ uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294 # v5.0.0
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 71887b86d9..17ba7dd386 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -5,7 +5,7 @@
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,7 +15,11 @@
name: Java CI
-on: [push, pull_request]
+on:
+ push:
+ branches:
+ - 'master'
+ pull_request: {}
permissions:
contents: read
@@ -23,30 +27,35 @@ permissions:
jobs:
build:
- runs-on: ubuntu-latest
+ runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
strategy:
+ max-parallel: 20
+ fail-fast: false
matrix:
- java: [ 8, 11, 17, 21, 23 ]
+ os: [ubuntu-latest, macos-latest]
+ java: [ 8, 11, 17, 21, 25, 26 ]
experimental: [false]
+ # Keep the same parameter order as the matrix above
include:
- - java: 24-ea
- experimental: true
-
+ - os: ubuntu-latest
+ java: 27-ea
+ experimental: true
+
steps:
- - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
+ - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false
- - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+ - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 # v6.1.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- name: Set up JDK ${{ matrix.java }}
- uses: actions/setup-java@7a6d8a8234af8eb26422e24e3006232cccaa061b # v4.6.0
+ uses: actions/setup-java@1bcf9fb12cf4aa7d266a90ae39939e61372fe520 # v5.4.0
with:
- distribution: 'temurin'
+ distribution: ${{ runner.os == 'macOS' && matrix.java == '8' && 'zulu' || 'temurin' }}
java-version: ${{ matrix.java }}
- name: Build with Maven
run: mvn -Ddoclint=all --show-version --batch-mode --no-transfer-progress
diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml
index 8b2137c4a3..e1868cb462 100644
--- a/.github/workflows/scorecards-analysis.yml
+++ b/.github/workflows/scorecards-analysis.yml
@@ -5,7 +5,7 @@
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -40,12 +40,12 @@ jobs:
steps:
- name: "Checkout code"
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # 7.0.0
with:
persist-credentials: false
- name: "Run analysis"
- uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # 2.4.0
+ uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # 2.4.3
with:
results_file: results.sarif
results_format: sarif
@@ -57,13 +57,13 @@ jobs:
publish_results: true
- name: "Upload artifact"
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0
+ uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: SARIF file
path: results.sarif
retention-days: 5
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1
+ uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
sarif_file: results.sarif
diff --git a/.gitignore b/.gitignore
index 4b377d5762..2ff17ae4a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,8 @@ buildNumber.properties
*.iml
/.vscode/
+/.DS_Store
+
+# NetBeans files
+nb-configuration.xml
+nbactions.xml
diff --git a/BENCHMARK.md b/BENCHMARK.md
index e8b579b2e2..c45918a289 100644
--- a/BENCHMARK.md
+++ b/BENCHMARK.md
@@ -6,7 +6,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 3ed501501d..b4342f33ca 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -6,7 +6,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 18fce304e6..3423e18ad2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,7 +6,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -48,30 +48,33 @@ Getting Started
---------------
+ Make sure you have a [JIRA account](https://issues.apache.org/jira/).
-+ Make sure you have a [GitHub account](https://github.com/signup/free). This is not essential, but makes providing patches much easier.
++ Make sure you have a [GitHub account](https://github.com/signup). This is not essential, but makes providing patches much easier.
+ If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons CSV's scope.
+ Submit a [Jira Ticket][jira] for your issue, assuming one does not already exist.
+ Clearly describe the issue including steps to reproduce when it is a bug.
+ Make sure you fill in the earliest version that you know has the issue.
+ Find the corresponding [repository on GitHub](https://github.com/apache/?query=commons-),
-[fork](https://help.github.com/articles/fork-a-repo/) and check out your forked repository. If you don't have a GitHub account, you can still clone the Commons repository.
+[fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) and check out your forked repository. If you don't have a GitHub account, you can still clone the Commons repository.
Making Changes
--------------
+ Create a _topic branch_ for your isolated work.
- * Usually you should base your branch on the `master` branch.
+ * Usually you should base your branch on the `master` branch.
* A good topic branch name can be the JIRA bug ID plus a keyword, e.g. `CSV-123-InputStream`.
* If you have submitted multiple JIRA issues, try to maintain separate branches and pull requests.
+ Make commits of logical units.
* Make sure your commit messages are meaningful and in the proper format. Your commit message should contain the key of the JIRA issue.
- * e.g. `CSV-123: Close input stream earlier`
+ * For example, `[CSV-123] Close input stream sooner`
+ Respect the original code style:
- + Only use spaces for indentation.
+ + Only use spaces for indentation; you can check for unnecessary whitespace with `git diff` before committing.
+ Create minimal diffs - disable _On Save_ actions like _Reformat Source Code_ or _Organize Imports_. If you feel the source code should be reformatted create a separate PR for this change first.
- + Check for unnecessary whitespace with `git diff` -- check before committing.
-+ Make sure you have added the necessary tests for your changes, typically in `src/test/java`.
-+ Run all the tests with `mvn clean verify` to ensure nothing else was accidentally broken.
++ Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible but is a best practice.
+Unit tests are typically in the `src/test/java` directory.
++ Run a successful build using the default [Maven](https://maven.apache.org/) goal with `mvn`; that's `mvn` on the command line by itself.
++ Write a pull request description that is detailed enough to understand what the pull request does, how, and why.
++ Each commit in the pull request should have a meaningful subject line and body. Note that commits might be squashed by a maintainer on merge.
+
Making Trivial Changes
----------------------
@@ -79,7 +82,7 @@ Making Trivial Changes
The JIRA tickets are used to generate the changelog for the next release.
For changes of a trivial nature to comments and documentation, it is not always necessary to create a new ticket in JIRA.
-In this case, it is appropriate to start the first line of a commit with '(doc)' instead of a ticket number.
+In this case, it is appropriate to start the first line of a commit with '[doc]' or '[javadoc]' instead of a ticket number.
Submitting Changes
@@ -105,8 +108,8 @@ Additional Resources
+ [Contributing patches](https://commons.apache.org/patches.html)
+ [Apache Commons CSV JIRA project page][jira]
+ [Contributor License Agreement][cla]
-+ [General GitHub documentation](https://help.github.com/)
-+ [GitHub pull request documentation](https://help.github.com/articles/creating-a-pull-request/)
++ [General GitHub documentation](https://docs.github.com/)
++ [GitHub pull request documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request)
+ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons)
[cla]:https://www.apache.org/licenses/#clas
diff --git a/LICENSE.txt b/LICENSE.txt
index d645695673..ff9ad4530f 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -193,7 +193,7 @@
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/NOTICE.txt b/NOTICE.txt
index b5c1f1445b..06d3824a28 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -1,5 +1,5 @@
Apache Commons CSV
-Copyright 2005-2025 The Apache Software Foundation
+Copyright 2005-2026 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (https://www.apache.org/).
diff --git a/README.md b/README.md
index 969da9b8df..f30de4b9c9 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -45,7 +45,7 @@ Apache Commons CSV
[](https://github.com/apache/commons-csv/actions/workflows/maven.yml)
[](https://search.maven.org/artifact/org.apache.commons/commons-csv)
-[](https://javadoc.io/doc/org.apache.commons/commons-csv/1.13.0)
+[](https://javadoc.io/doc/org.apache.commons/commons-csv/1.14.1)
[](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml)
[](https://api.securityscorecards.dev/projects/github.com/apache/commons-csv)
@@ -68,7 +68,7 @@ Alternatively, you can pull it from the central Maven repositories:
org.apache.commonscommons-csv
- 1.13.0
+ 1.14.1
```
@@ -90,7 +90,7 @@ There are some guidelines which will make applying PRs easier for us:
+ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change.
+ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running `mvn`.
+ Before you pushing a PR, run `mvn` (by itself), this runs the default goal, which contains all build checks.
-+ To see the code coverage report, regardless of coverage failures, run `mvn clean site -Dcommons.jacoco.haltOnFailure=false`
++ To see the code coverage report, regardless of coverage failures, run `mvn clean site -Dcommons.jacoco.haltOnFailure=false -Pjacoco`
If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas).
You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md).
diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt
index 2d99a93d99..bfeb4bb8de 100644
--- a/RELEASE-NOTES.txt
+++ b/RELEASE-NOTES.txt
@@ -1,4 +1,116 @@
-Apache Commons CSV Version 1.13.0 Release Notes
+Apache Commons CSV 1.14.1 Release Notes
+---------------------------------------
+
+The Apache Commons CSV team is pleased to announce the release of Apache Commons CSV 1.14.1.
+
+
+This document contains the release notes for the 1.14.1 version of Apache Commons CSV.
+Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
+
+Commons CSV requires at least Java 8.
+
+The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types.
+
+This is a feature and maintenance release. Java 8 or later is required.
+
+Changes in this version include:
+
+
+Fixed Bugs
+----------
+
+* CSV-318: CSVPrinter.printRecord(Stream) hangs if given a parallel stream. Thanks to Joseph Shraibman, Gary Gregory.
+* CSV-318: CSVPrinter now uses an internal lock instead of synchronized methods. Thanks to Joseph Shraibman, Gary Gregory.
+* org.apache.commons.csv.CSVPrinter.printRecords(ResultSet) now writes one record at a time using a lock. Thanks to Gary Gregory.
+
+Changes
+-------
+
+* Bump org.apache.commons:commons-parent from 81 to 85 #542. Thanks to Gary Gregory, Dependabot.
+* Bump commons-io:commons-io from 2.18.0 to 2.20.0. Thanks to Gary Gregory.
+* Bump com.opencsv:opencsv from 5.10 to 5.11.2 #545, #551, #553. Thanks to Gary Gregory, Dependabot.
+* Bump org.apache.commons:commons-lang3 from 3.17.0 to 3.18.0 #556. Thanks to Gary Gregory, Dependabot.
+* Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. Thanks to Gary Gregory.
+
+
+Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html
+
+For complete information on Apache Commons CSV, including instructions on how to submit bug reports,
+patches, or suggestions for improvement, see the Apache Commons CSV website:
+
+https://commons.apache.org/proper/commons-csv/
+
+Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi
+
+Have fun!
+-Apache Commons CSV team
+
+------------------------------------------------------------------------------
+
+Apache Commons CSV 1.14.0 Release Notes
+---------------------------------------
+
+This document contains the release notes for the 1.14.0 version of Apache Commons CSV.
+Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
+
+Commons CSV requires at least Java 8.
+
+The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types.
+
+This is a feature and maintenance release. Java 8 or later is required.
+
+Changes in this version include:
+
+New Features
+------------
+
+* Define and use Maven property commons.jmh.version. Thanks to Gary Gregory.
+* Add CSVFormat.Builder.setMaxRows(long). Thanks to Gary Gregory.
+* Add CSVFormat.getMaxRows(). Thanks to Gary Gregory.
+* CSVPrinter.printRecords(ResultSet) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory.
+* CSVPrinter.printRecords(Iterable) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory.
+* CSVPrinter.printRecords(Stream) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory.
+* CSVParser.stream() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory.
+* CSVParser.getRecords() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory.
+* CSVParser.iterator() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory.
+
+Fixed Bugs
+----------
+
+* CSV-317: Release history link changed from changes-report.html to changes.html #516. Thanks to Filipe Roque.
+* Remove -nouses directive from maven-bundle-plugin. OSGi package imports now state 'uses' definitions for package imports, this doesn't affect JPMS (from org.apache.commons:commons-parent:80). Thanks to Gary Gregory.
+* CSVParser.parse(URL, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory.
+* CSVParser.parse(String, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory.
+* CSVParser.parse(File, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory.
+* CSVParser.parse(Path, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory.
+* CSVParser.parse(InputStream, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory.
+* CSVParser.parse(*) methods with a null Charset map to Charset.defaultCharset(). Thanks to Gary Gregory.
+* Fix possible NullPointerException in Token.toString(). Thanks to Gary Gregory.
+
+Changes
+-------
+
+* Bump com.opencsv:opencsv from 5.9 to 5.10. Thanks to Gary Gregory.
+* Bump commons-codec:commons-codec from 1.17.2 to 1.18.0 #522. Thanks to Gary Gregory.
+* Bump org.apache.commons:commons-parent from 79 to 81. Thanks to Gary Gregory.
+
+
+Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html
+
+For complete information on Apache Commons CSV, including instructions on how to submit bug reports,
+patches, or suggestions for improvement, see the Apache Commons CSV website:
+
+https://commons.apache.org/proper/commons-csv/
+
+Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi
+
+Have fun!
+-Apache Commons CSV team
+
+------------------------------------------------------------------------------
+
+Apache Commons CSV 1.13.0 Release Notes
+---------------------------------------
This document contains the release notes for the 1.13.0 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -50,7 +162,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.12.0 Release Notes
+Apache Commons CSV 1.12.0 Release Notes
+---------------------------------------
This document contains the release notes for the 1.12.0 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -106,7 +219,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.11.0 Release Notes
+Apache Commons CSV 1.11.0 Release Notes
+---------------------------------------
This document contains the release notes for the 1.11.0 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -166,7 +280,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.10.0 Release Notes
+Apache Commons CSV 1.10.0 Release Notes
+---------------------------------------
This document contains the release notes for the 1.10.0 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -247,7 +362,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.9.0 Release Notes
+Apache Commons CSV 1.9.0 Release Notes
+--------------------------------------
This document contains the release notes for the 1.9.0 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -349,7 +465,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.8 Release Notes
+Apache Commons CSV 1.8 Release Notes
+------------------------------------
This document contains the release notes for the 1.8 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -404,7 +521,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.7 Release Notes
+Apache Commons CSV 1.7 Release Notes
+------------------------------------
This document contains the release notes for the 1.7 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -451,7 +569,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.6 Release Notes
+Apache Commons CSV 1.6 Release Notes
+------------------------------------
This document contains the release notes for the 1.6 version of
Apache Commons CSV. Commons CSV reads and writes files in variations of the
@@ -500,7 +619,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.5 Release Notes
+Apache Commons CSV 1.5 Release Notes
+------------------------------------
This document contains the release notes for the 1.5 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -552,7 +672,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.4 Release Notes
+Apache Commons CSV 1.4 Release Notes
+------------------------------------
This document contains the release notes for the 1.4 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -591,7 +712,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.3 Release Notes
+Apache Commons CSV 1.3 Release Notes
+------------------------------------
This document contains the release notes for the 1.3 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -635,7 +757,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.2 Release Notes
+Apache Commons CSV 1.2 Release Notes
+------------------------------------
This document contains the release notes for the 1.2 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -673,7 +796,8 @@ Have fun!
------------------------------------------------------------------------------
-Apache Commons CSV Version 1.1 Release Notes
+Apache Commons CSV 1.1 Release Notes
+------------------------------------
This document contains the release notes for the 1.1 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
@@ -714,7 +838,8 @@ Have fun!
-------------------------------------------------------------------------------
-Apache Commons CSV Version 1.0 Release Notes
+Apache Commons CSV 1.0 Release Notes
+------------------------------------
This document contains the release notes for the 1.0 version of Apache Commons CSV.
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
diff --git a/SECURITY.md b/SECURITY.md
index 51943ba7b4..744d4cddbb 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -6,7 +6,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/benchmark-prereq.sh b/benchmark-prereq.sh
index 1d03f6773a..bd1db91821 100755
--- a/benchmark-prereq.sh
+++ b/benchmark-prereq.sh
@@ -8,7 +8,7 @@
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/pom.xml b/pom.xml
index cbfbf5db4a..8cb13ed7c2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,10 +20,10 @@
org.apache.commonscommons-parent
- 79
+ 102commons-csv
- 1.13.1-SNAPSHOT
+ 1.15.0-SNAPSHOTApache Commons CSVhttps://commons.apache.org/proper/commons-csv/2005
@@ -53,29 +53,29 @@
org.apache.commonscommons-lang3
- 3.17.0
+ 3.20.0testcom.h2databaseh2
+
2.2.224testorg.openjdk.jmhjmh-core
- 1.37
+ ${commons.jmh.version}test
- scm:git:http://gitbox.apache.org/repos/asf/commons-csv.git
+ scm:git:https://gitbox.apache.org/repos/asf/commons-csv.gitscm:git:https://gitbox.apache.org/repos/asf/commons-csv.githttps://gitbox.apache.org/repos/asf?p=commons-csv.git
- jirahttps://issues.apache.org/jira/browse/CSV
@@ -90,12 +90,12 @@
- 1.13.0
+ 1.15.0(Java 8 or above)RC1
- 1.12.0
- 1.13.1
+ 1.14.1
+ 1.15.1csvorg.apache.commons.csvCSV
@@ -108,9 +108,9 @@
UTF-8falsetrue
- 2025-01-11T14:07:50Z
- 1.17.2
- 2.18.0
+ 2025-07-30T14:51:35Z
+ 1.22.0
+ 2.22.0
org.apache.commons.codec.binary;version="${commons.codec.version}",
@@ -125,11 +125,11 @@
true1.00
- 0.98
+ 0.990.990.970.99
- 0.96
+ 0.97${basedir}/src/conf/checkstyle/checkstyle-header.txt${basedir}/src/conf/checkstyle/checkstyle.xml
@@ -168,34 +168,35 @@
apache-rat-plugin
-
+
- src/test/resources/org/apache/commons/csv/empty.txt
- src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv
- src/test/resources/org/apache/commons/csv/csv-167/sample1.csv
- src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv
- src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv
- src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv
- src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv
- src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv
- src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv
- src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt
- src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt
- src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt
- src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt
- src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv
- src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt
- src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt
+ src/test/resources/org/apache/commons/csv/empty.txt
+ src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv
+ src/test/resources/org/apache/commons/csv/csv-167/sample1.csv
+ src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv
+ src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv
+ src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv
+ src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv
+ src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv
+ src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv
+ src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv
+ src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt
+ src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt
+ src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt
+ src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt
+ src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv
+ src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt
+ src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt
- src/test/resources/org/apache/commons/csv/ferc.gov/contract.txt
- src/test/resources/org/apache/commons/csv/ferc.gov/transaction.txt
- src/test/resources/**/*.bin
- src/test/resources/org/apache/commons/csv/CSV-259/sample.txt
- src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv
- src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt
- src/test/resources/org/apache/commons/csv/CSV-290/psql.csv
- src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv
-
+ src/test/resources/org/apache/commons/csv/ferc.gov/contract.txt
+ src/test/resources/org/apache/commons/csv/ferc.gov/transaction.txt
+ src/test/resources/**/*.bin
+ src/test/resources/org/apache/commons/csv/CSV-259/sample.txt
+ src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv
+ src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt
+ src/test/resources/org/apache/commons/csv/CSV-290/psql.csv
+ src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv
+
@@ -359,7 +360,7 @@
org.openjdk.jmhjmh-generator-annprocess
- 1.37
+ ${commons.jmh.version}test
@@ -380,7 +381,7 @@
com.opencsvopencsv
- 5.10
+ 5.12.0test
@@ -390,13 +391,16 @@
2.4.0test
-
-
+
org.skife.kasparovcsv1.0
@@ -406,7 +410,7 @@
org.apache.commonscommons-lang3
- 3.17.0
+ 3.20.0
diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml
index f73d62f6df..3526ca9e91 100644
--- a/src/assembly/bin.xml
+++ b/src/assembly/bin.xml
@@ -6,7 +6,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/src/assembly/src.xml b/src/assembly/src.xml
index 9f33f58f20..1330db01f6 100644
--- a/src/assembly/src.xml
+++ b/src/assembly/src.xml
@@ -6,7 +6,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index a05e5d52e8..93952e9f18 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -7,7 +7,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -40,12 +40,77 @@
Apache Commons CSV Release Notes
-
+
+
+ Remove Spotbugs dependency and use exclude-filter instead #564.
+ Remove broken website link #577.
+ Fix Apache RAT plugin console warnings.
+ [Javadoc] Clarify behavior of deprecated CSVFormat#withFirstRecordAsHeader() #2413.
+ CSVFormat.equals()/hashCode() ignores maxRows (#600).
+ ExtendedBufferedReader byte tracking leads to an incorrect CSVRecord.getBytePosition() (#601).
+ CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447).
+ Lexer.isDelimiter() accepts a partial multi-character delimiter at EOF (#603).
+ CSVParser applies characterOffset to bytePosition (#604).
+ CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back.
+ CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used.
+ CSVParser with trackBytes enabled throws on multi-character delimiters containing supplementary Unicode characters.
+ CSVFormat.Builder.setNullString(String) can build an invalid quoted null string after setQuote(null).
+ Escape Reader values with quote and escape (#606).
+ Clear escape delimiter buffer before peek in Lexer.isEscapeDelimiter() (#608, #611).
+ Escape quote char in printWithEscapes when QuoteMode is NONE (#609).
+ Quote value starting with comment marker in minimal quote mode (#610).
+ Escape leading comment marker in printWithEscapes (#614).
+ Skip byte counting at EOF in ExtendedBufferedReader.read (#615).
+ Keep quoted empty trailing field with trailingDelimiter (#616).
+ Evaluate isDelimiter once in nextToken whitespace skip (#618).
+
+ Add an "Android Compatibility" section to the web site.
+ Add CSVParser.Builder.setByteOffset(long) (#604).
+
+ Bump org.apache.commons:commons-parent from 85 to 102 #573, #595.
+ [test] Bump com.opencsv:opencsv from 5.11.2 to 5.12.0 #558.
+ Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.20.0.
+ Bump commons-codec:commons-codec from 1.19.0 to 1.22.0.
+ Bump commons-io:commons-io from 2.20.0 to 2.22.0 #594.
+
+
+
+ CSVPrinter.printRecord(Stream) hangs if given a parallel stream.
+ CSVPrinter now uses an internal lock instead of synchronized methods.
+ org.apache.commons.csv.CSVPrinter.printRecords(ResultSet) now writes one record at a time using a lock.
+
+
+ Bump org.apache.commons:commons-parent from 81 to 85 #542.
+ Bump commons-io:commons-io from 2.18.0 to 2.20.0.
+ Bump com.opencsv:opencsv from 5.10 to 5.11.2 #545, #551, #553.
+ Bump org.apache.commons:commons-lang3 from 3.17.0 to 3.18.0 #556.
+ Bump commons-codec:commons-codec from 1.18.0 to 1.19.0.
+
+ Release history link changed from changes-report.html to changes.html #516.
+ Remove -nouses directive from maven-bundle-plugin. OSGi package imports now state 'uses' definitions for package imports; this doesn't affect JPMS (from org.apache.commons:commons-parent:80).
+ CSVParser.parse(URL, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)).
+ CSVParser.parse(String, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)).
+ CSVParser.parse(File, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)).
+ CSVParser.parse(Path, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)).
+ CSVParser.parse(InputStream, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)).
+ CSVParser.parse(*) methods with a null Charset maps to Charset.defaultCharset().
+ Fix possible NullPointerException in Token.toString().
+ Define and use Maven property commons.jmh.version.
+ Add CSVFormat.Builder.setMaxRows(long).
+ Add CSVFormat.getMaxRows().
+ CSVPrinter.printRecords(ResultSet) knows how to use CSVFormat's maxRows.
+ CSVPrinter.printRecords(Iterable) knows how to use CSVFormat's maxRows.
+ CSVPrinter.printRecords(Stream) knows how to use CSVFormat's maxRows.
+ CSVParser.stream() knows how to use CSVFormat's maxRows.
+ CSVParser.getRecords() knows how to use CSVFormat's maxRows.
+ CSVParser.iterator() knows how to use CSVFormat's maxRows.Bump com.opencsv:opencsv from 5.9 to 5.10.
+ Bump commons-codec:commons-codec from 1.17.2 to 1.18.0 #522.
+ Bump org.apache.commons:commons-parent from 79 to 81.
diff --git a/src/changes/release-notes.vm b/src/changes/release-notes.vm
index 08252f8ab9..5769829552 100644
--- a/src/changes/release-notes.vm
+++ b/src/changes/release-notes.vm
@@ -6,7 +6,7 @@
## "License"); you may not use this file except in compliance
## with the License. You may obtain a copy of the License at
##
-## http://www.apache.org/licenses/LICENSE-2.0
+## https://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing,
## software distributed under the License is distributed on an
@@ -16,16 +16,20 @@
## under the License.
##
${project.name} ${version} Release Notes
+------------------------------------------------
-This document contains the release notes for the ${version} version of Apache Commons CSV.
-Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
+The ${developmentTeam} is pleased to announce the release of ${project.name} ${version}.
+
+
+This document contains the release notes for the ${version} version of ${project.name}.
+Commons CSV reads and writes files in Comma Separated Value (CSV) format variations.
Commons CSV requires at least Java 8.
$introduction.replaceAll("(?
+
-
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 70c033a181..7145d23d3b 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -43,6 +43,7 @@
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.function.IOStream;
import org.apache.commons.io.function.Uncheck;
import org.apache.commons.io.output.AppendableOutputStream;
@@ -215,7 +216,7 @@ public static Builder create() {
.setRecordSeparator(Constants.CRLF)
.setIgnoreEmptyLines(true)
.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL);
- // @formatter:on
+ // @formatter:on
}
/**
@@ -270,32 +271,36 @@ public static Builder create(final CSVFormat csvFormat) {
private boolean trim;
+ /** The maximum number of rows to process, excluding the header row. */
+ private long maxRows;
+
private Builder() {
// empty
}
private Builder(final CSVFormat csvFormat) {
- this.delimiter = csvFormat.delimiter;
- this.quoteCharacter = csvFormat.quoteCharacter;
- this.quoteMode = csvFormat.quoteMode;
+ this.allowMissingColumnNames = csvFormat.allowMissingColumnNames;
+ this.autoFlush = csvFormat.autoFlush;
this.commentMarker = csvFormat.commentMarker;
+ this.delimiter = csvFormat.delimiter;
+ this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
this.escapeCharacter = csvFormat.escapeCharacter;
- this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces;
- this.allowMissingColumnNames = csvFormat.allowMissingColumnNames;
- this.ignoreEmptyLines = csvFormat.ignoreEmptyLines;
- this.recordSeparator = csvFormat.recordSeparator;
- this.nullString = csvFormat.nullString;
this.headerComments = csvFormat.headerComments;
this.headers = csvFormat.headers;
- this.skipHeaderRecord = csvFormat.skipHeaderRecord;
+ this.ignoreEmptyLines = csvFormat.ignoreEmptyLines;
this.ignoreHeaderCase = csvFormat.ignoreHeaderCase;
+ this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces;
this.lenientEof = csvFormat.lenientEof;
+ this.maxRows = csvFormat.maxRows;
+ this.nullString = csvFormat.nullString;
+ this.quoteCharacter = csvFormat.quoteCharacter;
+ this.quoteMode = csvFormat.quoteMode;
+ this.quotedNullString = csvFormat.quotedNullString;
+ this.recordSeparator = csvFormat.recordSeparator;
+ this.skipHeaderRecord = csvFormat.skipHeaderRecord;
this.trailingData = csvFormat.trailingData;
this.trailingDelimiter = csvFormat.trailingDelimiter;
this.trim = csvFormat.trim;
- this.autoFlush = csvFormat.autoFlush;
- this.quotedNullString = csvFormat.quotedNullString;
- this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
}
/**
@@ -606,6 +611,9 @@ public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLEx
*
* The header is also used by the {@link CSVPrinter}.
*
+ *
+ * This method keeps a copy of the input array.
+ *
*
* @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
@@ -644,6 +652,9 @@ public Builder setHeader(final String... header) {
* # Generated by Apache Commons CSV.
* # 1970-01-01T00:00:00Z
*
+ *
+ * This method keeps a copy of the input array.
+ *
*
* @param headerComments the headerComments which will be printed by the Printer before the CSV data.
* @return This instance.
@@ -682,6 +693,9 @@ public Builder setHeaderComments(final Object... headerComments) {
* # Generated by Apache Commons CSV.
* # 1970-01-01T00:00:00Z
*
+ *
+ * This method keeps a copy of the input array.
+ *
*
* @param headerComments the headerComments which will be printed by the Printer before the CSV data.
* @return This instance.
@@ -738,6 +752,21 @@ public Builder setLenientEof(final boolean lenientEof) {
return this;
}
+ /**
+ * Sets the maximum number of rows to process, excluding the header row.
+ *
+ * Values less than or equal to 0 mean no limit.
+ *
+ *
+ * @param maxRows the maximum number of rows to process, excluding the header row.
+ * @return This instance.
+ * @since 1.14.0
+ */
+ public Builder setMaxRows(final long maxRows) {
+ this.maxRows = maxRows;
+ return this;
+ }
+
/**
* Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}.
*
@@ -751,8 +780,7 @@ public Builder setLenientEof(final boolean lenientEof) {
*/
public Builder setNullString(final String nullString) {
this.nullString = nullString;
- this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
- return this;
+ return setQuotedNullString();
}
/**
@@ -777,6 +805,12 @@ public Builder setQuote(final Character quoteCharacter) {
throw new IllegalArgumentException("The quoteCharacter cannot be a line break");
}
this.quoteCharacter = quoteCharacter;
+ return setQuotedNullString();
+ }
+
+ private Builder setQuotedNullString() {
+ final Character quote = quoteCharacter != null ? quoteCharacter : Constants.DOUBLE_QUOTE_CHAR;
+ this.quotedNullString = quote + nullString + quote;
return this;
}
@@ -849,6 +883,16 @@ public Builder setTrailingData(final boolean trailingData) {
/**
* Sets whether to add a trailing delimiter.
*
+ *
+ * When writing, a delimiter is appended after the last value of each record. When reading, the empty field
+ * that such a trailing delimiter produces is dropped so the output round-trips back to the original record;
+ * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept.
+ *
+ *
+ * This is unrelated to {@link #setTrailingData(boolean) trailing data}, which controls whether characters
+ * after the closing quote of an encapsulated value are tolerated when reading.
+ *
+ *
* @param trailingDelimiter whether to add a trailing delimiter.
* @return This instance.
*/
@@ -857,6 +901,7 @@ public Builder setTrailingDelimiter(final boolean trailingDelimiter) {
return this;
}
+
/**
* Sets whether to trim leading and trailing blanks.
*
@@ -870,7 +915,7 @@ public Builder setTrim(final boolean trim) {
}
/**
- * Predefines formats.
+ * Enumerates predefined formats.
*
* @since 1.2
*/
@@ -1447,7 +1492,7 @@ private static boolean isLineBreak(final char c) {
* @return true if {@code c} is a line break character (and not null).
*/
private static boolean isLineBreak(final Character c) {
- return c != null && isLineBreak(c.charValue()); // N.B. Explicit (un)boxing is intentional
+ return c != null && isLineBreak(c.charValue()); // Explicit unboxing is intentional
}
/** Same test as in as {@link String#trim()}. */
@@ -1580,28 +1625,32 @@ public static CSVFormat valueOf(final String format) {
/** Whether to trim leading and trailing blanks. */
private final boolean trim;
+ /** The maximum number of rows to process, excluding the header row. */
+ private final long maxRows;
+
private CSVFormat(final Builder builder) {
- this.delimiter = builder.delimiter;
- this.quoteCharacter = builder.quoteCharacter;
- this.quoteMode = builder.quoteMode;
+ this.allowMissingColumnNames = builder.allowMissingColumnNames;
+ this.autoFlush = builder.autoFlush;
this.commentMarker = builder.commentMarker;
+ this.delimiter = builder.delimiter;
+ this.duplicateHeaderMode = builder.duplicateHeaderMode;
this.escapeCharacter = builder.escapeCharacter;
- this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces;
- this.allowMissingColumnNames = builder.allowMissingColumnNames;
- this.ignoreEmptyLines = builder.ignoreEmptyLines;
- this.recordSeparator = builder.recordSeparator;
- this.nullString = builder.nullString;
this.headerComments = builder.headerComments;
this.headers = builder.headers;
- this.skipHeaderRecord = builder.skipHeaderRecord;
+ this.ignoreEmptyLines = builder.ignoreEmptyLines;
this.ignoreHeaderCase = builder.ignoreHeaderCase;
+ this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces;
this.lenientEof = builder.lenientEof;
+ this.maxRows = builder.maxRows;
+ this.nullString = builder.nullString;
+ this.quoteCharacter = builder.quoteCharacter;
+ this.quoteMode = builder.quoteMode;
+ this.quotedNullString = builder.quotedNullString;
+ this.recordSeparator = builder.recordSeparator;
+ this.skipHeaderRecord = builder.skipHeaderRecord;
this.trailingData = builder.trailingData;
this.trailingDelimiter = builder.trailingDelimiter;
this.trim = builder.trim;
- this.autoFlush = builder.autoFlush;
- this.quotedNullString = builder.quotedNullString;
- this.duplicateHeaderMode = builder.duplicateHeaderMode;
validate();
}
@@ -1656,23 +1705,23 @@ public boolean equals(final Object obj) {
duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) &&
Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) &&
ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase &&
- ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof &&
- Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) &&
- quoteMode == other.quoteMode && Objects.equals(quotedNullString, other.quotedNullString) &&
- Objects.equals(recordSeparator, other.recordSeparator) && skipHeaderRecord == other.skipHeaderRecord &&
- trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
+ ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof && maxRows == other.maxRows &&
+ Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
+ Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
+ skipHeaderRecord == other.skipHeaderRecord && trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter &&
+ trim == other.trim;
}
private void escape(final char c, final Appendable appendable) throws IOException {
- append(escapeCharacter.charValue(), appendable); // N.B. Explicit (un)boxing is intentional
+ append(escapeCharacter.charValue(), appendable); // Explicit unboxing is intentional
append(c, appendable);
}
/**
- * Formats the specified values.
+ * Formats the specified values as a CSV record string.
*
- * @param values the values to format
- * @return the formatted values
+ * @param values the values to format.
+ * @return the formatted values.
*/
public String format(final Object... values) {
return Uncheck.get(() -> format_(values));
@@ -1802,7 +1851,7 @@ public DuplicateHeaderMode getDuplicateHeaderMode() {
* @return the escape character, may be {@code 0}
*/
char getEscapeChar() {
- return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. Explicit (un)boxing is intentional
+ return escapeCharacter != null ? escapeCharacter.charValue() : 0; // Explicit unboxing is intentional
}
/**
@@ -1898,6 +1947,19 @@ public boolean getLenientEof() {
return lenientEof;
}
+ /**
+ * Gets the maximum number of rows to process, excluding the header row.
+ *
+ * Values less than or equal to 0 mean no limit.
+ *
+ *
+ * @return The maximum number of rows to process, excluding the header row.
+ * @since 1.14.0
+ */
+ public long getMaxRows() {
+ return maxRows;
+ }
+
/**
* Gets the String to convert to and from {@code null}.
*
@@ -1960,6 +2022,16 @@ public boolean getTrailingData() {
/**
* Gets whether to add a trailing delimiter.
*
+ *
+ * When writing, a delimiter is appended after the last value of each record. When reading, the empty field
+ * that such a trailing delimiter produces is dropped so the output round-trips back to the original record;
+ * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept.
+ *
+ *
+ * This is unrelated to {@link #getTrailingData() trailing data}, which controls whether characters after the
+ * closing quote of an encapsulated value are tolerated when reading.
+ *
+ *
* @return whether to add a trailing delimiter.
* @since 1.3
*/
@@ -1983,7 +2055,7 @@ public int hashCode() {
result = prime * result + Arrays.hashCode(headerComments);
result = prime * result + Arrays.hashCode(headers);
result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter,
- ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString,
+ ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, maxRows, nullString, quoteCharacter, quoteMode, quotedNullString,
recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim);
return result;
}
@@ -2052,6 +2124,10 @@ public boolean isQuoteCharacterSet() {
return quoteCharacter != null;
}
+ IOStream limit(final IOStream stream) {
+ return useMaxRows() ? stream.limit(getMaxRows()) : stream;
+ }
+
/**
* Parses the specified content.
*
@@ -2108,7 +2184,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo
}
final boolean quoteCharacterSet = isQuoteCharacterSet();
if (quoteCharacterSet) {
- append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional
+ append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional
}
// Stream the input to the output without reading or holding the whole value in memory.
// AppendableOutputStream cannot "close" an Appendable.
@@ -2116,7 +2192,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo
IOUtils.copy(inputStream, outputStream);
}
if (quoteCharacterSet) {
- append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional
+ append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional
}
}
@@ -2262,7 +2338,7 @@ public synchronized void printRecord(final Appendable appendable, final Object..
}
/*
- * Note: Must only be called if escaping is enabled, otherwise can throw exceptions.
+ * This method must only be called if escaping is enabled, otherwise it can throw exceptions.
*/
private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException {
int start = 0;
@@ -2271,12 +2347,18 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen
final char[] delimArray = getDelimiterCharArray();
final int delimLength = delimArray.length;
final char escape = getEscapeChar();
+ final boolean quoteSet = isQuoteCharacterSet();
+ final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
+ final boolean commentMarkerSet = isCommentMarkerSet();
+ final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional
while (pos < end) {
char c = charSeq.charAt(pos);
final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength);
final boolean isCr = c == Constants.CR;
final boolean isLf = c == Constants.LF;
- if (isCr || isLf || c == escape || isDelimiterStart) {
+ // A leading comment marker would be read back as a comment, so escape it.
+ final boolean isComment = commentMarkerSet && pos == 0 && c == commentChar;
+ if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) {
// write out segment up until this char
if (pos > start) {
appendable.append(charSeq, start, pos);
@@ -2305,7 +2387,7 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen
}
/*
- * Note: Must only be called if escaping is enabled, otherwise can throw exceptions.
+ * This method must only be called if escaping is enabled, otherwise it can throw exceptions.
*/
private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException {
int start = 0;
@@ -2315,8 +2397,13 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
final char[] delimArray = getDelimiterCharArray();
final int delimLength = delimArray.length;
final char escape = getEscapeChar();
+ final boolean quoteSet = isQuoteCharacterSet();
+ final char quote = quoteSet ? getQuoteCharacter().charValue() : 0;
+ final boolean commentMarkerSet = isCommentMarkerSet();
+ final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional
final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
int c;
+ boolean firstChar = true;
final char[] lookAheadBuffer = new char[delimLength - 1];
while (EOF != (c = bufferedReader.read())) {
builder.append((char) c);
@@ -2326,7 +2413,10 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength);
final boolean isCr = c == Constants.CR;
final boolean isLf = c == Constants.LF;
- if (isCr || isLf || c == escape || isDelimiterStart) {
+ // A leading comment marker would be read back as a comment, so escape it.
+ final boolean isComment = commentMarkerSet && firstChar && c == commentChar;
+ firstChar = false;
+ if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) {
// write out segment up until this char
if (pos > start) {
append(builder.substring(start, pos), appendable);
@@ -2355,9 +2445,9 @@ private void printWithEscapes(final Reader reader, final Appendable appendable)
}
/*
- * Note: must only be called if quoting is enabled, otherwise will generate NPE
+ * This method must only be called if quoting is enabled, otherwise it will throw a NullPointerException.
+ * The original object is needed so that it can be checked for being a Number.
*/
- // the original object is needed so can check for Number
private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws IOException {
boolean quote = false;
int start = 0;
@@ -2365,7 +2455,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi
final int len = charSeq.length();
final char[] delim = getDelimiterCharArray();
final int delimLength = delim.length;
- final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional
+ final char quoteChar = getQuoteCharacter().charValue(); // Explicit unboxing is intentional
// If escape char not specified, default to the quote char
// This avoids having to keep checking whether there is an escape character
// at the cost of checking against quote twice
@@ -2397,10 +2487,11 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi
}
} else {
char c = charSeq.charAt(pos);
- if (c <= Constants.COMMENT) {
+ if (c <= Constants.COMMENT || isCommentMarkerSet() && c == commentMarker.charValue()) {
// Some other chars at the start of a value caused the parser to fail, so for now
// encapsulate if we start in anything less than '#'. We are being conservative
- // by including the default comment char too.
+ // by including the default comment char and any configured comment marker too,
+ // which the parser would otherwise read back as a comment line.
quote = true;
} else {
while (pos < len) {
@@ -2468,15 +2559,16 @@ private void printWithQuotes(final Reader reader, final Appendable appendable) t
printWithEscapes(reader, appendable);
return;
}
- final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional
+ final char quote = getQuoteCharacter().charValue(); // Explicit unboxing is intentional
+ final char escape = isEscapeCharacterSet() ? getEscapeChar() : quote;
// (1) Append opening quote
append(quote, appendable);
- // (2) Append Reader contents, doubling quotes
+ // (2) Append Reader contents, doubling quotes and escape characters
int c;
while (EOF != (c = reader.read())) {
append((char) c, appendable);
- if (c == quote) {
- append(quote, appendable);
+ if (c == quote || c == escape) {
+ append((char) c, appendable);
}
}
// (3) Append closing quote
@@ -2488,27 +2580,27 @@ public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append("Delimiter=<").append(delimiter).append('>');
if (isEscapeCharacterSet()) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("Escape=<").append(escapeCharacter).append('>');
}
if (isQuoteCharacterSet()) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("QuoteChar=<").append(quoteCharacter).append('>');
}
if (quoteMode != null) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("QuoteMode=<").append(quoteMode).append('>');
}
if (isCommentMarkerSet()) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("CommentStart=<").append(commentMarker).append('>');
}
if (isNullStringSet()) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("NullString=<").append(nullString).append('>');
}
if (recordSeparator != null) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("RecordSeparator=<").append(recordSeparator).append('>');
}
if (getIgnoreEmptyLines()) {
@@ -2522,11 +2614,11 @@ public String toString() {
}
sb.append(" SkipHeaderRecord:").append(skipHeaderRecord);
if (headerComments != null) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("HeaderComments:").append(Arrays.toString(headerComments));
}
if (headers != null) {
- sb.append(' ');
+ sb.append(Constants.SP);
sb.append("Header:").append(Arrays.toString(headers));
}
return sb.toString();
@@ -2536,6 +2628,14 @@ String trim(final String value) {
return getTrim() ? value.trim() : value;
}
+ boolean useMaxRows() {
+ return getMaxRows() > 0;
+ }
+
+ boolean useRow(final long rowNum) {
+ return !useMaxRows() || rowNum <= getMaxRows();
+ }
+
/**
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
*
@@ -2546,16 +2646,13 @@ String trim(final String value) {
* @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes.
*/
private void validate() throws IllegalArgumentException {
- if (containsLineBreak(delimiter)) {
- throw new IllegalArgumentException("The delimiter cannot be a line break");
- }
- if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional
+ if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // Explicit unboxing is intentional
throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
}
- if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional
+ if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // Explicit unboxing is intentional
throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
}
- if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // N.B. Explicit (un)boxing is intentional
+ if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // Explicit unboxing is intentional
throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
}
if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
@@ -2733,6 +2830,9 @@ public CSVFormat withEscape(final Character escape) {
* .get();
*
*
+ *
Any previously set headers are reset to empty.
+ * The resulting format will have {@code skipHeaderRecord = true}.
+ *
* @return A new CSVFormat that is equal to this but using the first record as header.
* @see Builder#setSkipHeaderRecord(boolean)
* @see Builder#setHeader(String...)
@@ -3142,4 +3242,5 @@ public CSVFormat withTrim() {
public CSVFormat withTrim(final boolean trim) {
return builder().setTrim(trim).get();
}
+
}
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 1c88d9c7f6..141eba732c 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -49,6 +49,7 @@
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
+import org.apache.commons.io.Charsets;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.function.Uncheck;
@@ -153,6 +154,7 @@ public final class CSVParser implements Iterable, Closeable {
public static class Builder extends AbstractStreamBuilder {
private CSVFormat format;
+ private long byteOffset = -1;
private long characterOffset;
private long recordNumber = 1;
private boolean trackBytes;
@@ -164,17 +166,33 @@ protected Builder() {
// empty
}
- @SuppressWarnings("resource")
@Override
public CSVParser get() throws IOException {
- return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset(), trackBytes);
+ return new CSVParser(this);
}
/**
- * Sets the lexer offset when the parser does not start parsing at the beginning of the source.
+ * Sets the lexer byte offset when the parser does not start parsing at the beginning of the source.
+ *
+ * By default, the value is {@code -1}, which reuses the character offset for the byte offset.
+ *
*
- * @param characterOffset the lexer offset.
- * @return this instance.
+ * @param byteOffset the lexer byte offset.
+ * @return {@code this} instance.
+ * @see #setCharacterOffset(long)
+ * @since 1.15.0
+ */
+ public Builder setByteOffset(final long byteOffset) {
+ this.byteOffset = byteOffset;
+ return asThis();
+ }
+
+ /**
+ * Sets the lexer character offset when the parser does not start parsing at the beginning of the source.
+ *
+ * @param characterOffset the lexer character offset.
+ * @return {@code this} instance.
+ * @see #setByteOffset(long)
*/
public Builder setCharacterOffset(final long characterOffset) {
this.characterOffset = characterOffset;
@@ -184,8 +202,8 @@ public Builder setCharacterOffset(final long characterOffset) {
/**
* Sets the CSV format. A copy of the given format is kept.
*
- * @param format the CSV format, null is equivalent to {@link CSVFormat#DEFAULT}.
- * @return this instance.
+ * @param format the CSV format, {@code null} resets to {@link CSVFormat#DEFAULT}.
+ * @return {@code this} instance.
*/
public Builder setFormat(final CSVFormat format) {
this.format = CSVFormat.copy(format);
@@ -196,7 +214,7 @@ public Builder setFormat(final CSVFormat format) {
* Sets the next record number to assign, defaults to {@code 1}.
*
* @param recordNumber the next record number to assign.
- * @return this instance.
+ * @return {@code this} instance.
*/
public Builder setRecordNumber(final long recordNumber) {
this.recordNumber = recordNumber;
@@ -207,7 +225,7 @@ public Builder setRecordNumber(final long recordNumber) {
* Sets whether to enable byte tracking for the parser.
*
* @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it.
- * @return this instance.
+ * @return {@code this} instance.
* @since 1.13.0
*/
public Builder setTrackBytes(final boolean trackBytes) {
@@ -219,16 +237,24 @@ public Builder setTrackBytes(final boolean trackBytes) {
final class CSVRecordIterator implements Iterator {
private CSVRecord current;
+ private long recordCount;
/**
- * Gets the next record.
+ * Gets the next record or null at the end of stream or max rows read.
*
* @throws IOException on parse error or input read-failure
* @throws CSVException on invalid input.
- * @return the next record.
+ * @return the next record, or {@code null} if the end of the stream has been reached or the maximum number of rows has been read.
*/
private CSVRecord getNextRecord() {
- return Uncheck.get(CSVParser.this::nextRecord);
+ CSVRecord record = null;
+ if (format.useRow(recordCount + 1)) {
+ record = Uncheck.get(CSVParser.this::nextRecord);
+ if (record != null) {
+ recordCount++;
+ }
+ }
+ return record;
}
@Override
@@ -239,7 +265,6 @@ public boolean hasNext() {
if (current == null) {
current = getNextRecord();
}
-
return current != null;
}
@@ -250,7 +275,6 @@ public CSVRecord next() {
}
CSVRecord next = current;
current = null;
-
if (next == null) {
// hasNext() wasn't called before
next = getNextRecord();
@@ -258,7 +282,6 @@ public CSVRecord next() {
throw new NoSuchElementException("No more CSV records available");
}
}
-
return next;
}
@@ -267,6 +290,7 @@ public void remove() {
throw new UnsupportedOperationException();
}
}
+
/**
* Header information based on name and position.
*/
@@ -304,15 +328,16 @@ public static Builder builder() {
* @param file
* a CSV file. Must not be null.
* @param charset
- * The Charset to decode the given file.
+ * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
* @param format
- * the CSVFormat used for CSV parsing. Must not be null.
+ * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
* @return a new parser
* @throws IllegalArgumentException
- * If the parameters of the format are inconsistent or if either file or format are null.
+ * If the parameters of the format are inconsistent.
* @throws IOException
* If an I/O error occurs
- * @throws CSVException Thrown on invalid input.
+ * @throws CSVException Thrown on invalid CSV input data.
+ * @throws NullPointerException if {@code file} is {@code null}.
*/
public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
Objects.requireNonNull(file, "file");
@@ -328,25 +353,22 @@ public static CSVParser parse(final File file, final Charset charset, final CSVF
*
*
* @param inputStream
- * an InputStream containing CSV-formatted input. Must not be null.
+ * an InputStream containing CSV-formatted input. Must not be {@code null}.
* @param charset
- * The Charset to decode the given file.
+ * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
* @param format
- * the CSVFormat used for CSV parsing. Must not be null.
+ * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
* @return a new CSVParser configured with the given reader and format.
* @throws IllegalArgumentException
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
- * @throws CSVException Thrown on invalid input.
+ * @throws CSVException Thrown on invalid CSV input data.
* @since 1.5
*/
- @SuppressWarnings("resource")
public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
throws IOException {
- Objects.requireNonNull(inputStream, "inputStream");
- Objects.requireNonNull(format, "format");
- return parse(new InputStreamReader(inputStream, charset), format);
+ return parse(new InputStreamReader(inputStream, Charsets.toCharset(charset)), format);
}
/**
@@ -355,21 +377,21 @@ public static CSVParser parse(final InputStream inputStream, final Charset chars
* @param path
* a CSV file. Must not be null.
* @param charset
- * The Charset to decode the given file.
+ * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
* @param format
- * the CSVFormat used for CSV parsing. Must not be null.
+ * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
* @return a new parser
* @throws IllegalArgumentException
- * If the parameters of the format are inconsistent or if either file or format are null.
+ * If the parameters of the format are inconsistent.
* @throws IOException
* If an I/O error occurs
- * @throws CSVException Thrown on invalid input.
+ * @throws CSVException Thrown on invalid CSV input data.
+ * @throws NullPointerException if {@code path} is {@code null}.
* @since 1.5
*/
@SuppressWarnings("resource")
public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
Objects.requireNonNull(path, "path");
- Objects.requireNonNull(format, "format");
return parse(Files.newInputStream(path), charset, format);
}
@@ -384,13 +406,13 @@ public static CSVParser parse(final Path path, final Charset charset, final CSVF
* @param reader
* a Reader containing CSV-formatted input. Must not be null.
* @param format
- * the CSVFormat used for CSV parsing. Must not be null.
+ * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
* @return a new CSVParser configured with the given reader and format.
* @throws IllegalArgumentException
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
- * @throws CSVException Thrown on invalid input.
+ * @throws CSVException Thrown on invalid CSV input data.
* @since 1.5
*/
public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
@@ -403,17 +425,17 @@ public static CSVParser parse(final Reader reader, final CSVFormat format) throw
* @param string
* a CSV string. Must not be null.
* @param format
- * the CSVFormat used for CSV parsing. Must not be null.
+ * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
* @return a new parser
* @throws IllegalArgumentException
- * If the parameters of the format are inconsistent or if either string or format are null.
+ * If the parameters of the format are inconsistent.
* @throws IOException
* If an I/O error occurs
- * @throws CSVException Thrown on invalid input.
+ * @throws CSVException Thrown on invalid CSV input data.
+ * @throws NullPointerException if {@code string} is {@code null}.
*/
public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
Objects.requireNonNull(string, "string");
- Objects.requireNonNull(format, "format");
return parse(new StringReader(string), format);
}
@@ -428,15 +450,16 @@ public static CSVParser parse(final String string, final CSVFormat format) throw
* @param url
* a URL. Must not be null.
* @param charset
- * the charset for the resource. Must not be null.
+ * the charset for the resource, {@code null} maps to the {@link Charset#defaultCharset() default Charset}.
* @param format
- * the CSVFormat used for CSV parsing. Must not be null.
+ * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}.
* @return a new parser
* @throws IllegalArgumentException
- * If the parameters of the format are inconsistent or if either url, charset or format are null.
+ * If the parameters of the format are inconsistent.
* @throws IOException
* If an I/O error occurs
- * @throws CSVException Thrown on invalid input.
+ * @throws CSVException Thrown on invalid CSV input data.
+ * @throws NullPointerException if {@code url} is {@code null}.
*/
@SuppressWarnings("resource")
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
@@ -464,6 +487,12 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
*/
private long recordNumber;
+ /**
+ * Lexer byte offset when the parser does not start parsing at the beginning of the source. Usually used in combination
+ * with {@link #recordNumber}.
+ */
+ private final long byteOffset;
+
/**
* Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
* with {@link #recordNumber}.
@@ -473,7 +502,24 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
private final Token reusableToken = new Token();
/**
- * Constructs a new instance using the given {@link CSVFormat}
+ * Constructs a new instance from a builder.
+ *
+ * @param builder The source builder.
+ * @throws IOException if an I/O error occurs.
+ */
+ @SuppressWarnings("resource") // Lexer manages ExtendedBufferedReader.
+ private CSVParser(final Builder builder) throws IOException {
+ this.format = (builder.format != null ? builder.format : CSVFormat.DEFAULT).copy();
+ this.lexer = new Lexer(format, new ExtendedBufferedReader(builder.getReader(), builder.getCharset(), builder.trackBytes));
+ this.csvRecordIterator = new CSVRecordIterator();
+ this.headers = createHeaders();
+ this.byteOffset = builder.byteOffset != -1 ? builder.byteOffset : builder.characterOffset;
+ this.characterOffset = builder.characterOffset;
+ this.recordNumber = builder.recordNumber - 1;
+ }
+
+ /**
+ * Constructs a new instance using the given {@link CSVFormat}.
*
*
* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
@@ -488,7 +534,7 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
- * @throws CSVException Thrown on invalid input.
+ * @throws CSVException Thrown on invalid CSV input data.
* @deprecated Will be removed in the next major version, use {@link Builder#get()}.
*/
@Deprecated
@@ -497,7 +543,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
}
/**
- * Constructs a new instance using the given {@link CSVFormat}
+ * Constructs a new instance using the given {@link CSVFormat}.
*
*
* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
@@ -518,56 +564,25 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
* if there is a problem reading the header or skipping the first record
* @throws CSVException on invalid input.
* @since 1.1
- * @deprecated Will be private in the next major version, use {@link Builder#get()}.
+ * @deprecated Will be removed in the next major version, use {@link Builder#get()}.
*/
@Deprecated
- public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
- throws IOException {
- this(reader, format, characterOffset, recordNumber, null, false);
- }
-
- /**
- * Constructs a new instance using the given {@link CSVFormat}
- *
- *
- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
- * unless you close the {@code reader}.
- *
- *
- * @param reader
- * a Reader containing CSV-formatted input. Must not be null.
- * @param format
- * the CSVFormat used for CSV parsing. Must not be null.
- * @param characterOffset
- * Lexer offset when the parser does not start parsing at the beginning of the source.
- * @param recordNumber
- * The next record number to assign.
- * @param charset
- * The character encoding to be used for the reader when enableByteTracking is true.
- * @param trackBytes
- * {@code true} to enable byte tracking for the parser; {@code false} to disable it.
- * @throws IllegalArgumentException
- * If the parameters of the format are inconsistent or if either the reader or format is null.
- * @throws IOException
- * If there is a problem reading the header or skipping the first record.
- * @throws CSVException Thrown on invalid input.
- */
- private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
- final Charset charset, final boolean trackBytes)
- throws IOException {
- Objects.requireNonNull(reader, "reader");
- Objects.requireNonNull(format, "format");
- this.format = format.copy();
- this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset, trackBytes));
- this.csvRecordIterator = new CSVRecordIterator();
- this.headers = createHeaders();
- this.characterOffset = characterOffset;
- this.recordNumber = recordNumber - 1;
+ public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) throws IOException {
+ // @formatter:off
+ this(builder()
+ .setReader(reader)
+ .setFormat(Objects.requireNonNull(format, "format")) // requireNonNull for full compatibility
+ .setCharacterOffset(characterOffset)
+ .setRecordNumber(recordNumber)
+ .setCharset((Charset) null).setTrackBytes(false));
+ // @formatter:on
}
private void addRecordValue(final boolean lastRecord) {
final String input = format.trim(reusableToken.content.toString());
- if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) {
+ // Only drop the empty field produced by an actual trailing delimiter. A quoted empty
+ // field ("") is a real value, not a trailing delimiter, so it must be kept.
+ if (lastRecord && input.isEmpty() && format.getTrailingDelimiter() && !reusableToken.isQuoted) {
return;
}
recordList.add(handleNull(input));
@@ -598,11 +613,11 @@ private Map createEmptyHeaderMap() {
* @throws CSVException on invalid input.
*/
private Headers createHeaders() throws IOException {
- Map hdrMap = null;
+ Map headerMap = null;
List headerNames = null;
final String[] formatHeader = format.getHeader();
if (formatHeader != null) {
- hdrMap = createEmptyHeaderMap();
+ headerMap = createEmptyHeaderMap();
String[] headerRecord = null;
if (formatHeader.length == 0) {
// read the header from the first line of the file
@@ -620,7 +635,6 @@ private Headers createHeaders() throws IOException {
}
headerRecord = formatHeader;
}
-
// build the name to index mappings
if (headerRecord != null) {
// Track an occurrence of a null, empty or blank header.
@@ -629,24 +643,20 @@ private Headers createHeaders() throws IOException {
final String header = headerRecord[i];
final boolean blankHeader = CSVFormat.isBlank(header);
if (blankHeader && !format.getAllowMissingColumnNames()) {
- throw new IllegalArgumentException(
- "A header name is missing in " + Arrays.toString(headerRecord));
+ throw new IllegalArgumentException("A header name is missing in " + Arrays.toString(headerRecord));
}
-
- final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header);
+ final boolean containsHeader = blankHeader ? observedMissing : headerMap.containsKey(header);
final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode();
final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
-
if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
- throw new IllegalArgumentException(
- String.format(
+ throw new IllegalArgumentException(String.format(
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
header, Arrays.toString(headerRecord)));
}
observedMissing |= blankHeader;
if (header != null) {
- hdrMap.put(header, Integer.valueOf(i)); // N.B. Explicit (un)boxing is intentional
+ headerMap.put(header, Integer.valueOf(i)); // Explicit boxing is intentional
if (headerNames == null) {
headerNames = new ArrayList<>(headerRecord.length);
}
@@ -656,18 +666,18 @@ private Headers createHeaders() throws IOException {
}
}
// Make header names Collection immutable
- return new Headers(hdrMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames));
+ return new Headers(headerMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames));
}
/**
* Gets the current line number in the input stream.
*
*
- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to
+ * Note: If your CSV input has multi-line values, the returned number does not correspond to
* the record number.
*
*
- * @return current line number
+ * @return current line number.
*/
public long getCurrentLineNumber() {
return lexer.getCurrentLineNumber();
@@ -676,7 +686,7 @@ public long getCurrentLineNumber() {
/**
* Gets the first end-of-line string encountered.
*
- * @return the first end-of-line string
+ * @return the first end-of-line string.
* @since 1.5
*/
public String getFirstEndOfLine() {
@@ -700,7 +710,7 @@ public String getHeaderComment() {
* The map keys are column names. The map values are 0-based indices.
*
*
- * Note: The map can only provide a one-to-one mapping when the format did not
+ * Note: The map can only provide a one-to-one mapping when the format did not
* contain null or duplicate column names.
*
*
@@ -744,7 +754,7 @@ public List getHeaderNames() {
* Gets the current record number in the input stream.
*
*
- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to
+ * Note: If your CSV input has multi-line values, the returned number does not correspond to
* the line number.
*
*
@@ -761,6 +771,9 @@ public long getRecordNumber() {
*
* The returned content starts at the current parse-position in the stream.
*
+ *
+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows this method produces.
+ *
*
* @return list of {@link CSVRecord CSVRecords}, may be empty
* @throws UncheckedIOException
@@ -867,6 +880,9 @@ private boolean isStrictQuoteMode() {
* parser is closed, one option is to extract all records as a list with
* {@link #getRecords()}, and return an iterator to that list.
*
+ *
+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows an Iterator produces.
+ *
*/
@Override
public Iterator iterator() {
@@ -876,16 +892,16 @@ public Iterator iterator() {
/**
* Parses the next record from the current point in the stream.
*
- * @return the record as an array of values, or {@code null} if the end of the stream has been reached
- * @throws IOException on parse error or input read-failure
- * @throws CSVException on invalid input.
+ * @return the record as an array of values, or {@code null} if the end of the stream has been reached.
+ * @throws IOException on parse error or input read-failure.
+ * @throws CSVException on invalid CSV input data.
*/
CSVRecord nextRecord() throws IOException {
CSVRecord result = null;
recordList.clear();
StringBuilder sb = null;
final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
- final long startBytePosition = lexer.getBytesRead() + this.characterOffset;
+ final long startBytePosition = lexer.getBytesRead() + byteOffset;
do {
reusableToken.reset();
lexer.nextToken(reusableToken);
@@ -918,12 +934,10 @@ CSVRecord nextRecord() throws IOException {
throw new CSVException("Unexpected Token type: %s", reusableToken.type);
}
} while (reusableToken.type == TOKEN);
-
if (!recordList.isEmpty()) {
recordNumber++;
- final String comment = Objects.toString(sb, null);
- result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
- recordNumber, startCharPosition, startBytePosition);
+ result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), Objects.toString(sb, null), recordNumber, startCharPosition,
+ startBytePosition);
}
return result;
}
@@ -934,6 +948,10 @@ CSVRecord nextRecord() throws IOException {
* If the parser is closed, the stream will not produce any more values.
* See the comments in {@link #iterator()}.
*
+ *
+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a Stream produces.
+ *
+ *
* @return a sequential {@code Stream} with this collection as its source.
* @since 1.9.0
*/
diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java
index 67088c38a5..a7048fd625 100644
--- a/src/main/java/org/apache/commons/csv/CSVPrinter.java
+++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java
@@ -32,8 +32,10 @@
import java.sql.Clob;
import java.sql.ResultSet;
import java.sql.SQLException;
+import java.sql.Statement;
import java.util.Arrays;
import java.util.Objects;
+import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Stream;
import org.apache.commons.io.function.IOStream;
@@ -87,26 +89,24 @@ public final class CSVPrinter implements Flushable, Closeable {
private long recordCount;
+ private final ReentrantLock lock = new ReentrantLock();
+
/**
* Creates a printer that will print values to the given stream following the CSVFormat.
*
- * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation
- * and escaping with a different character) are not supported.
+ * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation and escaping with a different
+ * character) are not supported.
*
*
- * @param appendable
- * stream to which to print. Must not be null.
- * @param format
- * the CSV format. Must not be null.
- * @throws IOException
- * thrown if the optional header cannot be printed.
- * @throws IllegalArgumentException
- * thrown if the parameters of the format are inconsistent or if either out or format are null.
+ * @param appendable stream to which to print. Must not be null.
+ * @param format the CSV format. Must not be null.
+ * @throws IOException thrown if the optional header cannot be printed.
+ * @throws IllegalArgumentException thrown if the parameters of the format are inconsistent.
+ * @throws NullPointerException thrown if either parameter is null.
*/
public CSVPrinter(final Appendable appendable, final CSVFormat format) throws IOException {
Objects.requireNonNull(appendable, "appendable");
Objects.requireNonNull(format, "format");
-
this.appendable = appendable;
this.format = format.copy();
// TODO: Is it a good idea to do this here instead of on the first call to a print method?
@@ -129,10 +129,12 @@ public void close() throws IOException {
/**
* Closes the underlying stream with an optional flush first.
+ *
* @param flush whether to flush before the actual close.
* @throws IOException
* If an I/O error occurs
* @since 1.6
+ * @see CSVFormat#getAutoFlush()
*/
public void close(final boolean flush) throws IOException {
if (flush || format.getAutoFlush()) {
@@ -144,12 +146,12 @@ public void close(final boolean flush) throws IOException {
}
/**
- * Outputs the record separator and increments the record count.
+ * Prints the record separator and increments the record count.
*
* @throws IOException
* If an I/O error occurs
*/
- private synchronized void endOfRecord() throws IOException {
+ private void endOfRecord() throws IOException {
println();
recordCount++;
}
@@ -173,7 +175,7 @@ public void flush() throws IOException {
* @return the target Appendable.
*/
public Appendable getOut() {
- return this.appendable;
+ return appendable;
}
/**
@@ -194,9 +196,13 @@ public long getRecordCount() {
* @throws IOException
* If an I/O error occurs
*/
- public synchronized void print(final Object value) throws IOException {
- format.print(value, appendable, newRecord);
- newRecord = false;
+ public void print(final Object value) throws IOException {
+ lock.lock();
+ try {
+ printRaw(value);
+ } finally {
+ lock.unlock();
+ }
}
/**
@@ -220,34 +226,39 @@ public synchronized void print(final Object value) throws IOException {
* @throws IOException
* If an I/O error occurs
*/
- public synchronized void printComment(final String comment) throws IOException {
- if (comment == null || !format.isCommentMarkerSet()) {
- return;
- }
- if (!newRecord) {
- println();
- }
- appendable.append(format.getCommentMarker().charValue()); // N.B. Explicit (un)boxing is intentional
- appendable.append(SP);
- for (int i = 0; i < comment.length(); i++) {
- final char c = comment.charAt(i);
- switch (c) {
- case CR:
- if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) {
- i++;
- }
- // falls-through: break intentionally excluded.
- case LF:
+ public void printComment(final String comment) throws IOException {
+ lock.lock();
+ try {
+ if (comment == null || !format.isCommentMarkerSet()) {
+ return;
+ }
+ if (!newRecord) {
println();
- appendable.append(format.getCommentMarker().charValue()); // N.B. Explicit (un)boxing is intentional
- appendable.append(SP);
- break;
- default:
- appendable.append(c);
- break;
}
+ appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional
+ appendable.append(SP);
+ for (int i = 0; i < comment.length(); i++) {
+ final char c = comment.charAt(i);
+ switch (c) {
+ case CR:
+ if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) {
+ i++;
+ }
+ // falls-through: break intentionally excluded.
+ case LF:
+ println();
+ appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional
+ appendable.append(SP);
+ break;
+ default:
+ appendable.append(c);
+ break;
+ }
+ }
+ println();
+ } finally {
+ lock.unlock();
}
- println();
}
/**
@@ -258,22 +269,45 @@ public synchronized void printComment(final String comment) throws IOException {
* @throws SQLException If a database access error occurs or this method is called on a closed result set.
* @since 1.9.0
*/
- public synchronized void printHeaders(final ResultSet resultSet) throws IOException, SQLException {
- try (IOStream stream = IOStream.of(format.builder().setHeader(resultSet).get().getHeader())) {
- stream.forEachOrdered(this::print);
+ public void printHeaders(final ResultSet resultSet) throws IOException, SQLException {
+ lock.lock();
+ try {
+ try (IOStream stream = IOStream.of(format.builder().setHeader(resultSet).get().getHeader())) {
+ stream.forEachOrdered(this::print);
+ }
+ println();
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ /**
+ * Prints the record separator.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void println() throws IOException {
+ lock.lock();
+ try {
+ format.println(appendable);
+ newRecord = true;
+ } finally {
+ lock.unlock();
}
- println();
}
/**
- * Outputs the record separator.
+ * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed.
*
+ * @param value
+ * value to be output.
* @throws IOException
* If an I/O error occurs
*/
- public synchronized void println() throws IOException {
- format.println(appendable);
- newRecord = true;
+ private void printRaw(final Object value) throws IOException {
+ format.print(value, appendable, newRecord);
+ newRecord = false;
}
/**
@@ -290,9 +324,14 @@ public synchronized void println() throws IOException {
* If an I/O error occurs
*/
@SuppressWarnings("resource")
- public synchronized void printRecord(final Iterable> values) throws IOException {
- IOStream.of(values).forEachOrdered(this::print);
- endOfRecord();
+ public void printRecord(final Iterable> values) throws IOException {
+ lock.lock();
+ try {
+ IOStream.of(values).forEachOrdered(this::print);
+ endOfRecord();
+ } finally {
+ lock.unlock();
+ }
}
/**
@@ -320,16 +359,21 @@ public void printRecord(final Object... values) throws IOException {
* separator to the output after printing the record, so there is no need to call {@link #println()}.
*
*
- * @param values
+ * @param stream
* values to output.
* @throws IOException
* If an I/O error occurs
* @since 1.10.0
*/
@SuppressWarnings("resource") // caller closes.
- public synchronized void printRecord(final Stream> values) throws IOException {
- IOStream.adapt(values).forEachOrdered(this::print);
- endOfRecord();
+ public void printRecord(final Stream> stream) throws IOException {
+ lock.lock();
+ try {
+ IOStream.adapt(stream).forEachOrdered(stream.isParallel() ? this::printRaw : this::print);
+ endOfRecord();
+ } finally {
+ lock.unlock();
+ }
}
private void printRecordObject(final Object value) throws IOException {
@@ -342,6 +386,11 @@ private void printRecordObject(final Object value) throws IOException {
}
}
+ @SuppressWarnings("resource")
+ private void printRecords(final IOStream> stream) throws IOException {
+ format.limit(stream).forEachOrdered(this::printRecordObject);
+ }
+
/**
* Prints all the objects in the given {@link Iterable} handling nested collections/arrays as records.
*
@@ -382,7 +431,7 @@ private void printRecordObject(final Object value) throws IOException {
*/
@SuppressWarnings("resource")
public void printRecords(final Iterable> values) throws IOException {
- IOStream.of(values).forEachOrdered(this::printRecordObject);
+ printRecords(IOStream.of(values));
}
/**
@@ -428,37 +477,47 @@ public void printRecords(final Object... values) throws IOException {
/**
* Prints all the objects in the given JDBC result set.
+ *
+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a result set produces. This is most useful when you cannot limit rows
+ * through {@link Statement#setLargeMaxRows(long)} or {@link Statement#setMaxRows(int)}.
+ *
*
- * @param resultSet
- * The values to print.
- * @throws IOException
- * If an I/O error occurs.
- * @throws SQLException
- * Thrown when a database access error occurs.
+ * @param resultSet The values to print.
+ * @throws IOException If an I/O error occurs.
+ * @throws SQLException Thrown when a database access error occurs.
*/
public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
final int columnCount = resultSet.getMetaData().getColumnCount();
- while (resultSet.next()) {
- for (int i = 1; i <= columnCount; i++) {
- final Object object = resultSet.getObject(i);
- if (object instanceof Clob) {
- try (Reader reader = ((Clob) object).getCharacterStream()) {
- print(reader);
- }
- } else if (object instanceof Blob) {
- try (InputStream inputStream = ((Blob) object).getBinaryStream()) {
- print(inputStream);
+ while (resultSet.next() && format.useRow(resultSet.getRow())) {
+ lock.lock();
+ try {
+ for (int i = 1; i <= columnCount; i++) {
+ final Object object = resultSet.getObject(i);
+ if (object instanceof Clob) {
+ try (Reader reader = ((Clob) object).getCharacterStream()) {
+ print(reader);
+ }
+ } else if (object instanceof Blob) {
+ try (InputStream inputStream = ((Blob) object).getBinaryStream()) {
+ print(inputStream);
+ }
+ } else {
+ print(object);
}
- } else {
- print(object);
}
+ endOfRecord();
+ } finally {
+ lock.unlock();
}
- endOfRecord();
}
}
/**
* Prints all the objects with metadata in the given JDBC result set based on the header boolean.
+ *
+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a result set produces. This is most useful when you cannot limit rows
+ * through {@link Statement#setLargeMaxRows(long)} or {@link Statement#setMaxRows(int)}.
+ *
*
* @param resultSet source of row data.
* @param printHeader whether to print headers.
@@ -515,6 +574,6 @@ public void printRecords(final ResultSet resultSet, final boolean printHeader) t
*/
@SuppressWarnings({ "resource" }) // Caller closes.
public void printRecords(final Stream> values) throws IOException {
- IOStream.adapt(values).forEachOrdered(this::printRecordObject);
+ printRecords(IOStream.adapt(values));
}
}
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index b120f945f4..8dab14d907 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -55,13 +55,13 @@ public final class CSVRecord implements Serializable, Iterable {
*/
private final long bytePosition;
- /** The accumulated comments (if any) */
+ /** The accumulated comments (if any). */
private final String comment;
/** The record number. */
private final long recordNumber;
- /** The values of the record */
+ /** The values of the record. */
private final String[] values;
/** The parser that originates this record. This is not serialized. */
@@ -114,9 +114,9 @@ public String get(final int i) {
* the name of the column to be retrieved.
* @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
* @throws IllegalStateException
- * if no header mapping was provided
+ * if no header mapping was provided.
* @throws IllegalArgumentException
- * if {@code name} is not mapped or if the record is inconsistent
+ * if {@code name} is not mapped or if the record is inconsistent.
* @see #isMapped(String)
* @see #isConsistent()
* @see #getParser()
@@ -125,20 +125,18 @@ public String get(final int i) {
public String get(final String name) {
final Map headerMap = getHeaderMapRaw();
if (headerMap == null) {
- throw new IllegalStateException(
- "No header mapping was specified, the record values can't be accessed by name");
+ throw new IllegalStateException("No header mapping was specified, the record values can't be accessed by name");
}
final Integer index = headerMap.get(name);
if (index == null) {
- throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
- headerMap.keySet()));
+ throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, headerMap.keySet()));
}
try {
- return values[index.intValue()]; // N.B. Explicit (un)boxing is intentional
+ return values[index.intValue()]; // Explicit unboxing is intentional
} catch (final ArrayIndexOutOfBoundsException e) {
- throw new IllegalArgumentException(String.format(
- "Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
- Integer.valueOf(values.length))); // N.B. Explicit (un)boxing is intentional
+ // Explicit boxing is intentional
+ throw new IllegalArgumentException(
+ String.format("Index for header '%s' is %d but CSVRecord only has %d values!", name, index, Integer.valueOf(values.length)));
}
}
@@ -165,7 +163,7 @@ public long getCharacterPosition() {
/**
* Returns the comment for this record, if any.
* Note that comments are attached to the following record.
- * If there is no following record (i.e. the comment is at EOF),
+ * If there is no following record (that is, the comment is at EOF),
* then the comment will be ignored.
*
* @return the comment for this record, or null if no comment for this record is available.
@@ -197,7 +195,7 @@ public CSVParser getParser() {
* Returns the number of this record in the parsed CSV file.
*
*
- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to
+ * NOTE: If your CSV input has multi-line values, the returned number does not correspond to
* the current line number of the parser that created this record.
*
*
@@ -211,10 +209,10 @@ public long getRecordNumber() {
/**
* Checks whether this record has a comment, false otherwise.
* Note that comments are attached to the following record.
- * If there is no following record (i.e. the comment is at EOF),
+ * If there is no following record (that is, the comment is at EOF),
* then the comment will be ignored.
*
- * @return true if this record has a comment, false otherwise
+ * @return true if this record has a comment, false otherwise.
* @since 1.3
*/
public boolean hasComment() {
@@ -229,7 +227,7 @@ public boolean hasComment() {
* test but still produce parsable files.
*
*
- * @return true of this record is valid, false if not
+ * @return true if this record is valid, false if not.
*/
public boolean isConsistent() {
final Map headerMap = getHeaderMapRaw();
@@ -237,7 +235,7 @@ public boolean isConsistent() {
}
/**
- * Checks whether a given column is mapped, i.e. its name has been defined to the parser.
+ * Checks whether a given column is mapped, that is, its name has been defined to the parser.
*
* @param name
* the name of the column to be retrieved.
@@ -252,8 +250,8 @@ public boolean isMapped(final String name) {
* Checks whether a column with a given index has a value.
*
* @param index
- * a column index (0-based)
- * @return whether a column with a given index has a value
+ * a column index (0-based).
+ * @return whether a column with a given index has a value.
*/
public boolean isSet(final int index) {
return 0 <= index && index < values.length;
@@ -264,10 +262,10 @@ public boolean isSet(final int index) {
*
* @param name
* the name of the column to be retrieved.
- * @return whether a given column is mapped and has a value
+ * @return whether a given column is mapped and has a value.
*/
public boolean isSet(final String name) {
- return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // N.B. Explicit (un)boxing is intentional
+ return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit unboxing is intentional
}
/**
@@ -283,7 +281,7 @@ public Iterator iterator() {
/**
* Puts all values of this record into the given Map.
*
- * @param the map type
+ * @param The map type.
* @param map The Map to populate.
* @return the given map.
* @since 1.9.0
@@ -354,14 +352,13 @@ public Map toMap() {
*/
@Override
public String toString() {
- return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" +
- Arrays.toString(values) + "]";
+ return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + Arrays.toString(values) + "]";
}
/**
- * Gets the values for this record. This is not a copy.
+ * Gets the values for this record. This is not a copy.
*
- * @return the values for this record.
+ * @return the values for this record, never null.
* @since 1.10.0
*/
public String[] values() {
diff --git a/src/main/java/org/apache/commons/csv/Constants.java b/src/main/java/org/apache/commons/csv/Constants.java
index e85578467d..9dd276eccc 100644
--- a/src/main/java/org/apache/commons/csv/Constants.java
+++ b/src/main/java/org/apache/commons/csv/Constants.java
@@ -20,7 +20,7 @@
package org.apache.commons.csv;
/**
- * Private constants to this package.
+ * Private constants for this package.
*/
final class Constants {
@@ -37,10 +37,10 @@ final class Constants {
static final char CR = '\r';
- /** RFC 4180 defines line breaks as CRLF */
+ /** RFC 4180 defines line breaks as CRLF. */
static final String CRLF = "\r\n";
- static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); // N.B. Explicit (un)boxing is intentional
+ static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); // Explicit boxing is intentional.
static final String EMPTY = "";
@@ -67,7 +67,7 @@ final class Constants {
static final char PIPE = '|';
- /** ASCII record separator */
+ /** ASCII record separator. */
static final char RS = 30;
static final char SP = ' ';
@@ -76,10 +76,10 @@ final class Constants {
static final char TAB = '\t';
- /** Undefined state for the lookahead char */
+ /** Undefined state for the lookahead char. */
static final int UNDEFINED = -2;
- /** ASCII unit separator */
+ /** ASCII unit separator. */
static final char US = 31;
/** No instances. */
diff --git a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
index 01989d6640..8087f16eeb 100644
--- a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
+++ b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
@@ -20,7 +20,7 @@
package org.apache.commons.csv;
/**
- * Determines how duplicate header fields should be handled
+ * Enumerates how duplicate header fields should be handled
* if {@link CSVFormat.Builder#setHeader(Class)} is not null.
*
* @since 1.10.0
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 8c0a034a22..20c1ef5444 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -37,26 +37,30 @@
/**
* A special buffered reader which supports sophisticated read access.
*
- * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
- * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
+ * In particular the reader supports a look-ahead option, which allows you to see the next char returned by {@link #read()}. This reader also tracks how many
+ * characters have been read with {@link #getPosition()}.
*
*/
final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
/** The last char returned */
private int lastChar = UNDEFINED;
+
private int lastCharMark = UNDEFINED;
/** The count of EOLs (CR/LF/CRLF) seen so far */
private long lineNumber;
+
private long lineNumberMark;
/** The position, which is the number of characters read so far */
private long position;
+
private long positionMark;
/** The number of bytes read so far. */
private long bytesRead;
+
private long bytesReadMark;
/** Encoder for calculating the number of bytes for each character read. */
@@ -70,12 +74,11 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
}
/**
- * Constructs a new instance with the specified reader, character set,
- * and byte tracking option. Initializes an encoder if byte tracking is enabled
- * and a character set is provided.
+ * Constructs a new instance with the specified reader, character set, and byte tracking option. Initializes an encoder if byte tracking is enabled and a
+ * character set is provided.
*
- * @param reader the reader supports a look-ahead option.
- * @param charset the character set for encoding, or {@code null} if not applicable.
+ * @param reader the reader supports a look-ahead option.
+ * @param charset the character set for encoding, or {@code null} if not applicable.
* @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it.
*/
ExtendedBufferedReader(final Reader reader, final Charset charset, final boolean trackBytes) {
@@ -86,8 +89,7 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
/**
* Closes the stream.
*
- * @throws IOException
- * If an I/O error occurs
+ * @throws IOException If an I/O error occurs
*/
@Override
public void close() throws IOException {
@@ -105,26 +107,35 @@ long getBytesRead() {
return this.bytesRead;
}
+ private long getEncodedCharLength(final char[] buf, final int offset, final int length) throws CharacterCodingException {
+ long len = 0;
+ int previous = lastChar;
+ for (int i = offset; i < offset + length; i++) {
+ len += getEncodedCharLength(previous, buf[i]);
+ previous = buf[i];
+ }
+ return len;
+ }
+
/**
- * Gets the byte length of the given character based on the the original Unicode
- * specification, which defined characters as fixed-width 16-bit entities.
+ * Gets the byte length of the given character based on the original Unicode specification, which defined characters as fixed-width 16-bit entities.
*
* The Unicode characters are divided into two main ranges:
*
- *
U+0000 to U+FFFF (Basic Multilingual Plane, BMP):
- *
- *
Represented using a single 16-bit {@code char}.
- *
Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.
- *
- *
- *
U+10000 to U+10FFFF (Supplementary Characters):
- *
- *
Represented as a pair of {@code char}s:
- *
The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).
- *
The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).
- *
Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.
- *
- *
+ *
U+0000 to U+FFFF (Basic Multilingual Plane, BMP):
+ *
+ *
Represented using a single 16-bit {@code char}.
+ *
Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.
+ *
+ *
+ *
U+10000 to U+10FFFF (Supplementary Characters):
+ *
+ *
Represented as a pair of {@code char}s:
+ *
The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).
+ *
The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).
+ *
Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.
+ *
+ *
*
*
* @param current the current character to process.
@@ -132,26 +143,29 @@ long getBytesRead() {
* @throws CharacterCodingException if the character cannot be encoded.
*/
private int getEncodedCharLength(final int current) throws CharacterCodingException {
+ return getEncodedCharLength(lastChar, current);
+ }
+
+ private int getEncodedCharLength(final int previous, final int current) throws CharacterCodingException {
final char cChar = (char) current;
- final char lChar = (char) lastChar;
+ final char lChar = (char) previous;
if (!Character.isSurrogate(cChar)) {
return encoder.encode(CharBuffer.wrap(new char[] { cChar })).limit();
}
if (Character.isHighSurrogate(cChar)) {
// Move on to the next char (low surrogate)
return 0;
- } else if (Character.isSurrogatePair(lChar, cChar)) {
+ }
+ if (Character.isSurrogatePair(lChar, cChar)) {
return encoder.encode(CharBuffer.wrap(new char[] { lChar, cChar })).limit();
- } else {
- throw new CharacterCodingException();
}
+ throw new CharacterCodingException();
}
/**
- * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
- * any of the read methods. This will not include a character read using the {@link #peek()} method. If no
- * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
- * on the last read then this will return {@link IOUtils#EOF}.
+ * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by any of the read methods. This will not
+ * include a character read using the {@link #peek()} method. If no character has been read then this will return {@link Constants#UNDEFINED}. If the end of
+ * the stream was reached on the last read then this will return {@link IOUtils#EOF}.
*
* @return the last character that was read
*/
@@ -193,11 +207,10 @@ public void mark(final int readAheadLimit) throws IOException {
@Override
public int read() throws IOException {
final int current = super.read();
- if (current == CR || current == LF && lastChar != CR ||
- current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
+ if (current == CR || current == LF && lastChar != CR || current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) {
lineNumber++;
}
- if (encoder != null) {
+ if (encoder != null && current != EOF) {
this.bytesRead += getEncodedCharLength(current);
}
lastChar = current;
@@ -211,6 +224,9 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
return 0;
}
final int len = super.read(buf, offset, length);
+ if (encoder != null && len > 0) {
+ this.bytesRead += getEncodedCharLength(buf, offset, len);
+ }
if (len > 0) {
for (int i = offset; i < offset + len; i++) {
final char ch = buf[i];
@@ -231,8 +247,7 @@ public int read(final char[] buf, final int offset, final int length) throws IOE
}
/**
- * Gets the next line, dropping the line terminator(s). This method should only be called when processing a
- * comment, otherwise, information can be lost.
+ * Gets the next line, dropping the line terminator(s). This method should only be called when processing a comment, otherwise, information can be lost.
*
* Increments {@link #lineNumber} and updates {@link #position}.
*
@@ -272,5 +287,4 @@ public void reset() throws IOException {
bytesRead = bytesReadMark;
super.reset();
}
-
}
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 0e5f368665..fe964480a4 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -23,6 +23,7 @@
import java.io.Closeable;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.commons.io.IOUtils;
@@ -68,8 +69,8 @@ final class Lexer implements Closeable {
/**
* Appends the next escaped character to the token's content.
*
- * @param token the current token
- * @throws IOException on stream access error
+ * @param token the current token.
+ * @throws IOException on stream access error.
* @throws CSVException Thrown on invalid input.
*/
private void appendNextEscapedCharacterToToken(final Token token) throws IOException {
@@ -89,7 +90,7 @@ private void appendNextEscapedCharacterToToken(final Token token) throws IOExcep
* Closes resources.
*
* @throws IOException
- * If an I/O error occurs
+ * If an I/O error occurs.
*/
@Override
public void close() throws IOException {
@@ -97,27 +98,27 @@ public void close() throws IOException {
}
/**
- * Gets the number of bytes read
+ * Gets the number of bytes read.
*
- * @return the number of bytes read
+ * @return the number of bytes read.
*/
long getBytesRead() {
return reader.getBytesRead();
}
/**
- * Returns the current character position
+ * Gets the current character position.
*
- * @return the current character position
+ * @return the current character position.
*/
long getCharacterPosition() {
return reader.getPosition();
}
/**
- * Returns the current line number
+ * Gets the current line number.
*
- * @return the current line number
+ * @return the current line number.
*/
long getCurrentLineNumber() {
return reader.getLineNumber();
@@ -136,7 +137,7 @@ boolean isCommentStart(final int ch) {
}
/**
- * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#peek(char[])}.
+ * Tests whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#peek(char[])}.
*
* @param ch
* the current character.
@@ -152,6 +153,7 @@ boolean isDelimiter(final int ch) throws IOException {
isLastTokenDelimiter = true;
return true;
}
+ Arrays.fill(delimiterBuf, '\0');
reader.peek(delimiterBuf);
for (int i = 0; i < delimiterBuf.length; i++) {
if (delimiterBuf[i] != delimiter[i + 1]) {
@@ -190,6 +192,7 @@ boolean isEscape(final int ch) {
* @throws IOException If an I/O error occurs.
*/
boolean isEscapeDelimiter() throws IOException {
+ Arrays.fill(escapeDelimiterBuf, '\0');
reader.peek(escapeDelimiterBuf);
if (escapeDelimiterBuf[0] != delimiter[0]) {
return false;
@@ -214,7 +217,7 @@ boolean isQuoteChar(final int ch) {
/**
* Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file.
*
- * @param ch the character to check
+ * @param ch the character to check.
* @return true if the character is at the start of a line.
*/
boolean isStartOfLine(final int ch) {
@@ -274,15 +277,22 @@ Token nextToken(final Token token) throws IOException {
}
// Important: make sure a new char gets consumed in each iteration
while (token.type == Token.Type.INVALID) {
+ // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must
+ // only be evaluated once per character. Remember a match found while skipping whitespace below.
+ boolean delimiter = false;
// ignore whitespaces at beginning of a token
if (ignoreSurroundingSpaces) {
- while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) {
+ while (Character.isWhitespace((char) c) && !eol) {
+ if (isDelimiter(c)) {
+ delimiter = true;
+ break;
+ }
c = reader.read();
eol = readEndOfLine(c);
}
}
// ok, start of token reached: encapsulated, or token
- if (isDelimiter(c)) {
+ if (delimiter || isDelimiter(c)) {
// empty token return TOKEN("")
token.type = Token.Type.TOKEN;
} else if (eol) {
@@ -340,7 +350,6 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
int c;
while (true) {
c = reader.read();
-
if (isQuoteChar(c)) {
if (isQuoteChar(reader.peek())) {
// double or escaped encapsulator -> add single encapsulator to token
@@ -401,10 +410,10 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
*
An unescaped delimiter has been reached (TOKEN)
*
*
- * @param token the current token
- * @param ch the current character
- * @return the filled token
- * @throws IOException on stream access error
+ * @param token the current token.
+ * @param ch the current character.
+ * @return the filled token.
+ * @throws IOException on stream access error.
* @throws CSVException Thrown on invalid input.
*/
private Token parseSimpleToken(final Token token, final int ch) throws IOException {
@@ -443,7 +452,7 @@ private Token parseSimpleToken(final Token token, final int ch) throws IOExcepti
/**
* Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character...
*
- * @return true if the given or next character is a line-terminator
+ * @return true if the given or next character is a line-terminator.
*/
boolean readEndOfLine(final int ch) throws IOException {
// check if we have \r\n...
diff --git a/src/main/java/org/apache/commons/csv/QuoteMode.java b/src/main/java/org/apache/commons/csv/QuoteMode.java
index d9c032ffc4..ae64ab4863 100644
--- a/src/main/java/org/apache/commons/csv/QuoteMode.java
+++ b/src/main/java/org/apache/commons/csv/QuoteMode.java
@@ -19,7 +19,9 @@
package org.apache.commons.csv;
/**
- * Defines quoting behavior.
+ * Enumerates quoting behavior.
+ *
+ * @see CSVFormat.Builder#setQuoteMode(QuoteMode)
*/
public enum QuoteMode {
diff --git a/src/main/java/org/apache/commons/csv/Token.java b/src/main/java/org/apache/commons/csv/Token.java
index 9e63b944b6..87af335678 100644
--- a/src/main/java/org/apache/commons/csv/Token.java
+++ b/src/main/java/org/apache/commons/csv/Token.java
@@ -24,13 +24,14 @@
/**
* Internal token representation.
*
- * It is used as a contract between the lexer and the parser.
+ * This is used as a contract between the lexer and the parser.
*
*/
final class Token {
enum Type {
- /** Token has no valid content, i.e. is in its initialized state. */
+
+ /** Token has no valid content, that is, is in its initialized state. */
INVALID,
/** Token with content, at the beginning or in the middle of a line. */
@@ -47,13 +48,13 @@ enum Type {
}
/** Length of the initial token (content-)buffer */
- private static final int INITIAL_TOKEN_LENGTH = 50;
+ private static final int DEFAULT_CAPACITY = 50;
/** Token type */
Token.Type type = INVALID;
- /** The content buffer. */
- final StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH);
+ /** The content buffer, never null. */
+ final StringBuilder content = new StringBuilder(DEFAULT_CAPACITY);
/** Token ready flag: indicates a valid token with content (ready for the parser). */
boolean isReady;
@@ -68,12 +69,12 @@ void reset() {
}
/**
- * Eases IDE debugging.
+ * Converts the token state to a string to ease debugging.
*
* @return a string helpful for debugging.
*/
@Override
public String toString() {
- return type.name() + " [" + content.toString() + "]";
+ return type + " [" + content.toString() + "]";
}
}
diff --git a/src/main/javadoc/overview.html b/src/main/javadoc/overview.html
new file mode 100644
index 0000000000..0598cf19d0
--- /dev/null
+++ b/src/main/javadoc/overview.html
@@ -0,0 +1,375 @@
+
+
+
+Apache Commons CSV Overview
+
+
+
+
Custom formats can be created using a fluent style API.
+
+
+
Parsing Standard CSV Files
+
+ Parsing files with Apache Commons CSV is relatively straightforward. Pick a
+ CSVFormat
+ and go from there.
+
+
+
Parsing an Excel CSV File
+
To parse an Excel CSV file, write:
+
+
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
+for (CSVRecord record : records) {
+ String lastName = record.get("Last Name");
+ String firstName = record.get("First Name");
+}
+
+
+
+
+
+
Parsing Custom CSV Files
+
+ You can define your own I/O rules by building your own CSVFormat instance. Starting with
+ CSVFormat.builder()
+ lets you start from a predefined format and customize. For example:
+
+ To handle files that start with a Byte Order Mark (BOM), like some Excel CSV files, you need an extra step to deal with the optional BOM bytes. Using the
+ BOMInputStream class from Apache Commons IO simplifies this task; for example:
+
You might find it handy to create something like this:
+
+
+/**
+ * Creates a reader capable of handling BOMs.
+ *
+ * @param path The path to read.
+ * @return a new InputStreamReader for UTF-8 bytes.
+ * @throws IOException if an I/O error occurs.
+ */
+public InputStreamReader newReader(final Path path) throws IOException {
+ return new InputStreamReader(BOMInputStream.builder()
+ .setPath(path)
+ .get(), StandardCharsets.UTF_8);
+}
+
+
+
+
+
Using Headers
+
+ Apache Commons CSV provides several ways to access record values. The simplest way is to access values by their index in the record. However, columns in
+ CSV files often have a name, for example: ID, CustomerNo, Birthday, etc. The CSVFormat class provides an API for specifying these header names and
+ CSVRecord on the other hand has methods to access values by their corresponding header name.
+
+
+
Accessing column values by index
+
To access a record value by index, no special configuration of the CSVFormat is necessary:
+
+
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.parse(in);
+for (CSVRecord record : records) {
+ String columnOne = record.get(0);
+ String columnTwo = record.get(1);
+}
+
+
+
+
+
Defining a header manually
+
Indices may not be the most intuitive way to access record values. For this reason it is possible to assign names to each column in the file:
+
+
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
+ .setHeader("ID", "CustomerNo", "Name")
+ .build()
+ .parse(in);
+for (CSVRecord record : records) {
+ String id = record.get("ID");
+ String customerNo = record.get("CustomerNo");
+ String name = record.get("Name");
+}
+
+
+ Note that column values can still be accessed using their index.
+
+
+
Using an enum to define a header
+
Using String values all over the code to reference columns can be error-prone. For this reason, it is possible to define an enum to specify header
+ names. Note that the enum constant names are used to access column values. This may lead to enum constant names which do not follow the Java coding
+ standard of defining constants in upper case with underscores:
+
+
+public enum Headers {
+ ID, CustomerNo, Name
+}
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
+ .setHeader(Headers.class)
+ .build()
+ .parse(in);
+for (CSVRecord record : records) {
+ String id = record.get(Headers.ID);
+ String customerNo = record.get(Headers.CustomerNo);
+ String name = record.get(Headers.Name);
+}
+
+
+ Again it is possible to access values by their index and by using a String (for example "CustomerNo").
+
+
+
Header auto detection
+
Some CSV files define header names in their first record. If configured, Apache Commons CSV can parse the header names from the first record:
+
+
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
+ .setHeader()
+ .setSkipHeaderRecord(true)
+ .build()
+ .parse(in);
+for (CSVRecord record : records) {
+ String id = record.get("ID");
+ String customerNo = record.get("CustomerNo");
+ String name = record.get("Name");
+}
+
+
+ This will use the values from the first record as header names and skip the first record when iterating.
+
+
+
+
Printing with headers
+
To print a CSV file with headers, you specify the headers in the format:
SQL lets you limit how many rows a SELECT statement returns with the LIMIT clause.
+
+ When you can't or don't want to change the SQL used to generate rows, JDBC lets you limit how many rows a JDBC Statement returns with the Statement.setMaxRows(int) method.
+
+
+ When you get a JDBC ResultSet from an API like
+ DatabaseMetaData.getProcedures(...), there is no SQL or JDBC Statement to use to set a limit, and the ResultSet class does not have an API to limit rows.
+
+ Using the above, calling CSVPrinter.printRecords(ResultSet) will
+ limit the row count to the maximum number of rows specified in setMaxRows().
+
+
Note that setMaxRows() works with the other methods that print a sequence of records.
+
+
+
+
diff --git a/src/media/commons-logo-component-100.xcf b/src/media/commons-logo-component-100.xcf
new file mode 100644
index 0000000000..77d92f2779
Binary files /dev/null and b/src/media/commons-logo-component-100.xcf differ
diff --git a/src/media/commons-logo-component.xcf b/src/media/commons-logo-component.xcf
new file mode 100644
index 0000000000..3670221da7
Binary files /dev/null and b/src/media/commons-logo-component.xcf differ
diff --git a/src/media/logo.png b/src/media/logo.png
new file mode 100644
index 0000000000..93bb6c0148
Binary files /dev/null and b/src/media/logo.png differ
diff --git a/src/site/resources/images/logo.png b/src/site/resources/images/logo.png
index 77e721d2c7..93bb6c0148 100644
Binary files a/src/site/resources/images/logo.png and b/src/site/resources/images/logo.png differ
diff --git a/src/site/resources/images/logo.xcf b/src/site/resources/images/logo.xcf
deleted file mode 100644
index 98ff21ec1d..0000000000
Binary files a/src/site/resources/images/logo.xcf and /dev/null differ
diff --git a/src/site/resources/pmd/pmd-ruleset.xml b/src/site/resources/pmd/pmd-ruleset.xml
index 5acc764021..74e41f991d 100644
--- a/src/site/resources/pmd/pmd-ruleset.xml
+++ b/src/site/resources/pmd/pmd-ruleset.xml
@@ -7,7 +7,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -59,7 +59,6 @@
-
diff --git a/src/site/resources/spotbugs/spotbugs-exclude-filter.xml b/src/site/resources/spotbugs/spotbugs-exclude-filter.xml
index a7364d40db..79c57d3ae4 100644
--- a/src/site/resources/spotbugs/spotbugs-exclude-filter.xml
+++ b/src/site/resources/spotbugs/spotbugs-exclude-filter.xml
@@ -6,7 +6,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -54,5 +54,12 @@
+
+
+
+
+
+
+
diff --git a/src/site/site.xml b/src/site/site.xml
index 86c1afa1eb..232c2056c5 100644
--- a/src/site/site.xml
+++ b/src/site/site.xml
@@ -7,7 +7,7 @@
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/src/site/xdoc/download_csv.xml b/src/site/xdoc/download_csv.xml
index 00b7f3c74c..151c3f69ec 100644
--- a/src/site/xdoc/download_csv.xml
+++ b/src/site/xdoc/download_csv.xml
@@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -56,10 +56,12 @@ limitations under the License.
| |
+======================================================================+
-->
-
+Download Apache Commons CSV
- Apache Commons Documentation Team
+ Apache Commons Team
@@ -79,7 +81,7 @@ limitations under the License.
mirrors (at the end of the mirrors list) that should be
available.
- [if-any logo][end]
+ [if-any logo][end]
-
+
diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml
index 7c7c4f3cc7..ac5b8cfa9f 100644
--- a/src/site/xdoc/index.xml
+++ b/src/site/xdoc/index.xml
@@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,30 +20,18 @@ limitations under the License.
HomeApache Commons Team
+
+
+
-
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.
-
The most common CSV formats are predefined in the CSVFormat class:
-
The commons developer mailing list is the main channel of communication for contributors. Please remember that the lists are shared between all commons components, so prefix your email by [csv].
@@ -97,7 +103,6 @@ For previous releases, see the TagList report.
If you'd like to offer up pull requests via GitHub rather than applying patches to JIRA, we have a GitHub mirror.
-
The commons mailing lists act as the main support forum.
@@ -111,14 +116,12 @@ For previous releases, see the
In addition to the code from Netcetera (org.apache.commons.csv), Martin van den Bemt has added an additional writer API.
Other CSV implementations:
@@ -126,7 +129,5 @@ For previous releases, see the Super CSV
-
-
diff --git a/src/site/xdoc/issue-tracking.xml b/src/site/xdoc/issue-tracking.xml
index 3564ef4fdd..3aa64b4042 100644
--- a/src/site/xdoc/issue-tracking.xml
+++ b/src/site/xdoc/issue-tracking.xml
@@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -41,10 +41,12 @@ limitations under the License.
| |
+======================================================================+
-->
-
+Apache Commons CSV Issue tracking
- Apache Commons Documentation Team
+ Apache Commons Team
@@ -64,6 +66,7 @@ limitations under the License.
If you would like to report a bug, or raise an enhancement request with
Apache Commons CSV please do the following:
+
Search existing open bugs.
If you find your issue listed then please add a comment with your details.
@@ -73,16 +76,15 @@ limitations under the License.
-
diff --git a/src/site/xdoc/mail-lists.xml b/src/site/xdoc/mail-lists.xml
index 727e4a555a..345cef8996 100644
--- a/src/site/xdoc/mail-lists.xml
+++ b/src/site/xdoc/mail-lists.xml
@@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -39,10 +39,12 @@ limitations under the License.
| |
+======================================================================+
-->
-
+Apache Commons CSV Mailing Lists
- Apache Commons Documentation Team
+ Apache Commons Team
@@ -53,10 +55,10 @@ limitations under the License.
To make it easier for people to only read messages related to components they are interested in,
the convention in Commons is to prefix the subject line of messages with the component's name,
for example:
-
-
[csv] Problem with the ...
-
+
+
[csv] Problem with the ...
+
Questions related to the usage of Apache Commons CSV should be posted to the
User List.
diff --git a/src/site/xdoc/security.xml b/src/site/xdoc/security.xml
index ab00560494..47edf5d116 100644
--- a/src/site/xdoc/security.xml
+++ b/src/site/xdoc/security.xml
@@ -47,5 +47,10 @@
None.
+
+
+ For information about safe deserialization, please see Safe Deserialization.
+
+
\ No newline at end of file
diff --git a/src/site/xdoc/user-guide.xml b/src/site/xdoc/user-guide.xml
index 3ec3dd9b2d..d5a1f26850 100644
--- a/src/site/xdoc/user-guide.xml
+++ b/src/site/xdoc/user-guide.xml
@@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
+ https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@@ -21,179 +21,6 @@ limitations under the License.
Apache Commons Documentation Team
-
-
-
Apache Commons CSV User Guide
-
-
-
-
-
-
- Parsing files with Apache Commons CSV is relatively straight forward.
- The CSVFormat class provides some commonly used CSV variants:
-
-
- Reader in = new FileReader("path/to/file.csv");
-Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
-for (CSVRecord record : records) {
- String lastName = record.get("Last Name");
- String firstName = record.get("First Name");
-}
-
-
-
-
- To handle files that start with a Byte Order Mark (BOM) like some Excel CSV files, you need an extra step to
- deal with these optional bytes.
- You can use the
-
- BOMInputStream
-
- class from
- Apache Commons IO
- for example:
-
- You might find it handy to create something like this:
-
-
-/**
- * Creates a reader capable of handling BOMs.
- *
- * @param path The path to read.
- * @return a new InputStreamReader for UTF-8 bytes.
- * @throws IOException if an I/O error occurs.
- */
-public InputStreamReader newReader(final Path path) throws IOException {
- return new InputStreamReader(BOMInputStream.builder()
- .setPath(path)
- .get(), StandardCharsets.UTF_8);
-}
-
-
-
-
- Apache Commons CSV provides several ways to access record values.
- The simplest way is to access values by their index in the record.
- However, columns in CSV files often have a name, for example: ID, CustomerNo, Birthday, etc.
- The CSVFormat class provides an API for specifying these header names and CSVRecord on
- the other hand has methods to access values by their corresponding header name.
-
- To access a record value by index, no special configuration of the CSVFormat is necessary:
- Reader in = new FileReader("path/to/file.csv");
-Iterable<CSVRecord> records = CSVFormat.RFC4180.parse(in);
-for (CSVRecord record : records) {
- String columnOne = record.get(0);
- String columnTwo = record.get(1);
-}
-
-
-
- Indices may not be the most intuitive way to access record values. For this reason it is possible to
- assign names to each column in the file:
- Reader in = new FileReader("path/to/file.csv");
-Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
- .setHeader("ID", "CustomerNo", "Name")
- .build()
- .parse(in);
-for (CSVRecord record : records) {
- String id = record.get("ID");
- String customerNo = record.get("CustomerNo");
- String name = record.get("Name");
-}
-
- Note that column values can still be accessed using their index.
-
-
- Using String values all over the code to reference columns can be error prone. For this reason,
- it is possible to define an enum to specify header names. Note that the enum constant names are
- used to access column values. This may lead to enums constant names which do not follow the Java
- coding standard of defining constants in upper case with underscores:
- public enum Headers {
- ID, CustomerNo, Name
-}
-Reader in = new FileReader("path/to/file.csv");
-Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
- .setHeader(Headers.class)
- .build()
- .parse(in);
-for (CSVRecord record : records) {
- String id = record.get(Headers.ID);
- String customerNo = record.get(Headers.CustomerNo);
- String name = record.get(Headers.Name);
-}
-
- Again it is possible to access values by their index and by using a String (for example "CustomerNo").
-
-
- Some CSV files define header names in their first record. If configured, Apache Commons CSV can parse
- the header names from the first record:
- Reader in = new FileReader("path/to/file.csv");
-Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
- .setHeader()
- .setSkipHeaderRecord(true)
- .build()
- .parse(in);
-for (CSVRecord record : records) {
- String id = record.get("ID");
- String customerNo = record.get("CustomerNo");
- String name = record.get("Name");
-}
-
- This will use the values from the first record as header names and skip the first record when iterating.
-
-
-
- To print a CSV file with headers, you specify the headers in the format:
-
- final Appendable out = ...;
-final CSVPrinter printer = CSVFormat.DEFAULT.builder()
- .setHeader("H1", "H2")
- .build()
- .print(out);
-
-
- To print a CSV file with JDBC column labels, you specify the ResultSet in the format:
-