diff --git a/.asf.yaml b/.asf.yaml index c56b33138c..cdb8cd101b 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -23,7 +23,8 @@ notifications: pullrequests: issues@commons.apache.org jira_options: link label jobs: notifications@commons.apache.org - issues_bot_dependabot: notifications@commons.apache.org - pullrequests_bot_dependabot: notifications@commons.apache.org + # commits_bot_dependabot: dependabot@commons.apache.org + issues_bot_dependabot: dependabot@commons.apache.org + pullrequests_bot_dependabot: dependabot@commons.apache.org issues_bot_codecov-commenter: notifications@commons.apache.org pullrequests_bot_codecov-commenter: notifications@commons.apache.org diff --git a/.gitattributes b/.gitattributes index bec231c194..f42866e4bd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/.github/GH-ROBOTS.txt b/.github/GH-ROBOTS.txt index e3329e55fb..64a88674fe 100644 --- a/.github/GH-ROBOTS.txt +++ b/.github/GH-ROBOTS.txt @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. 
You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9ebcd0ebb1..90ec55f742 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -18,10 +18,8 @@ updates: - package-ecosystem: "maven" directory: "/" schedule: - interval: "weekly" - day: "friday" + interval: "quarterly" - package-ecosystem: "github-actions" directory: "/" schedule: - interval: "weekly" - day: "friday" + interval: "quarterly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index d126a970ce..9ff35c83e7 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -7,7 +7,7 @@ "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an @@ -22,7 +22,9 @@ Thanks for your contribution to [Apache Commons](https://commons.apache.org/)! Y Before you push a pull request, review this list: - [ ] Read the [contribution guidelines](CONTRIBUTING.md) for this project. +- [ ] Read the [ASF Generative Tooling Guidance](https://www.apache.org/legal/generative-tooling.html) if you use Artificial Intelligence (AI). 
+- [ ] I used AI to create any part of, or all of, this pull request. Which AI tool was used to create this pull request, and to what extent did it contribute? - [ ] Run a successful build using the default [Maven](https://maven.apache.org/) goal with `mvn`; that's `mvn` on the command line by itself. -- [ ] Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible but is a best-practice. +- [ ] Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible, but it is a best practice. - [ ] Write a pull request description that is detailed enough to understand what the pull request does, how, and why. -- [ ] Each commit in the pull request should have a meaningful subject line and body. Note that commits might be squashed by a maintainer on merge. +- [ ] Each commit in the pull request should have a meaningful subject line and body. Note that a maintainer may squash commits during the merge process. diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 1f42db3b1a..cca38e5121 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. 
You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -37,6 +37,7 @@ jobs: security-events: write strategy: + max-parallel: 20 fail-fast: false matrix: language: [ 'java' ] @@ -45,10 +46,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: persist-credentials: false - - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} @@ -57,7 +58,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1 + uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -68,7 +69,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1 + uses: github/codeql-action/autobuild@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -82,4 +83,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1 + uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index c6ece650e2..7bc02bdd23 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -6,7 +6,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an @@ -26,6 +26,6 @@ jobs: runs-on: ubuntu-latest steps: - name: 'Checkout Repository' - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 - name: 'Dependency Review PR' - uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0 + uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294 # v5.0.0 diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 71887b86d9..17ba7dd386 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. 
You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -15,7 +15,11 @@ name: Java CI -on: [push, pull_request] +on: + push: + branches: + - 'master' + pull_request: {} permissions: contents: read @@ -23,30 +27,35 @@ permissions: jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.experimental }} strategy: + max-parallel: 20 + fail-fast: false matrix: - java: [ 8, 11, 17, 21, 23 ] + os: [ubuntu-latest, macos-latest] + java: [ 8, 11, 17, 21, 25, 26 ] experimental: [false] + # Keep the same parameter order as the matrix above include: - - java: 24-ea - experimental: true - + - os: ubuntu-latest + java: 27-ea + experimental: true + steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: persist-credentials: false - - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@7a6d8a8234af8eb26422e24e3006232cccaa061b # v4.6.0 + uses: actions/setup-java@1bcf9fb12cf4aa7d266a90ae39939e61372fe520 # v5.4.0 with: - distribution: 'temurin' + distribution: ${{ runner.os == 'macOS' && matrix.java == '8' && 'zulu' || 'temurin' }} java-version: ${{ matrix.java }} - name: Build with Maven run: mvn -Ddoclint=all --show-version --batch-mode --no-transfer-progress diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml index 8b2137c4a3..e1868cb462 100644 --- a/.github/workflows/scorecards-analysis.yml 
+++ b/.github/workflows/scorecards-analysis.yml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -40,12 +40,12 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # 7.0.0 with: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # 2.4.0 + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # 2.4.3 with: results_file: results.sarif results_format: sarif @@ -57,13 +57,13 @@ jobs: publish_results: true - name: "Upload artifact" - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # 4.6.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: SARIF file path: results.sarif retention-days: 5 - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # 3.28.1 + uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 with: sarif_file: results.sarif diff --git a/.gitignore b/.gitignore index 4b377d5762..2ff17ae4a8 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,8 @@ buildNumber.properties *.iml /.vscode/ +/.DS_Store + +# NetBeans files +nb-configuration.xml +nbactions.xml diff --git a/BENCHMARK.md b/BENCHMARK.md index e8b579b2e2..c45918a289 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 3ed501501d..b4342f33ca 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 18fce304e6..3423e18ad2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -48,30 +48,33 @@ Getting Started --------------- + Make sure you have a [JIRA account](https://issues.apache.org/jira/). -+ Make sure you have a [GitHub account](https://github.com/signup/free). This is not essential, but makes providing patches much easier. ++ Make sure you have a [GitHub account](https://github.com/signup). This is not essential, but makes providing patches much easier. + If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons CSV's scope. 
+ Submit a [Jira Ticket][jira] for your issue, assuming one does not already exist. + Clearly describe the issue including steps to reproduce when it is a bug. + Make sure you fill in the earliest version that you know has the issue. + Find the corresponding [repository on GitHub](https://github.com/apache/?query=commons-), -[fork](https://help.github.com/articles/fork-a-repo/) and check out your forked repository. If you don't have a GitHub account, you can still clone the Commons repository. +[fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) and check out your forked repository. If you don't have a GitHub account, you can still clone the Commons repository. Making Changes -------------- + Create a _topic branch_ for your isolated work. - * Usually you should base your branch on the `master` branch. + * Usually you should create your branch from the `master` branch. * A good topic branch name can be the JIRA bug ID plus a keyword, e.g. `CSV-123-InputStream`. * If you have submitted multiple JIRA issues, try to maintain separate branches and pull requests. + Make commits of logical units. * Make sure your commit messages are meaningful and in the proper format. Your commit message should contain the key of the JIRA issue. - * e.g. `CSV-123: Close input stream earlier` + * For example, `[CSV-123] Close input stream sooner` + Respect the original code style: - + Only use spaces for indentation. + + Only use spaces for indentation; you can check for unnecessary whitespace with `git diff` before committing. + Create minimal diffs - disable _On Save_ actions like _Reformat Source Code_ or _Organize Imports_. If you feel the source code should be reformatted create a separate PR for this change first. - + Check for unnecessary whitespace with `git diff` -- check before committing. -+ Make sure you have added the necessary tests for your changes, typically in `src/test/java`. 
-+ Run all the tests with `mvn clean verify` to ensure nothing else was accidentally broken. ++ Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible but is a best practice. +Unit tests are typically in the `src/test/java` directory. ++ Run a successful build using the default [Maven](https://maven.apache.org/) goal with `mvn`; that's `mvn` on the command line by itself. ++ Write a pull request description that is detailed enough to understand what the pull request does, how, and why. ++ Each commit in the pull request should have a meaningful subject line and body. Note that commits might be squashed by a maintainer on merge. + Making Trivial Changes ---------------------- @@ -79,7 +82,7 @@ Making Trivial Changes The JIRA tickets are used to generate the changelog for the next release. For changes of a trivial nature to comments and documentation, it is not always necessary to create a new ticket in JIRA. -In this case, it is appropriate to start the first line of a commit with '(doc)' instead of a ticket number. +In this case, it is appropriate to start the first line of a commit with '[doc]' or '[javadoc]' instead of a ticket number. 
Submitting Changes @@ -105,8 +108,8 @@ Additional Resources + [Contributing patches](https://commons.apache.org/patches.html) + [Apache Commons CSV JIRA project page][jira] + [Contributor License Agreement][cla] -+ [General GitHub documentation](https://help.github.com/) -+ [GitHub pull request documentation](https://help.github.com/articles/creating-a-pull-request/) ++ [General GitHub documentation](https://docs.github.com/) ++ [GitHub pull request documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) + [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) [cla]:https://www.apache.org/licenses/#clas diff --git a/LICENSE.txt b/LICENSE.txt index d645695673..ff9ad4530f 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -193,7 +193,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/NOTICE.txt b/NOTICE.txt index b5c1f1445b..06d3824a28 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache Commons CSV -Copyright 2005-2025 The Apache Software Foundation +Copyright 2005-2026 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (https://www.apache.org/). diff --git a/README.md b/README.md index 969da9b8df..f30de4b9c9 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -45,7 +45,7 @@ Apache Commons CSV [![Java CI](https://github.com/apache/commons-csv/actions/workflows/maven.yml/badge.svg)](https://github.com/apache/commons-csv/actions/workflows/maven.yml) [![Maven Central](https://img.shields.io/maven-central/v/org.apache.commons/commons-csv?label=Maven%20Central)](https://search.maven.org/artifact/org.apache.commons/commons-csv) -[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-csv/1.13.0.svg)](https://javadoc.io/doc/org.apache.commons/commons-csv/1.13.0) +[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-csv/1.14.1.svg)](https://javadoc.io/doc/org.apache.commons/commons-csv/1.14.1) [![CodeQL](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/apache/commons-csv/badge)](https://api.securityscorecards.dev/projects/github.com/apache/commons-csv) @@ -68,7 +68,7 @@ Alternatively, you can pull it from the central Maven repositories: org.apache.commons commons-csv - 1.13.0 + 1.14.1 ``` @@ -90,7 +90,7 @@ There are some guidelines which will make applying PRs easier for us: + Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change. + Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running `mvn`. + Before you pushing a PR, run `mvn` (by itself), this runs the default goal, which contains all build checks. 
-+ To see the code coverage report, regardless of coverage failures, run `mvn clean site -Dcommons.jacoco.haltOnFailure=false` ++ To see the code coverage report, regardless of coverage failures, run `mvn clean site -Dcommons.jacoco.haltOnFailure=false -Pjacoco` If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas). You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md). diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt index 2d99a93d99..bfeb4bb8de 100644 --- a/RELEASE-NOTES.txt +++ b/RELEASE-NOTES.txt @@ -1,4 +1,116 @@ -Apache Commons CSV Version 1.13.0 Release Notes +Apache Commons CSV 1.14.1 Release Notes +--------------------------------------- + +The Apache Commons CSV team is pleased to announce the release of Apache Commons CSV 1.14.1. + + +This document contains the release notes for the 1.14.1 version of Apache Commons CSV. +Commons CSV reads and writes files in Comma Separated Value (CSV) format variations. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +This is a feature and maintenance release. Java 8 or later is required. + +Changes in this version include: + + +Fixed Bugs +---------- + +* CSV-318: CSVPrinter.printRecord(Stream) hangs if given a parallel stream. Thanks to Joseph Shraibman, Gary Gregory. +* CSV-318: CSVPrinter now uses an internal lock instead of synchronized methods. Thanks to Joseph Shraibman, Gary Gregory. +* org.apache.commons.csv.CSVPrinter.printRecords(ResultSet) now writes one record at a time using a lock. Thanks to Gary Gregory. + +Changes +------- + +* Bump org.apache.commons:commons-parent from 81 to 85 #542. Thanks to Gary Gregory, Dependabot. +* Bump commons-io:commons-io from 2.18.0 to 2.20.0. Thanks to Gary Gregory. +* Bump com.opencsv:opencsv from 5.10 to 5.11.2 #545, #551, #553. 
Thanks to Gary Gregory, Dependabot. +* Bump org.apache.commons:commons-lang3 from 3.17.0 to 3.18.0 #556. Thanks to Gary Gregory, Dependabot. +* Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. Thanks to Gary Gregory. + + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ + +Apache Commons CSV 1.14.0 Release Notes +--------------------------------------- + +This document contains the release notes for the 1.14.0 version of Apache Commons CSV. +Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +This is a feature and maintenance release. Java 8 or later is required. + +Changes in this version include: + +New Features +------------ + +* Define and use Maven property commons.jmh.version. Thanks to Gary Gregory. +* Add CSVFormat.Builder.setMaxRows(long). Thanks to Gary Gregory. +* Add CSVFormat.getMaxRows(). Thanks to Gary Gregory. +* CSVPrinter.printRecords(ResultSet) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVPrinter.printRecords(Iterable) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVPrinter.printRecords(Stream) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVParser.stream() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVParser.getRecords() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. 
+* CSVParser.iterator() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. + +Fixed Bugs +---------- + +* CSV-317: Release history link changed from changes-report.html to changes.html #516. Thanks to Filipe Roque. +* Remove -nouses directive from maven-bundle-plugin. OSGi package imports now state 'uses' definitions for package imports; this doesn't affect JPMS (from org.apache.commons:commons-parent:80). Thanks to Gary Gregory. +* CSVParser.parse(URL, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(String, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(File, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(Path, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(InputStream, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(*) methods with a null Charset map to Charset.defaultCharset(). Thanks to Gary Gregory. +* Fix possible NullPointerException in Token.toString(). Thanks to Gary Gregory. + +Changes +------- + +* Bump com.opencsv:opencsv from 5.9 to 5.10. Thanks to Gary Gregory. +* Bump commons-codec:commons-codec from 1.17.2 to 1.18.0 #522. Thanks to Gary Gregory. +* Bump org.apache.commons:commons-parent from 79 to 81. Thanks to Gary Gregory. 
+ + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ + +Apache Commons CSV 1.13.0 Release Notes +--------------------------------------- This document contains the release notes for the 1.13.0 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -50,7 +162,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.12.0 Release Notes +Apache Commons CSV 1.12.0 Release Notes +--------------------------------------- This document contains the release notes for the 1.12.0 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -106,7 +219,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.11.0 Release Notes +Apache Commons CSV 1.11.0 Release Notes +--------------------------------------- This document contains the release notes for the 1.11.0 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -166,7 +280,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.10.0 Release Notes +Apache Commons CSV 1.10.0 Release Notes +--------------------------------------- This document contains the release notes for the 1.10.0 version of Apache Commons CSV. 
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -247,7 +362,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.9.0 Release Notes +Apache Commons CSV 1.9.0 Release Notes +-------------------------------------- This document contains the release notes for the 1.9.0 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -349,7 +465,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.8 Release Notes +Apache Commons CSV 1.8 Release Notes +------------------------------------ This document contains the release notes for the 1.8 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -404,7 +521,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.7 Release Notes +Apache Commons CSV 1.7 Release Notes +------------------------------------ This document contains the release notes for the 1.7 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -451,7 +569,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.6 Release Notes +Apache Commons CSV 1.6 Release Notes +------------------------------------ This document contains the release notes for the 1.6 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the @@ -500,7 +619,8 @@ Have fun! 
------------------------------------------------------------------------------ -Apache Commons CSV Version 1.5 Release Notes +Apache Commons CSV 1.5 Release Notes +------------------------------------ This document contains the release notes for the 1.5 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -552,7 +672,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.4 Release Notes +Apache Commons CSV 1.4 Release Notes +------------------------------------ This document contains the release notes for the 1.4 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -591,7 +712,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.3 Release Notes +Apache Commons CSV 1.3 Release Notes +------------------------------------ This document contains the release notes for the 1.3 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -635,7 +757,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.2 Release Notes +Apache Commons CSV 1.2 Release Notes +------------------------------------ This document contains the release notes for the 1.2 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -673,7 +796,8 @@ Have fun! ------------------------------------------------------------------------------ -Apache Commons CSV Version 1.1 Release Notes +Apache Commons CSV 1.1 Release Notes +------------------------------------ This document contains the release notes for the 1.1 version of Apache Commons CSV. 
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -714,7 +838,8 @@ Have fun! ------------------------------------------------------------------------------- -Apache Commons CSV Version 1.0 Release Notes +Apache Commons CSV 1.0 Release Notes +------------------------------------ This document contains the release notes for the 1.0 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. diff --git a/SECURITY.md b/SECURITY.md index 51943ba7b4..744d4cddbb 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/benchmark-prereq.sh b/benchmark-prereq.sh index 1d03f6773a..bd1db91821 100755 --- a/benchmark-prereq.sh +++ b/benchmark-prereq.sh @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/pom.xml b/pom.xml index cbfbf5db4a..8cb13ed7c2 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -20,10 +20,10 @@ org.apache.commons commons-parent - 79 + 102 commons-csv - 1.13.1-SNAPSHOT + 1.15.0-SNAPSHOT Apache Commons CSV https://commons.apache.org/proper/commons-csv/ 2005 @@ -53,29 +53,29 @@ org.apache.commons commons-lang3 - 3.17.0 + 3.20.0 test com.h2database h2 + 2.2.224 test org.openjdk.jmh jmh-core - 1.37 + ${commons.jmh.version} test - scm:git:http://gitbox.apache.org/repos/asf/commons-csv.git + scm:git:https://gitbox.apache.org/repos/asf/commons-csv.git scm:git:https://gitbox.apache.org/repos/asf/commons-csv.git https://gitbox.apache.org/repos/asf?p=commons-csv.git - jira https://issues.apache.org/jira/browse/CSV @@ -90,12 +90,12 @@ - 1.13.0 + 1.15.0 (Java 8 or above) RC1 - 1.12.0 - 1.13.1 + 1.14.1 + 1.15.1 csv org.apache.commons.csv CSV @@ -108,9 +108,9 @@ UTF-8 false true - 2025-01-11T14:07:50Z - 1.17.2 - 2.18.0 + 2025-07-30T14:51:35Z + 1.22.0 + 2.22.0 org.apache.commons.codec.binary;version="${commons.codec.version}", @@ -125,11 +125,11 @@ true 1.00 - 0.98 + 0.99 0.99 0.97 0.99 - 0.96 + 0.97 ${basedir}/src/conf/checkstyle/checkstyle-header.txt ${basedir}/src/conf/checkstyle/checkstyle.xml @@ -168,34 +168,35 @@ apache-rat-plugin - + - src/test/resources/org/apache/commons/csv/empty.txt - src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv - src/test/resources/org/apache/commons/csv/csv-167/sample1.csv - src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv - src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv - src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv - src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv - 
src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt + src/test/resources/org/apache/commons/csv/empty.txt + src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv + src/test/resources/org/apache/commons/csv/csv-167/sample1.csv + src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv + src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv + src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv + src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv + src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt - src/test/resources/org/apache/commons/csv/ferc.gov/contract.txt - src/test/resources/org/apache/commons/csv/ferc.gov/transaction.txt - src/test/resources/**/*.bin - 
src/test/resources/org/apache/commons/csv/CSV-259/sample.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt - src/test/resources/org/apache/commons/csv/CSV-290/psql.csv - src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv - + src/test/resources/org/apache/commons/csv/ferc.gov/contract.txt + src/test/resources/org/apache/commons/csv/ferc.gov/transaction.txt + src/test/resources/**/*.bin + src/test/resources/org/apache/commons/csv/CSV-259/sample.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt + src/test/resources/org/apache/commons/csv/CSV-290/psql.csv + src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv + @@ -359,7 +360,7 @@ org.openjdk.jmh jmh-generator-annprocess - 1.37 + ${commons.jmh.version} test @@ -380,7 +381,7 @@ com.opencsv opencsv - 5.10 + 5.12.0 test @@ -390,13 +391,16 @@ 2.4.0 test - - + org.skife.kasparov csv 1.0 @@ -406,7 +410,7 @@ org.apache.commons commons-lang3 - 3.17.0 + 3.20.0 diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml index f73d62f6df..3526ca9e91 100644 --- a/src/assembly/bin.xml +++ b/src/assembly/bin.xml @@ -6,7 +6,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/src/assembly/src.xml b/src/assembly/src.xml index 9f33f58f20..1330db01f6 100644 --- a/src/assembly/src.xml +++ b/src/assembly/src.xml @@ -6,7 +6,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/src/changes/changes.xml b/src/changes/changes.xml index a05e5d52e8..93952e9f18 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -7,7 +7,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -40,12 +40,77 @@ Apache Commons CSV Release Notes - + + + Remove Spotbugs dependency and use exclude-filter instead #564. + Remove broken website link #577. + Fix Apache RAT plugin console warnings. + [Javadoc] Clarify behavior of deprecated CSVFormat#withFirstRecordAsHeader() #2413. + CSVFormat.equals()/hashCode() ignores maxRows (#600). + ExtendedBufferedReader byte tracking leads to an incorrect CSVRecord.getBytePosition() (#601). + CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447). + Lexer.isDelimiter() accepts a partial multi-character delimiter at EOF (#603). 
+ CSVParser applies characterOffset to bytePosition (#604). + CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back. + CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used. + CSVParser with trackBytes enabled throws on multi-character delimiters containing supplementary Unicode characters. + CSVFormat.Builder.setNullString(String) can build an invalid quoted null string after setQuote(null). + Escape Reader values with quote and escape (#606). + Clear escape delimiter buffer before peek in Lexer.isEscapeDelimiter() (#608, #611). + Escape quote char in printWithEscapes when QuoteMode is NONE (#609). + Quote value starting with comment marker in minimal quote mode (#610). + Escape leading comment marker in printWithEscapes (#614). + Skip byte counting at EOF in ExtendedBufferedReader.read (#615). + Keep quoted empty trailing field with trailingDelimiter (#616). + Evaluate isDelimiter once in nextToken whitespace skip (#618). + + Add an "Android Compatibility" section to the web site. + Add CSVParser.Builder.setByteOffset(long) (#604). + + Bump org.apache.commons:commons-parent from 85 to 102 #573, #595. + [test] Bump com.opencsv:opencsv from 5.11.2 to 5.12.0 #558. + Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.20.0. + Bump commons-codec:commons-codec from 1.19.0 to 1.22.0. + Bump commons-io:commons-io from 2.20.0 to 2.22.0 #594. + + + + CSVPrinter.printRecord(Stream) hangs if given a parallel stream. + CSVPrinter now uses an internal lock instead of synchronized methods. + org.apache.commons.csv.CSVPrinter.printRecords(ResultSet) now writes one record at a time using a lock. + + + Bump org.apache.commons:commons-parent from 81 to 85 #542. + Bump commons-io:commons-io from 2.18.0 to 2.20.0. + Bump com.opencsv:opencsv from 5.10 to 5.11.2 #545, #551, #553. + Bump org.apache.commons:commons-lang3 from 3.17.0 to 3.18.0 #556. 
+ Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. + + Release history link changed from changes-report.html to changes.html #516. + Remove -nouses directive from maven-bundle-plugin. OSGi package imports now state 'uses' definitions for package imports, this doesn't affect JPMS (from org.apache.commons:commons-parent:80). + CSVParser.parse(URL, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(String, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(File, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(Path, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(InputStream, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(*) methods with a null Charset maps to Charset.defaultCharset(). + Fix possible NullPointerException in Token.toString(). + Define and use Maven property commons.jmh.version. + Add CSVFormat.Builder.setMaxRows(long). + Add CSVFormat.getMaxRows(). + CSVPrinter.printRecords(ResultSet) knows how to use CSVFormat's maxRows. + CSVPrinter.printRecords(Iterable) knows how to use CSVFormat's maxRows. + CSVPrinter.printRecords(Stream) knows how to use CSVFormat's maxRows. + CSVParser.stream() knows how to use CSVFormat's maxRows. + CSVParser.getRecords() knows how to use CSVFormat's maxRows. + CSVParser.iterator() knows how to use CSVFormat's maxRows. Bump com.opencsv:opencsv from 5.9 to 5.10. + Bump commons-codec:commons-codec from 1.17.2 to 1.18.0 #522. + Bump org.apache.commons:commons-parent from 79 to 81. 
diff --git a/src/changes/release-notes.vm b/src/changes/release-notes.vm index 08252f8ab9..5769829552 100644 --- a/src/changes/release-notes.vm +++ b/src/changes/release-notes.vm @@ -6,7 +6,7 @@ ## "License"); you may not use this file except in compliance ## with the License. You may obtain a copy of the License at ## -## http://www.apache.org/licenses/LICENSE-2.0 +## https://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, ## software distributed under the License is distributed on an @@ -16,16 +16,20 @@ ## under the License. ## ${project.name} ${version} Release Notes +------------------------------------------------ -This document contains the release notes for the ${version} version of Apache Commons CSV. -Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. +The ${developmentTeam} is pleased to announce the release of ${project.name} ${version}. + + +This document contains the release notes for the ${version} version of ${project.name}. +Commons CSV reads and writes files in Comma Separated Value (CSV) format variations. Commons CSV requires at least Java 8. $introduction.replaceAll("(? 
+ - diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 70c033a181..7145d23d3b 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -43,6 +43,7 @@ import org.apache.commons.codec.binary.Base64OutputStream; import org.apache.commons.io.IOUtils; +import org.apache.commons.io.function.IOStream; import org.apache.commons.io.function.Uncheck; import org.apache.commons.io.output.AppendableOutputStream; @@ -215,7 +216,7 @@ public static Builder create() { .setRecordSeparator(Constants.CRLF) .setIgnoreEmptyLines(true) .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL); - // @formatter:on + // @formatter:on } /** @@ -270,32 +271,36 @@ public static Builder create(final CSVFormat csvFormat) { private boolean trim; + /** The maximum number of rows to process, excluding the header row. */ + private long maxRows; + private Builder() { // empty } private Builder(final CSVFormat csvFormat) { - this.delimiter = csvFormat.delimiter; - this.quoteCharacter = csvFormat.quoteCharacter; - this.quoteMode = csvFormat.quoteMode; + this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; + this.autoFlush = csvFormat.autoFlush; this.commentMarker = csvFormat.commentMarker; + this.delimiter = csvFormat.delimiter; + this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; this.escapeCharacter = csvFormat.escapeCharacter; - this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces; - this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; - this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; - this.recordSeparator = csvFormat.recordSeparator; - this.nullString = csvFormat.nullString; this.headerComments = csvFormat.headerComments; this.headers = csvFormat.headers; - this.skipHeaderRecord = csvFormat.skipHeaderRecord; + this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; this.ignoreHeaderCase = csvFormat.ignoreHeaderCase; 
+ this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces; this.lenientEof = csvFormat.lenientEof; + this.maxRows = csvFormat.maxRows; + this.nullString = csvFormat.nullString; + this.quoteCharacter = csvFormat.quoteCharacter; + this.quoteMode = csvFormat.quoteMode; + this.quotedNullString = csvFormat.quotedNullString; + this.recordSeparator = csvFormat.recordSeparator; + this.skipHeaderRecord = csvFormat.skipHeaderRecord; this.trailingData = csvFormat.trailingData; this.trailingDelimiter = csvFormat.trailingDelimiter; this.trim = csvFormat.trim; - this.autoFlush = csvFormat.autoFlush; - this.quotedNullString = csvFormat.quotedNullString; - this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; } /** @@ -606,6 +611,9 @@ public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLEx *

* The header is also used by the {@link CSVPrinter}. *

+ *

+ * This method keeps a copy of the input array. + *

* * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. * @return This instance. @@ -644,6 +652,9 @@ public Builder setHeader(final String... header) { * # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z * + *

+ * This method keeps a copy of the input array. + *

* * @param headerComments the headerComments which will be printed by the Printer before the CSV data. * @return This instance. @@ -682,6 +693,9 @@ public Builder setHeaderComments(final Object... headerComments) { * # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z * + *

+ * This method keeps a copy of the input array. + *

* * @param headerComments the headerComments which will be printed by the Printer before the CSV data. * @return This instance. @@ -738,6 +752,21 @@ public Builder setLenientEof(final boolean lenientEof) { return this; } + /** + * Sets the maximum number of rows to process, excluding the header row. + *

+ * Values less than or equal to 0 mean no limit. + *

+ * + * @param maxRows the maximum number of rows to process, excluding the header row. + * @return This instance. + * @since 1.14.0 + */ + public Builder setMaxRows(final long maxRows) { + this.maxRows = maxRows; + return this; + } + /** * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}. * @@ -751,8 +780,7 @@ public Builder setLenientEof(final boolean lenientEof) { */ public Builder setNullString(final String nullString) { this.nullString = nullString; - this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - return this; + return setQuotedNullString(); } /** @@ -777,6 +805,12 @@ public Builder setQuote(final Character quoteCharacter) { throw new IllegalArgumentException("The quoteCharacter cannot be a line break"); } this.quoteCharacter = quoteCharacter; + return setQuotedNullString(); + } + + private Builder setQuotedNullString() { + final Character quote = quoteCharacter != null ? quoteCharacter : Constants.DOUBLE_QUOTE_CHAR; + this.quotedNullString = quote + nullString + quote; return this; } @@ -849,6 +883,16 @@ public Builder setTrailingData(final boolean trailingData) { /** * Sets whether to add a trailing delimiter. * + *

+ * When writing, a delimiter is appended after the last value of each record. When reading, the empty field + * that such a trailing delimiter produces is dropped so the output round-trips back to the original record; + * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept. + *

+ *

+ * This is unrelated to {@link #setTrailingData(boolean) trailing data}, which controls whether characters + * after the closing quote of an encapsulated value are tolerated when reading. + *

+ * @param trailingDelimiter whether to add a trailing delimiter. * @return This instance. */ @@ -857,6 +901,7 @@ public Builder setTrailingDelimiter(final boolean trailingDelimiter) { return this; } + /** * Sets whether to trim leading and trailing blanks. * @@ -870,7 +915,7 @@ public Builder setTrim(final boolean trim) { } /** - * Predefines formats. + * Enumerates predefined formats. * * @since 1.2 */ @@ -1447,7 +1492,7 @@ private static boolean isLineBreak(final char c) { * @return true if {@code c} is a line break character (and not null). */ private static boolean isLineBreak(final Character c) { - return c != null && isLineBreak(c.charValue()); // N.B. Explicit (un)boxing is intentional + return c != null && isLineBreak(c.charValue()); // Explicit unboxing is intentional } /** Same test as in as {@link String#trim()}. */ @@ -1580,28 +1625,32 @@ public static CSVFormat valueOf(final String format) { /** Whether to trim leading and trailing blanks. */ private final boolean trim; + /** The maximum number of rows to process, excluding the header row. 
*/ + private final long maxRows; + private CSVFormat(final Builder builder) { - this.delimiter = builder.delimiter; - this.quoteCharacter = builder.quoteCharacter; - this.quoteMode = builder.quoteMode; + this.allowMissingColumnNames = builder.allowMissingColumnNames; + this.autoFlush = builder.autoFlush; this.commentMarker = builder.commentMarker; + this.delimiter = builder.delimiter; + this.duplicateHeaderMode = builder.duplicateHeaderMode; this.escapeCharacter = builder.escapeCharacter; - this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces; - this.allowMissingColumnNames = builder.allowMissingColumnNames; - this.ignoreEmptyLines = builder.ignoreEmptyLines; - this.recordSeparator = builder.recordSeparator; - this.nullString = builder.nullString; this.headerComments = builder.headerComments; this.headers = builder.headers; - this.skipHeaderRecord = builder.skipHeaderRecord; + this.ignoreEmptyLines = builder.ignoreEmptyLines; this.ignoreHeaderCase = builder.ignoreHeaderCase; + this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces; this.lenientEof = builder.lenientEof; + this.maxRows = builder.maxRows; + this.nullString = builder.nullString; + this.quoteCharacter = builder.quoteCharacter; + this.quoteMode = builder.quoteMode; + this.quotedNullString = builder.quotedNullString; + this.recordSeparator = builder.recordSeparator; + this.skipHeaderRecord = builder.skipHeaderRecord; this.trailingData = builder.trailingData; this.trailingDelimiter = builder.trailingDelimiter; this.trim = builder.trim; - this.autoFlush = builder.autoFlush; - this.quotedNullString = builder.quotedNullString; - this.duplicateHeaderMode = builder.duplicateHeaderMode; validate(); } @@ -1656,23 +1705,23 @@ public boolean equals(final Object obj) { duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) && ignoreEmptyLines == 
other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase && - ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof && - Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && - quoteMode == other.quoteMode && Objects.equals(quotedNullString, other.quotedNullString) && - Objects.equals(recordSeparator, other.recordSeparator) && skipHeaderRecord == other.skipHeaderRecord && - trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && trim == other.trim; + ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof && maxRows == other.maxRows && + Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode && + Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) && + skipHeaderRecord == other.skipHeaderRecord && trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && + trim == other.trim; } private void escape(final char c, final Appendable appendable) throws IOException { - append(escapeCharacter.charValue(), appendable); // N.B. Explicit (un)boxing is intentional + append(escapeCharacter.charValue(), appendable); // Explicit unboxing is intentional append(c, appendable); } /** - * Formats the specified values. + * Formats the specified values as a CSV record string. * - * @param values the values to format - * @return the formatted values + * @param values the values to format. + * @return the formatted values. */ public String format(final Object... values) { return Uncheck.get(() -> format_(values)); @@ -1802,7 +1851,7 @@ public DuplicateHeaderMode getDuplicateHeaderMode() { * @return the escape character, may be {@code 0} */ char getEscapeChar() { - return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. 
Explicit (un)boxing is intentional + return escapeCharacter != null ? escapeCharacter.charValue() : 0; // Explicit unboxing is intentional } /** @@ -1898,6 +1947,19 @@ public boolean getLenientEof() { return lenientEof; } + /** + * Gets the maximum number of rows to process, excluding the header row. + *

+ * Values less than or equal to 0 mean no limit. + *

+ * + * @return The maximum number of rows to process, excluding the header row. + * @since 1.14.0 + */ + public long getMaxRows() { + return maxRows; + } + /** * Gets the String to convert to and from {@code null}. *
    @@ -1960,6 +2022,16 @@ public boolean getTrailingData() { /** * Gets whether to add a trailing delimiter. * + *

    + * When writing, a delimiter is appended after the last value of each record. When reading, the empty field + * that such a trailing delimiter produces is dropped so the output round-trips back to the original record; + * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept. + *

    + *

    + * This is unrelated to {@link #getTrailingData() trailing data}, which controls whether characters after the + * closing quote of an encapsulated value are tolerated when reading. + *

    + * * @return whether to add a trailing delimiter. * @since 1.3 */ @@ -1983,7 +2055,7 @@ public int hashCode() { result = prime * result + Arrays.hashCode(headerComments); result = prime * result + Arrays.hashCode(headers); result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter, - ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString, + ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, maxRows, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim); return result; } @@ -2052,6 +2124,10 @@ public boolean isQuoteCharacterSet() { return quoteCharacter != null; } + IOStream limit(final IOStream stream) { + return useMaxRows() ? stream.limit(getMaxRows()) : stream; + } + /** * Parses the specified content. * @@ -2108,7 +2184,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo } final boolean quoteCharacterSet = isQuoteCharacterSet(); if (quoteCharacterSet) { - append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional + append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional } // Stream the input to the output without reading or holding the whole value in memory. // AppendableOutputStream cannot "close" an Appendable. @@ -2116,7 +2192,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo IOUtils.copy(inputStream, outputStream); } if (quoteCharacterSet) { - append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional + append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional } } @@ -2262,7 +2338,7 @@ public synchronized void printRecord(final Appendable appendable, final Object.. 
} /* - * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. + * This method must only be called if escaping is enabled, otherwise can throw exceptions. */ private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { int start = 0; @@ -2271,12 +2347,18 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen final char[] delimArray = getDelimiterCharArray(); final int delimLength = delimArray.length; final char escape = getEscapeChar(); + final boolean quoteSet = isQuoteCharacterSet(); + final char quote = quoteSet ? getQuoteCharacter().charValue() : 0; + final boolean commentMarkerSet = isCommentMarkerSet(); + final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional while (pos < end) { char c = charSeq.charAt(pos); final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength); final boolean isCr = c == Constants.CR; final boolean isLf = c == Constants.LF; - if (isCr || isLf || c == escape || isDelimiterStart) { + // A leading comment marker would be read back as a comment, so escape it. + final boolean isComment = commentMarkerSet && pos == 0 && c == commentChar; + if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) { // write out segment up until this char if (pos > start) { appendable.append(charSeq, start, pos); @@ -2305,7 +2387,7 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen } /* - * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. + * This method must only be called if escaping is enabled, otherwise can throw exceptions. 
*/ private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { int start = 0; @@ -2315,8 +2397,13 @@ private void printWithEscapes(final Reader reader, final Appendable appendable) final char[] delimArray = getDelimiterCharArray(); final int delimLength = delimArray.length; final char escape = getEscapeChar(); + final boolean quoteSet = isQuoteCharacterSet(); + final char quote = quoteSet ? getQuoteCharacter().charValue() : 0; + final boolean commentMarkerSet = isCommentMarkerSet(); + final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); int c; + boolean firstChar = true; final char[] lookAheadBuffer = new char[delimLength - 1]; while (EOF != (c = bufferedReader.read())) { builder.append((char) c); @@ -2326,7 +2413,10 @@ private void printWithEscapes(final Reader reader, final Appendable appendable) final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength); final boolean isCr = c == Constants.CR; final boolean isLf = c == Constants.LF; - if (isCr || isLf || c == escape || isDelimiterStart) { + // A leading comment marker would be read back as a comment, so escape it. + final boolean isComment = commentMarkerSet && firstChar && c == commentChar; + firstChar = false; + if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) { // write out segment up until this char if (pos > start) { append(builder.substring(start, pos), appendable); @@ -2355,9 +2445,9 @@ private void printWithEscapes(final Reader reader, final Appendable appendable) } /* - * Note: must only be called if quoting is enabled, otherwise will generate NPE + * This method must only be called if quoting is enabled, otherwise will generate NPE. 
+ * The original object is needed so we can check for Number */ - // the original object is needed so can check for Number private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws IOException { boolean quote = false; int start = 0; @@ -2365,7 +2455,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi final int len = charSeq.length(); final char[] delim = getDelimiterCharArray(); final int delimLength = delim.length; - final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional + final char quoteChar = getQuoteCharacter().charValue(); // Explicit unboxing is intentional // If escape char not specified, default to the quote char // This avoids having to keep checking whether there is an escape character // at the cost of checking against quote twice @@ -2397,10 +2487,11 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi } } else { char c = charSeq.charAt(pos); - if (c <= Constants.COMMENT) { + if (c <= Constants.COMMENT || isCommentMarkerSet() && c == commentMarker.charValue()) { // Some other chars at the start of a value caused the parser to fail, so for now // encapsulate if we start in anything less than '#'. We are being conservative - // by including the default comment char too. + // by including the default comment char and any configured comment marker too, + // which the parser would otherwise read back as a comment line. quote = true; } else { while (pos < len) { @@ -2468,15 +2559,16 @@ private void printWithQuotes(final Reader reader, final Appendable appendable) t printWithEscapes(reader, appendable); return; } - final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional + final char quote = getQuoteCharacter().charValue(); // Explicit unboxing is intentional + final char escape = isEscapeCharacterSet() ? 
getEscapeChar() : quote; // (1) Append opening quote append(quote, appendable); - // (2) Append Reader contents, doubling quotes + // (2) Append Reader contents, doubling quotes and escape characters int c; while (EOF != (c = reader.read())) { append((char) c, appendable); - if (c == quote) { - append(quote, appendable); + if (c == quote || c == escape) { + append((char) c, appendable); } } // (3) Append closing quote @@ -2488,27 +2580,27 @@ public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("Delimiter=<").append(delimiter).append('>'); if (isEscapeCharacterSet()) { - sb.append(' '); + sb.append(Constants.SP); sb.append("Escape=<").append(escapeCharacter).append('>'); } if (isQuoteCharacterSet()) { - sb.append(' '); + sb.append(Constants.SP); sb.append("QuoteChar=<").append(quoteCharacter).append('>'); } if (quoteMode != null) { - sb.append(' '); + sb.append(Constants.SP); sb.append("QuoteMode=<").append(quoteMode).append('>'); } if (isCommentMarkerSet()) { - sb.append(' '); + sb.append(Constants.SP); sb.append("CommentStart=<").append(commentMarker).append('>'); } if (isNullStringSet()) { - sb.append(' '); + sb.append(Constants.SP); sb.append("NullString=<").append(nullString).append('>'); } if (recordSeparator != null) { - sb.append(' '); + sb.append(Constants.SP); sb.append("RecordSeparator=<").append(recordSeparator).append('>'); } if (getIgnoreEmptyLines()) { @@ -2522,11 +2614,11 @@ public String toString() { } sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); if (headerComments != null) { - sb.append(' '); + sb.append(Constants.SP); sb.append("HeaderComments:").append(Arrays.toString(headerComments)); } if (headers != null) { - sb.append(' '); + sb.append(Constants.SP); sb.append("Header:").append(Arrays.toString(headers)); } return sb.toString(); @@ -2536,6 +2628,14 @@ String trim(final String value) { return getTrim() ? 
value.trim() : value; } + boolean useMaxRows() { + return getMaxRows() > 0; + } + + boolean useRow(final long rowNum) { + return !useMaxRows() || rowNum <= getMaxRows(); + } + /** * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. *

    @@ -2546,16 +2646,13 @@ String trim(final String value) { * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. */ private void validate() throws IllegalArgumentException { - if (containsLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional + if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // Explicit unboxing is intentional throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); } - if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional + if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // Explicit unboxing is intentional throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); } - if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // N.B. Explicit (un)boxing is intentional + if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // Explicit unboxing is intentional throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); } if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { @@ -2733,6 +2830,9 @@ public CSVFormat withEscape(final Character escape) { * .get(); * * + *

    Any previously set headers are reset to empty. + * The resulting format will have {@code skipHeaderRecord = true}.

    + * * @return A new CSVFormat that is equal to this but using the first record as header. * @see Builder#setSkipHeaderRecord(boolean) * @see Builder#setHeader(String...) @@ -3142,4 +3242,5 @@ public CSVFormat withTrim() { public CSVFormat withTrim(final boolean trim) { return builder().setTrim(trim).get(); } + } diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 1c88d9c7f6..141eba732c 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -49,6 +49,7 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import org.apache.commons.io.Charsets; import org.apache.commons.io.build.AbstractStreamBuilder; import org.apache.commons.io.function.Uncheck; @@ -153,6 +154,7 @@ public final class CSVParser implements Iterable, Closeable { public static class Builder extends AbstractStreamBuilder { private CSVFormat format; + private long byteOffset = -1; private long characterOffset; private long recordNumber = 1; private boolean trackBytes; @@ -164,17 +166,33 @@ protected Builder() { // empty } - @SuppressWarnings("resource") @Override public CSVParser get() throws IOException { - return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset(), trackBytes); + return new CSVParser(this); } /** - * Sets the lexer offset when the parser does not start parsing at the beginning of the source. + * Sets the lexer byte offset when the parser does not start parsing at the beginning of the source. + *

    + * By default, the value is {@code -1}, which reuses the character offset for the byte offset. + *

    * - * @param characterOffset the lexer offset. - * @return this instance. + * @param byteOffset the lexer byte offset. + * @return {@code this} instance. + * @see #setCharacterOffset(long) + * @since 1.15.0 + */ + public Builder setByteOffset(final long byteOffset) { + this.byteOffset = byteOffset; + return asThis(); + } + + /** + * Sets the lexer character offset when the parser does not start parsing at the beginning of the source. + * + * @param characterOffset the lexer character offset. + * @return {@code this} instance. + * @see #setByteOffset(long) */ public Builder setCharacterOffset(final long characterOffset) { this.characterOffset = characterOffset; @@ -184,8 +202,8 @@ public Builder setCharacterOffset(final long characterOffset) { /** * Sets the CSV format. A copy of the given format is kept. * - * @param format the CSV format, null is equivalent to {@link CSVFormat#DEFAULT}. - * @return this instance. + * @param format the CSV format, {@code null} resets to {@link CSVFormat#DEFAULT}. + * @return {@code this} instance. */ public Builder setFormat(final CSVFormat format) { this.format = CSVFormat.copy(format); @@ -196,7 +214,7 @@ public Builder setFormat(final CSVFormat format) { * Sets the next record number to assign, defaults to {@code 1}. * * @param recordNumber the next record number to assign. - * @return this instance. + * @return {@code this} instance. */ public Builder setRecordNumber(final long recordNumber) { this.recordNumber = recordNumber; @@ -207,7 +225,7 @@ public Builder setRecordNumber(final long recordNumber) { * Sets whether to enable byte tracking for the parser. * * @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it. - * @return this instance. + * @return {@code this} instance. 
* @since 1.13.0 */ public Builder setTrackBytes(final boolean trackBytes) { @@ -219,16 +237,24 @@ public Builder setTrackBytes(final boolean trackBytes) { final class CSVRecordIterator implements Iterator { private CSVRecord current; + private long recordCount; /** - * Gets the next record. + * Gets the next record or null at the end of stream or max rows read. * * @throws IOException on parse error or input read-failure * @throws CSVException on invalid input. - * @return the next record. + * @return the next record, or {@code null} if the end of the stream has been reached. */ private CSVRecord getNextRecord() { - return Uncheck.get(CSVParser.this::nextRecord); + CSVRecord record = null; + if (format.useRow(recordCount + 1)) { + record = Uncheck.get(CSVParser.this::nextRecord); + if (record != null) { + recordCount++; + } + } + return record; } @Override @@ -239,7 +265,6 @@ public boolean hasNext() { if (current == null) { current = getNextRecord(); } - return current != null; } @@ -250,7 +275,6 @@ public CSVRecord next() { } CSVRecord next = current; current = null; - if (next == null) { // hasNext() wasn't called before next = getNextRecord(); @@ -258,7 +282,6 @@ public CSVRecord next() { throw new NoSuchElementException("No more CSV records available"); } } - return next; } @@ -267,6 +290,7 @@ public void remove() { throw new UnsupportedOperationException(); } } + /** * Header information based on name and position. */ @@ -304,15 +328,16 @@ public static Builder builder() { * @param file * a CSV file. Must not be null. * @param charset - * The Charset to decode the given file. + * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. 
* @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null. + * If the parameters of the format are inconsistent. * @throws IOException * If an I/O error occurs - * @throws CSVException Thrown on invalid input. + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code file} is {@code null}. */ public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException { Objects.requireNonNull(file, "file"); @@ -328,25 +353,22 @@ public static CSVParser parse(final File file, final Charset charset, final CSVF *

    * * @param inputStream - * an InputStream containing CSV-formatted input. Must not be null. + * an InputStream containing CSV-formatted input, must not be {@code null}. * @param charset - * The Charset to decode the given file. + * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new CSVParser configured with the given reader and format. * @throws IllegalArgumentException * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record - * @throws CSVException Thrown on invalid input. + * @throws CSVException Thrown on invalid CSV input data. * @since 1.5 */ - @SuppressWarnings("resource") public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format) throws IOException { - Objects.requireNonNull(inputStream, "inputStream"); - Objects.requireNonNull(format, "format"); - return parse(new InputStreamReader(inputStream, charset), format); + return parse(new InputStreamReader(inputStream, Charsets.toCharset(charset)), format); } /** @@ -355,21 +377,21 @@ public static CSVParser parse(final InputStream inputStream, final Charset chars * @param path * a CSV file. Must not be null. * @param charset - * The Charset to decode the given file. + * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. 
* @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null. + * If the parameters of the format are inconsistent. * @throws IOException * If an I/O error occurs - * @throws CSVException Thrown on invalid input. + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code path} is {@code null}. * @since 1.5 */ @SuppressWarnings("resource") public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { Objects.requireNonNull(path, "path"); - Objects.requireNonNull(format, "format"); return parse(Files.newInputStream(path), charset, format); } @@ -384,13 +406,13 @@ public static CSVParser parse(final Path path, final Charset charset, final CSVF * @param reader * a Reader containing CSV-formatted input. Must not be null. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new CSVParser configured with the given reader and format. * @throws IllegalArgumentException * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record - * @throws CSVException Thrown on invalid input. + * @throws CSVException Thrown on invalid CSV input data. * @since 1.5 */ public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { @@ -403,17 +425,17 @@ public static CSVParser parse(final Reader reader, final CSVFormat format) throw * @param string * a CSV string. Must not be null. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. 
* @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either string or format are null. + * If the parameters of the format are inconsistent. * @throws IOException * If an I/O error occurs - * @throws CSVException Thrown on invalid input. + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code string} is {@code null}. */ public static CSVParser parse(final String string, final CSVFormat format) throws IOException { Objects.requireNonNull(string, "string"); - Objects.requireNonNull(format, "format"); return parse(new StringReader(string), format); } @@ -428,15 +450,16 @@ public static CSVParser parse(final String string, final CSVFormat format) throw * @param url * a URL. Must not be null. * @param charset - * the charset for the resource. Must not be null. + * the charset for the resource, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either url, charset or format are null. + * If the parameters of the format are inconsistent. * @throws IOException * If an I/O error occurs - * @throws CSVException Thrown on invalid input. + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code url} is {@code null}. */ @SuppressWarnings("resource") public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { @@ -464,6 +487,12 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor */ private long recordNumber; + /** + * Lexer offset when the parser does not start parsing at the beginning of the source. 
Usually used in combination + * with {@link #recordNumber}. + */ + private final long byteOffset; + /** * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination * with {@link #recordNumber}. @@ -473,7 +502,24 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor private final Token reusableToken = new Token(); /** - * Constructs a new instance using the given {@link CSVFormat} + * Constructs a new instance from a builder. + * + * @param builder The source builder. + * @throws IOException if an I/O error occurs. + */ + @SuppressWarnings("resource") // Lexer manages ExtendedBufferedReader. + private CSVParser(final Builder builder) throws IOException { + this.format = (builder.format != null ? builder.format : CSVFormat.DEFAULT).copy(); + this.lexer = new Lexer(format, new ExtendedBufferedReader(builder.getReader(), builder.getCharset(), builder.trackBytes)); + this.csvRecordIterator = new CSVRecordIterator(); + this.headers = createHeaders(); + this.byteOffset = builder.byteOffset != -1 ? builder.byteOffset : builder.characterOffset; + this.characterOffset = builder.characterOffset; + this.recordNumber = builder.recordNumber - 1; + } + + /** + * Constructs a new instance using the given {@link CSVFormat}. * *

    * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, @@ -488,7 +534,7 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record - * @throws CSVException Thrown on invalid input. + * @throws CSVException Thrown on invalid CSV input data. * @deprecated Will be removed in the next major version, use {@link Builder#get()}. */ @Deprecated @@ -497,7 +543,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException } /** - * Constructs a new instance using the given {@link CSVFormat} + * Constructs a new instance using the given {@link CSVFormat}. * *

    * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, @@ -518,56 +564,25 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException * if there is a problem reading the header or skipping the first record * @throws CSVException on invalid input. * @since 1.1 - * @deprecated Will be private in the next major version, use {@link Builder#get()}. + * @deprecated Will be removed in the next major version, use {@link Builder#get()}. */ @Deprecated - public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) - throws IOException { - this(reader, format, characterOffset, recordNumber, null, false); - } - - /** - * Constructs a new instance using the given {@link CSVFormat} - * - *

    - * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *

    - * * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @param characterOffset - * Lexer offset when the parser does not start parsing at the beginning of the source. - * @param recordNumber - * The next record number to assign. - * @param charset - * The character encoding to be used for the reader when enableByteTracking is true. - * @param trackBytes - * {@code true} to enable byte tracking for the parser; {@code false} to disable it. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either the reader or format is null. - * @throws IOException - * If there is a problem reading the header or skipping the first record. - * @throws CSVException Thrown on invalid input. - */ - private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber, - final Charset charset, final boolean trackBytes) - throws IOException { - Objects.requireNonNull(reader, "reader"); - Objects.requireNonNull(format, "format"); - this.format = format.copy(); - this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset, trackBytes)); - this.csvRecordIterator = new CSVRecordIterator(); - this.headers = createHeaders(); - this.characterOffset = characterOffset; - this.recordNumber = recordNumber - 1; + public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) throws IOException { + // @formatter:off + this(builder() + .setReader(reader) + .setFormat(Objects.requireNonNull(format, "format")) // requireNonNull for full compatibility + .setCharacterOffset(characterOffset) + .setRecordNumber(recordNumber) + .setCharset((Charset) null).setTrackBytes(false)); + // @formatter:on } private void addRecordValue(final boolean lastRecord) { final String input = format.trim(reusableToken.content.toString()); - if (lastRecord && 
input.isEmpty() && format.getTrailingDelimiter()) { + // Only drop the empty field produced by an actual trailing delimiter. A quoted empty + // field ("") is a real value, not a trailing delimiter, so it must be kept. + if (lastRecord && input.isEmpty() && format.getTrailingDelimiter() && !reusableToken.isQuoted) { return; } recordList.add(handleNull(input)); @@ -598,11 +613,11 @@ private Map createEmptyHeaderMap() { * @throws CSVException on invalid input. */ private Headers createHeaders() throws IOException { - Map hdrMap = null; + Map headerMap = null; List headerNames = null; final String[] formatHeader = format.getHeader(); if (formatHeader != null) { - hdrMap = createEmptyHeaderMap(); + headerMap = createEmptyHeaderMap(); String[] headerRecord = null; if (formatHeader.length == 0) { // read the header from the first line of the file @@ -620,7 +635,6 @@ private Headers createHeaders() throws IOException { } headerRecord = formatHeader; } - // build the name to index mappings if (headerRecord != null) { // Track an occurrence of a null, empty or blank header. @@ -629,24 +643,20 @@ private Headers createHeaders() throws IOException { final String header = headerRecord[i]; final boolean blankHeader = CSVFormat.isBlank(header); if (blankHeader && !format.getAllowMissingColumnNames()) { - throw new IllegalArgumentException( - "A header name is missing in " + Arrays.toString(headerRecord)); + throw new IllegalArgumentException("A header name is missing in " + Arrays.toString(headerRecord)); } - - final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header); + final boolean containsHeader = blankHeader ? 
observedMissing : headerMap.containsKey(header); final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode(); final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL; final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY; - if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) { - throw new IllegalArgumentException( - String.format( + throw new IllegalArgumentException(String.format( "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, Arrays.toString(headerRecord))); } observedMissing |= blankHeader; if (header != null) { - hdrMap.put(header, Integer.valueOf(i)); // N.B. Explicit (un)boxing is intentional + headerMap.put(header, Integer.valueOf(i)); // Explicit boxing is intentional if (headerNames == null) { headerNames = new ArrayList<>(headerRecord.length); } @@ -656,18 +666,18 @@ private Headers createHeaders() throws IOException { } } // Make header names Collection immutable - return new Headers(hdrMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames)); + return new Headers(headerMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames)); } /** * Gets the current line number in the input stream. * *

    - * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * Note: If your CSV input has multi-line values, the returned number does not correspond to * the record number. *

    * - * @return current line number + * @return current line number. */ public long getCurrentLineNumber() { return lexer.getCurrentLineNumber(); @@ -676,7 +686,7 @@ public long getCurrentLineNumber() { /** * Gets the first end-of-line string encountered. * - * @return the first end-of-line string + * @return the first end-of-line string. * @since 1.5 */ public String getFirstEndOfLine() { @@ -700,7 +710,7 @@ public String getHeaderComment() { * The map keys are column names. The map values are 0-based indices. *

    *

    - * Note: The map can only provide a one-to-one mapping when the format did not + * Note: The map can only provide a one-to-one mapping when the format did not * contain null or duplicate column names. *

    * @@ -744,7 +754,7 @@ public List getHeaderNames() { * Gets the current record number in the input stream. * *

    - * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * Note: If your CSV input has multi-line values, the returned number does not correspond to * the line number. *

    * @@ -761,6 +771,9 @@ public long getRecordNumber() { *

    * The returned content starts at the current parse-position in the stream. *

    + *

    + * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows this method produces. + *

    * * @return list of {@link CSVRecord CSVRecords}, may be empty * @throws UncheckedIOException @@ -867,6 +880,9 @@ private boolean isStrictQuoteMode() { * parser is closed, one option is to extract all records as a list with * {@link #getRecords()}, and return an iterator to that list. *

    + *

    + * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows an Iterator produces. + *

    */ @Override public Iterator iterator() { @@ -876,16 +892,16 @@ public Iterator iterator() { /** * Parses the next record from the current point in the stream. * - * @return the record as an array of values, or {@code null} if the end of the stream has been reached - * @throws IOException on parse error or input read-failure - * @throws CSVException on invalid input. + * @return the record as an array of values, or {@code null} if the end of the stream has been reached. + * @throws IOException on parse error or input read-failure. + * @throws CSVException on invalid CSV input data. */ CSVRecord nextRecord() throws IOException { CSVRecord result = null; recordList.clear(); StringBuilder sb = null; final long startCharPosition = lexer.getCharacterPosition() + characterOffset; - final long startBytePosition = lexer.getBytesRead() + this.characterOffset; + final long startBytePosition = lexer.getBytesRead() + byteOffset; do { reusableToken.reset(); lexer.nextToken(reusableToken); @@ -918,12 +934,10 @@ CSVRecord nextRecord() throws IOException { throw new CSVException("Unexpected Token type: %s", reusableToken.type); } } while (reusableToken.type == TOKEN); - if (!recordList.isEmpty()) { recordNumber++; - final String comment = Objects.toString(sb, null); - result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment, - recordNumber, startCharPosition, startBytePosition); + result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), Objects.toString(sb, null), recordNumber, startCharPosition, + startBytePosition); } return result; } @@ -934,6 +948,10 @@ CSVRecord nextRecord() throws IOException { * If the parser is closed, the stream will not produce any more values. * See the comments in {@link #iterator()}. *

    + *

    + * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a Stream produces. + *

    + * * @return a sequential {@code Stream} with this collection as its source. * @since 1.9.0 */ diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java index 67088c38a5..a7048fd625 100644 --- a/src/main/java/org/apache/commons/csv/CSVPrinter.java +++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java @@ -32,8 +32,10 @@ import java.sql.Clob; import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.Statement; import java.util.Arrays; import java.util.Objects; +import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Stream; import org.apache.commons.io.function.IOStream; @@ -87,26 +89,24 @@ public final class CSVPrinter implements Flushable, Closeable { private long recordCount; + private final ReentrantLock lock = new ReentrantLock(); + /** * Creates a printer that will print values to the given stream following the CSVFormat. *

    - * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation - * and escaping with a different character) are not supported. + * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation and escaping with a different + * character) are not supported. *

    * - * @param appendable - * stream to which to print. Must not be null. - * @param format - * the CSV format. Must not be null. - * @throws IOException - * thrown if the optional header cannot be printed. - * @throws IllegalArgumentException - * thrown if the parameters of the format are inconsistent or if either out or format are null. + * @param appendable stream to which to print. Must not be null. + * @param format the CSV format. Must not be null. + * @throws IOException thrown if the optional header cannot be printed. + * @throws IllegalArgumentException thrown if the parameters of the format are inconsistent. + * @throws NullPointerException thrown if either parameter is null. */ public CSVPrinter(final Appendable appendable, final CSVFormat format) throws IOException { Objects.requireNonNull(appendable, "appendable"); Objects.requireNonNull(format, "format"); - this.appendable = appendable; this.format = format.copy(); // TODO: Is it a good idea to do this here instead of on the first call to a print method? @@ -129,10 +129,12 @@ public void close() throws IOException { /** * Closes the underlying stream with an optional flush first. + * * @param flush whether to flush before the actual close. * @throws IOException * If an I/O error occurs * @since 1.6 + * @see CSVFormat#getAutoFlush() */ public void close(final boolean flush) throws IOException { if (flush || format.getAutoFlush()) { @@ -144,12 +146,12 @@ public void close(final boolean flush) throws IOException { } /** - * Outputs the record separator and increments the record count. + * Prints the record separator and increments the record count. * * @throws IOException * If an I/O error occurs */ - private synchronized void endOfRecord() throws IOException { + private void endOfRecord() throws IOException { println(); recordCount++; } @@ -173,7 +175,7 @@ public void flush() throws IOException { * @return the target Appendable. 
*/ public Appendable getOut() { - return this.appendable; + return appendable; } /** @@ -194,9 +196,13 @@ public long getRecordCount() { * @throws IOException * If an I/O error occurs */ - public synchronized void print(final Object value) throws IOException { - format.print(value, appendable, newRecord); - newRecord = false; + public void print(final Object value) throws IOException { + lock.lock(); + try { + printRaw(value); + } finally { + lock.unlock(); + } } /** @@ -220,34 +226,39 @@ public synchronized void print(final Object value) throws IOException { * @throws IOException * If an I/O error occurs */ - public synchronized void printComment(final String comment) throws IOException { - if (comment == null || !format.isCommentMarkerSet()) { - return; - } - if (!newRecord) { - println(); - } - appendable.append(format.getCommentMarker().charValue()); // N.B. Explicit (un)boxing is intentional - appendable.append(SP); - for (int i = 0; i < comment.length(); i++) { - final char c = comment.charAt(i); - switch (c) { - case CR: - if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) { - i++; - } - // falls-through: break intentionally excluded. - case LF: + public void printComment(final String comment) throws IOException { + lock.lock(); + try { + if (comment == null || !format.isCommentMarkerSet()) { + return; + } + if (!newRecord) { println(); - appendable.append(format.getCommentMarker().charValue()); // N.B. Explicit (un)boxing is intentional - appendable.append(SP); - break; - default: - appendable.append(c); - break; } + appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional + appendable.append(SP); + for (int i = 0; i < comment.length(); i++) { + final char c = comment.charAt(i); + switch (c) { + case CR: + if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) { + i++; + } + // falls-through: break intentionally excluded. 
+ case LF: + println(); + appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional + appendable.append(SP); + break; + default: + appendable.append(c); + break; + } + } + println(); + } finally { + lock.unlock(); } - println(); } /** @@ -258,22 +269,45 @@ public synchronized void printComment(final String comment) throws IOException { * @throws SQLException If a database access error occurs or this method is called on a closed result set. * @since 1.9.0 */ - public synchronized void printHeaders(final ResultSet resultSet) throws IOException, SQLException { - try (IOStream stream = IOStream.of(format.builder().setHeader(resultSet).get().getHeader())) { - stream.forEachOrdered(this::print); + public void printHeaders(final ResultSet resultSet) throws IOException, SQLException { + lock.lock(); + try { + try (IOStream stream = IOStream.of(format.builder().setHeader(resultSet).get().getHeader())) { + stream.forEachOrdered(this::print); + } + println(); + } finally { + lock.unlock(); + } + } + + /** + * Prints the record separator. + * + * @throws IOException + * If an I/O error occurs + */ + public void println() throws IOException { + lock.lock(); + try { + format.println(appendable); + newRecord = true; + } finally { + lock.unlock(); } - println(); } /** - * Outputs the record separator. + * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. * + * @param value + * value to be output. 
* @throws IOException * If an I/O error occurs */ - public synchronized void println() throws IOException { - format.println(appendable); - newRecord = true; + private void printRaw(final Object value) throws IOException { + format.print(value, appendable, newRecord); + newRecord = false; } /** @@ -290,9 +324,14 @@ public synchronized void println() throws IOException { * If an I/O error occurs */ @SuppressWarnings("resource") - public synchronized void printRecord(final Iterable values) throws IOException { - IOStream.of(values).forEachOrdered(this::print); - endOfRecord(); + public void printRecord(final Iterable values) throws IOException { + lock.lock(); + try { + IOStream.of(values).forEachOrdered(this::print); + endOfRecord(); + } finally { + lock.unlock(); + } } /** @@ -320,16 +359,21 @@ public void printRecord(final Object... values) throws IOException { * separator to the output after printing the record, so there is no need to call {@link #println()}. *

    * - * @param values + * @param stream * values to output. * @throws IOException * If an I/O error occurs * @since 1.10.0 */ @SuppressWarnings("resource") // caller closes. - public synchronized void printRecord(final Stream values) throws IOException { - IOStream.adapt(values).forEachOrdered(this::print); - endOfRecord(); + public void printRecord(final Stream stream) throws IOException { + lock.lock(); + try { + IOStream.adapt(stream).forEachOrdered(stream.isParallel() ? this::printRaw : this::print); + endOfRecord(); + } finally { + lock.unlock(); + } } private void printRecordObject(final Object value) throws IOException { @@ -342,6 +386,11 @@ private void printRecordObject(final Object value) throws IOException { } } + @SuppressWarnings("resource") + private void printRecords(final IOStream stream) throws IOException { + format.limit(stream).forEachOrdered(this::printRecordObject); + } + /** * Prints all the objects in the given {@link Iterable} handling nested collections/arrays as records. * @@ -382,7 +431,7 @@ private void printRecordObject(final Object value) throws IOException { */ @SuppressWarnings("resource") public void printRecords(final Iterable values) throws IOException { - IOStream.of(values).forEachOrdered(this::printRecordObject); + printRecords(IOStream.of(values)); } /** @@ -428,37 +477,47 @@ public void printRecords(final Object... values) throws IOException { /** * Prints all the objects in the given JDBC result set. + *

    + * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a result set produces. This is most useful when you cannot limit rows + * through {@link Statement#setLargeMaxRows(long)} or {@link Statement#setMaxRows(int)}. + *

    * - * @param resultSet - * The values to print. - * @throws IOException - * If an I/O error occurs. - * @throws SQLException - * Thrown when a database access error occurs. + * @param resultSet The values to print. + * @throws IOException If an I/O error occurs. + * @throws SQLException Thrown when a database access error occurs. */ public void printRecords(final ResultSet resultSet) throws SQLException, IOException { final int columnCount = resultSet.getMetaData().getColumnCount(); - while (resultSet.next()) { - for (int i = 1; i <= columnCount; i++) { - final Object object = resultSet.getObject(i); - if (object instanceof Clob) { - try (Reader reader = ((Clob) object).getCharacterStream()) { - print(reader); - } - } else if (object instanceof Blob) { - try (InputStream inputStream = ((Blob) object).getBinaryStream()) { - print(inputStream); + while (resultSet.next() && format.useRow(resultSet.getRow())) { + lock.lock(); + try { + for (int i = 1; i <= columnCount; i++) { + final Object object = resultSet.getObject(i); + if (object instanceof Clob) { + try (Reader reader = ((Clob) object).getCharacterStream()) { + print(reader); + } + } else if (object instanceof Blob) { + try (InputStream inputStream = ((Blob) object).getBinaryStream()) { + print(inputStream); + } + } else { + print(object); } - } else { - print(object); } + endOfRecord(); + } finally { + lock.unlock(); } - endOfRecord(); } } /** * Prints all the objects with metadata in the given JDBC result set based on the header boolean. + *

    + * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a result set produces. This is most useful when you cannot limit rows + * through {@link Statement#setLargeMaxRows(long)} or {@link Statement#setMaxRows(int)}. + *

    * * @param resultSet source of row data. * @param printHeader whether to print headers. @@ -515,6 +574,6 @@ public void printRecords(final ResultSet resultSet, final boolean printHeader) t */ @SuppressWarnings({ "resource" }) // Caller closes. public void printRecords(final Stream values) throws IOException { - IOStream.adapt(values).forEachOrdered(this::printRecordObject); + printRecords(IOStream.adapt(values)); } } diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index b120f945f4..8dab14d907 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -55,13 +55,13 @@ public final class CSVRecord implements Serializable, Iterable { */ private final long bytePosition; - /** The accumulated comments (if any) */ + /** The accumulated comments (if any). */ private final String comment; /** The record number. */ private final long recordNumber; - /** The values of the record */ + /** The values of the record. */ private final String[] values; /** The parser that originates this record. This is not serialized. */ @@ -114,9 +114,9 @@ public String get(final int i) { * the name of the column to be retrieved. * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}. * @throws IllegalStateException - * if no header mapping was provided + * if no header mapping was provided. * @throws IllegalArgumentException - * if {@code name} is not mapped or if the record is inconsistent + * if {@code name} is not mapped or if the record is inconsistent. 
* @see #isMapped(String) * @see #isConsistent() * @see #getParser() @@ -125,20 +125,18 @@ public String get(final int i) { public String get(final String name) { final Map headerMap = getHeaderMapRaw(); if (headerMap == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); + throw new IllegalStateException("No header mapping was specified, the record values can't be accessed by name"); } final Integer index = headerMap.get(name); if (index == null) { - throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, - headerMap.keySet())); + throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, headerMap.keySet())); } try { - return values[index.intValue()]; // N.B. Explicit (un)boxing is intentional + return values[index.intValue()]; // Explicit unboxing is intentional } catch (final ArrayIndexOutOfBoundsException e) { - throw new IllegalArgumentException(String.format( - "Index for header '%s' is %d but CSVRecord only has %d values!", name, index, - Integer.valueOf(values.length))); // N.B. Explicit (un)boxing is intentional + // Explicit boxing is intentional + throw new IllegalArgumentException( + String.format("Index for header '%s' is %d but CSVRecord only has %d values!", name, index, Integer.valueOf(values.length))); } } @@ -165,7 +163,7 @@ public long getCharacterPosition() { /** * Returns the comment for this record, if any. * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF), + * If there is no following record (that is, the comment is at EOF), * then the comment will be ignored. * * @return the comment for this record, or null if no comment for this record is available. @@ -197,7 +195,7 @@ public CSVParser getParser() { * Returns the number of this record in the parsed CSV file. * *

    - * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * NOTE:If your CSV input has multi-line values, the returned number does not correspond to * the current line number of the parser that created this record. *

    * @@ -211,10 +209,10 @@ public long getRecordNumber() { /** * Checks whether this record has a comment, false otherwise. * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF), + * If there is no following record (that is, the comment is at EOF), * then the comment will be ignored. * - * @return true if this record has a comment, false otherwise + * @return true if this record has a comment, false otherwise. * @since 1.3 */ public boolean hasComment() { @@ -229,7 +227,7 @@ public boolean hasComment() { * test but still produce parsable files. *

    * - * @return true of this record is valid, false if not + * @return true of this record is valid, false if not. */ public boolean isConsistent() { final Map headerMap = getHeaderMapRaw(); @@ -237,7 +235,7 @@ public boolean isConsistent() { } /** - * Checks whether a given column is mapped, i.e. its name has been defined to the parser. + * Checks whether a given column is mapped, that is, its name has been defined to the parser. * * @param name * the name of the column to be retrieved. @@ -252,8 +250,8 @@ public boolean isMapped(final String name) { * Checks whether a column with a given index has a value. * * @param index - * a column index (0-based) - * @return whether a column with a given index has a value + * a column index (0-based). + * @return whether a column with a given index has a value. */ public boolean isSet(final int index) { return 0 <= index && index < values.length; @@ -264,10 +262,10 @@ public boolean isSet(final int index) { * * @param name * the name of the column to be retrieved. - * @return whether a given column is mapped and has a value + * @return whether a given column is mapped and has a value. */ public boolean isSet(final String name) { - return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // N.B. Explicit (un)boxing is intentional + return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit unboxing is intentional } /** @@ -283,7 +281,7 @@ public Iterator iterator() { /** * Puts all values of this record into the given Map. * - * @param the map type + * @param The map type. * @param map The Map to populate. * @return the given map. 
* @since 1.9.0 @@ -354,14 +352,13 @@ public Map toMap() { */ @Override public String toString() { - return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + - Arrays.toString(values) + "]"; + return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + Arrays.toString(values) + "]"; } /** - * Gets the values for this record. This is not a copy. + * Gets the values for this record. This is not a copy. * - * @return the values for this record. + * @return the values for this record, never null. * @since 1.10.0 */ public String[] values() { diff --git a/src/main/java/org/apache/commons/csv/Constants.java b/src/main/java/org/apache/commons/csv/Constants.java index e85578467d..9dd276eccc 100644 --- a/src/main/java/org/apache/commons/csv/Constants.java +++ b/src/main/java/org/apache/commons/csv/Constants.java @@ -20,7 +20,7 @@ package org.apache.commons.csv; /** - * Private constants to this package. + * Private constants for this package. */ final class Constants { @@ -37,10 +37,10 @@ final class Constants { static final char CR = '\r'; - /** RFC 4180 defines line breaks as CRLF */ + /** RFC 4180 defines line breaks as CRLF. */ static final String CRLF = "\r\n"; - static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); // N.B. Explicit (un)boxing is intentional + static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); // Explicit boxing is intentional. static final String EMPTY = ""; @@ -67,7 +67,7 @@ final class Constants { static final char PIPE = '|'; - /** ASCII record separator */ + /** ASCII record separator. */ static final char RS = 30; static final char SP = ' '; @@ -76,10 +76,10 @@ final class Constants { static final char TAB = '\t'; - /** Undefined state for the lookahead char */ + /** Undefined state for the lookahead char. */ static final int UNDEFINED = -2; - /** ASCII unit separator */ + /** ASCII unit separator. */ static final char US = 31; /** No instances. 
*/ diff --git a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java index 01989d6640..8087f16eeb 100644 --- a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java +++ b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java @@ -20,7 +20,7 @@ package org.apache.commons.csv; /** - * Determines how duplicate header fields should be handled + * Enumerates how duplicate header fields should be handled * if {@link CSVFormat.Builder#setHeader(Class)} is not null. * * @since 1.10.0 diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 8c0a034a22..20c1ef5444 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -37,26 +37,30 @@ /** * A special buffered reader which supports sophisticated read access. *

    - * In particular the reader supports a look-ahead option, which allows you to see the next char returned by - * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}. + * In particular the reader supports a look-ahead option, which allows you to see the next char returned by {@link #read()}. This reader also tracks how many + * characters have been read with {@link #getPosition()}. *

    */ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader { /** The last char returned */ private int lastChar = UNDEFINED; + private int lastCharMark = UNDEFINED; /** The count of EOLs (CR/LF/CRLF) seen so far */ private long lineNumber; + private long lineNumberMark; /** The position, which is the number of characters read so far */ private long position; + private long positionMark; /** The number of bytes read so far. */ private long bytesRead; + private long bytesReadMark; /** Encoder for calculating the number of bytes for each character read. */ @@ -70,12 +74,11 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader { } /** - * Constructs a new instance with the specified reader, character set, - * and byte tracking option. Initializes an encoder if byte tracking is enabled - * and a character set is provided. + * Constructs a new instance with the specified reader, character set, and byte tracking option. Initializes an encoder if byte tracking is enabled and a + * character set is provided. * - * @param reader the reader supports a look-ahead option. - * @param charset the character set for encoding, or {@code null} if not applicable. + * @param reader the reader supports a look-ahead option. + * @param charset the character set for encoding, or {@code null} if not applicable. * @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it. */ ExtendedBufferedReader(final Reader reader, final Charset charset, final boolean trackBytes) { @@ -86,8 +89,7 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader { /** * Closes the stream. 
* - * @throws IOException - * If an I/O error occurs + * @throws IOException If an I/O error occurs */ @Override public void close() throws IOException { @@ -105,26 +107,35 @@ long getBytesRead() { return this.bytesRead; } + private long getEncodedCharLength(final char[] buf, final int offset, final int length) throws CharacterCodingException { + long len = 0; + int previous = lastChar; + for (int i = offset; i < offset + length; i++) { + len += getEncodedCharLength(previous, buf[i]); + previous = buf[i]; + } + return len; + } + /** - * Gets the byte length of the given character based on the the original Unicode - * specification, which defined characters as fixed-width 16-bit entities. + * Gets the byte length of the given character based on the original Unicode specification, which defined characters as fixed-width 16-bit entities. *

    * The Unicode characters are divided into two main ranges: *

      - *
    • U+0000 to U+FFFF (Basic Multilingual Plane, BMP): - *
        - *
      • Represented using a single 16-bit {@code char}.
      • - *
      • Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.
      • - *
      - *
    • - *
    • U+10000 to U+10FFFF (Supplementary Characters): - *
        - *
      • Represented as a pair of {@code char}s:
      • - *
      • The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).
      • - *
      • The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).
      • - *
      • Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.
      • - *
      - *
    • + *
    • U+0000 to U+FFFF (Basic Multilingual Plane, BMP): + *
        + *
      • Represented using a single 16-bit {@code char}.
      • + *
      • Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.
      • + *
      + *
    • + *
    • U+10000 to U+10FFFF (Supplementary Characters): + *
        + *
      • Represented as a pair of {@code char}s:
      • + *
      • The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).
      • + *
      • The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).
      • + *
      • Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.
      • + *
      + *
    • *
    * * @param current the current character to process. @@ -132,26 +143,29 @@ long getBytesRead() { * @throws CharacterCodingException if the character cannot be encoded. */ private int getEncodedCharLength(final int current) throws CharacterCodingException { + return getEncodedCharLength(lastChar, current); + } + + private int getEncodedCharLength(final int previous, final int current) throws CharacterCodingException { final char cChar = (char) current; - final char lChar = (char) lastChar; + final char lChar = (char) previous; if (!Character.isSurrogate(cChar)) { return encoder.encode(CharBuffer.wrap(new char[] { cChar })).limit(); } if (Character.isHighSurrogate(cChar)) { // Move on to the next char (low surrogate) return 0; - } else if (Character.isSurrogatePair(lChar, cChar)) { + } + if (Character.isSurrogatePair(lChar, cChar)) { return encoder.encode(CharBuffer.wrap(new char[] { lChar, cChar })).limit(); - } else { - throw new CharacterCodingException(); } + throw new CharacterCodingException(); } /** - * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by - * any of the read methods. This will not include a character read using the {@link #peek()} method. If no - * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached - * on the last read then this will return {@link IOUtils#EOF}. + * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by any of the read methods. This will not + * include a character read using the {@link #peek()} method. If no character has been read then this will return {@link Constants#UNDEFINED}. If the end of + * the stream was reached on the last read then this will return {@link IOUtils#EOF}. 
* * @return the last character that was read */ @@ -193,11 +207,10 @@ public void mark(final int readAheadLimit) throws IOException { @Override public int read() throws IOException { final int current = super.read(); - if (current == CR || current == LF && lastChar != CR || - current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) { + if (current == CR || current == LF && lastChar != CR || current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) { lineNumber++; } - if (encoder != null) { + if (encoder != null && current != EOF) { this.bytesRead += getEncodedCharLength(current); } lastChar = current; @@ -211,6 +224,9 @@ public int read(final char[] buf, final int offset, final int length) throws IOE return 0; } final int len = super.read(buf, offset, length); + if (encoder != null && len > 0) { + this.bytesRead += getEncodedCharLength(buf, offset, len); + } if (len > 0) { for (int i = offset; i < offset + len; i++) { final char ch = buf[i]; @@ -231,8 +247,7 @@ public int read(final char[] buf, final int offset, final int length) throws IOE } /** - * Gets the next line, dropping the line terminator(s). This method should only be called when processing a - * comment, otherwise, information can be lost. + * Gets the next line, dropping the line terminator(s). This method should only be called when processing a comment, otherwise, information can be lost. *

    * Increments {@link #lineNumber} and updates {@link #position}. *

    @@ -272,5 +287,4 @@ public void reset() throws IOException { bytesRead = bytesReadMark; super.reset(); } - } diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index 0e5f368665..fe964480a4 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -23,6 +23,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Arrays; import org.apache.commons.io.IOUtils; @@ -68,8 +69,8 @@ final class Lexer implements Closeable { /** * Appends the next escaped character to the token's content. * - * @param token the current token - * @throws IOException on stream access error + * @param token the current token. + * @throws IOException on stream access error. * @throws CSVException Thrown on invalid input. */ private void appendNextEscapedCharacterToToken(final Token token) throws IOException { @@ -89,7 +90,7 @@ private void appendNextEscapedCharacterToToken(final Token token) throws IOExcep * Closes resources. * * @throws IOException - * If an I/O error occurs + * If an I/O error occurs. */ @Override public void close() throws IOException { @@ -97,27 +98,27 @@ public void close() throws IOException { } /** - * Gets the number of bytes read + * Gets the number of bytes read. * - * @return the number of bytes read + * @return the number of bytes read. */ long getBytesRead() { return reader.getBytesRead(); } /** - * Returns the current character position + * Gets the current character position. * - * @return the current character position + * @return the current character position. */ long getCharacterPosition() { return reader.getPosition(); } /** - * Returns the current line number + * Gets the current line number. * - * @return the current line number + * @return the current line number. 
*/ long getCurrentLineNumber() { return reader.getLineNumber(); @@ -136,7 +137,7 @@ boolean isCommentStart(final int ch) { } /** - * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#peek(char[])}. + * Tests whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#peek(char[])}. * * @param ch * the current character. @@ -152,6 +153,7 @@ boolean isDelimiter(final int ch) throws IOException { isLastTokenDelimiter = true; return true; } + Arrays.fill(delimiterBuf, '\0'); reader.peek(delimiterBuf); for (int i = 0; i < delimiterBuf.length; i++) { if (delimiterBuf[i] != delimiter[i + 1]) { @@ -190,6 +192,7 @@ boolean isEscape(final int ch) { * @throws IOException If an I/O error occurs. */ boolean isEscapeDelimiter() throws IOException { + Arrays.fill(escapeDelimiterBuf, '\0'); reader.peek(escapeDelimiterBuf); if (escapeDelimiterBuf[0] != delimiter[0]) { return false; @@ -214,7 +217,7 @@ boolean isQuoteChar(final int ch) { /** * Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file. * - * @param ch the character to check + * @param ch the character to check. * @return true if the character is at the start of a line. */ boolean isStartOfLine(final int ch) { @@ -274,15 +277,22 @@ Token nextToken(final Token token) throws IOException { } // Important: make sure a new char gets consumed in each iteration while (token.type == Token.Type.INVALID) { + // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must + // only be evaluated once per character. Remember a match found while skipping whitespace below. 
+ boolean delimiter = false; // ignore whitespaces at beginning of a token if (ignoreSurroundingSpaces) { - while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) { + while (Character.isWhitespace((char) c) && !eol) { + if (isDelimiter(c)) { + delimiter = true; + break; + } c = reader.read(); eol = readEndOfLine(c); } } // ok, start of token reached: encapsulated, or token - if (isDelimiter(c)) { + if (delimiter || isDelimiter(c)) { // empty token return TOKEN("") token.type = Token.Type.TOKEN; } else if (eol) { @@ -340,7 +350,6 @@ private Token parseEncapsulatedToken(final Token token) throws IOException { int c; while (true) { c = reader.read(); - if (isQuoteChar(c)) { if (isQuoteChar(reader.peek())) { // double or escaped encapsulator -> add single encapsulator to token @@ -401,10 +410,10 @@ private Token parseEncapsulatedToken(final Token token) throws IOException { *
  • An unescaped delimiter has been reached (TOKEN)
  • *
* - * @param token the current token - * @param ch the current character - * @return the filled token - * @throws IOException on stream access error + * @param token the current token. + * @param ch the current character. + * @return the filled token. + * @throws IOException on stream access error. * @throws CSVException Thrown on invalid input. */ private Token parseSimpleToken(final Token token, final int ch) throws IOException { @@ -443,7 +452,7 @@ private Token parseSimpleToken(final Token token, final int ch) throws IOExcepti /** * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character... * - * @return true if the given or next character is a line-terminator + * @return true if the given or next character is a line-terminator. */ boolean readEndOfLine(final int ch) throws IOException { // check if we have \r\n... diff --git a/src/main/java/org/apache/commons/csv/QuoteMode.java b/src/main/java/org/apache/commons/csv/QuoteMode.java index d9c032ffc4..ae64ab4863 100644 --- a/src/main/java/org/apache/commons/csv/QuoteMode.java +++ b/src/main/java/org/apache/commons/csv/QuoteMode.java @@ -19,7 +19,9 @@ package org.apache.commons.csv; /** - * Defines quoting behavior. + * Enumerates quoting behavior. + * + * @see CSVFormat.Builder#setQuoteMode(QuoteMode) */ public enum QuoteMode { diff --git a/src/main/java/org/apache/commons/csv/Token.java b/src/main/java/org/apache/commons/csv/Token.java index 9e63b944b6..87af335678 100644 --- a/src/main/java/org/apache/commons/csv/Token.java +++ b/src/main/java/org/apache/commons/csv/Token.java @@ -24,13 +24,14 @@ /** * Internal token representation. *

- * It is used as a contract between the lexer and the parser. + * This is used as a contract between the lexer and the parser. *

*/ final class Token { enum Type { - /** Token has no valid content, i.e. is in its initialized state. */ + + /** Token has no valid content, that is, is in its initialized state. */ INVALID, /** Token with content, at the beginning or in the middle of a line. */ @@ -47,13 +48,13 @@ enum Type { } /** Length of the initial token (content-)buffer */ - private static final int INITIAL_TOKEN_LENGTH = 50; + private static final int DEFAULT_CAPACITY = 50; /** Token type */ Token.Type type = INVALID; - /** The content buffer. */ - final StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH); + /** The content buffer, never null. */ + final StringBuilder content = new StringBuilder(DEFAULT_CAPACITY); /** Token ready flag: indicates a valid token with content (ready for the parser). */ boolean isReady; @@ -68,12 +69,12 @@ void reset() { } /** - * Eases IDE debugging. + * Converts the token state to a string to ease debugging. * * @return a string helpful for debugging. */ @Override public String toString() { - return type.name() + " [" + content.toString() + "]"; + return type + " [" + content.toString() + "]"; } } diff --git a/src/main/javadoc/overview.html b/src/main/javadoc/overview.html new file mode 100644 index 0000000000..0598cf19d0 --- /dev/null +++ b/src/main/javadoc/overview.html @@ -0,0 +1,375 @@ + + + +Apache Commons CSV Overview + + + Apache Commons CSV +

+ You can find the Javadoc package list at the bottom of this page. +

+
+

Introducing Commons CSV

+

Apache Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.

+

+ Common CSV formats are predefined in the CSVFormat class: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CSV Formats
CSVFormatDescriptionSince Version
DEFAULTIO for the Standard Comma Separated Value format, like RFC 4180 but allowing + empty lines. + 1.0
EXCELIO for the Microsoft + Excel CSV format. + 1.0
INFORMIX_UNLOADIO for the Informix UNLOAD TO file_name + command. + 1.3
INFORMIX_UNLOAD_CSVIO for the Informix UNLOAD CSV TO + file_name command with escaping disabled. + 1.3
MONGODB_CSVIO for the MongoDB CSV mongoexport command. + 1.7
MONGODB_TSVIO for the MongoDB Tab Separated Values (TSV) mongoexport + command. + 1.7
MYSQLIO for the MySQL CSV format. + 1.0
ORACLEIO for the Oracle CSV format + of the SQL*Loader utility. + 1.6
POSTGRESQL_CSVIO for the PostgreSQL CSV format used by the COPY + operation. + 1.5
POSTGRESQL_TEXTIO for the PostgreSQL Text format used by the COPY + operation. + 1.5
RFC4180IO for the RFC-4180 format defined by RFC 4180. + 1.0
TDFIO for the Tab Delimited Format (also known as Tab Separated Values). + 1.0
+

Custom formats can be created using a fluent style API.

+
+
+

Parsing Standard CSV Files

+

+ Parsing files with Apache Commons CSV is relatively straightforward. Pick a + CSVFormat + and go from there. +

+
+

Parsing an Excel CSV File

+

To parse an Excel CSV file, write:

+
+        
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
+for (CSVRecord record : records) {
+    String lastName = record.get("Last Name");
+    String firstName = record.get("First Name");
+}
+        
+      
+
+
+
+

Parsing Custom CSV Files

+

+ You can define your own IO rules by building your own CSVFormat instance. Starting with + CSVFormat.builder() + lets you start from a predefined format and customize. For example: +

+
+      
+CSVFormat myFormat = CSVFormat.DEFAULT.builder()
+    .setCommentMarker('#')
+    .setEscape('+')
+    .setIgnoreSurroundingSpaces(true)
+    .setQuote('"')
+    .setQuoteMode(QuoteMode.ALL)
+    .get();
+      
+    
+
+
+

Handling Byte Order Marks

+

+ To handle files that start with a Byte Order Mark (BOM), like some Excel CSV files, you need an extra step to deal with the optional BOM bytes. Using the + BOMInputStream class from Apache Commons IO simplifies this task; for example: +

+
+        
+try (Reader reader = new InputStreamReader(BOMInputStream.builder()
+        .setPath(path)
+        .get(), "UTF-8");
+        CSVParser parser = CSVFormat.EXCEL.builder()
+                .setHeader()
+                .get()
+                .parse(reader)) {
+    for (CSVRecord record : parser) {
+        String string = record.get("ColumnA");
+        // ...
+    }
+}
+        
+      
+

You might find it handy to create something like this:

+
+        
+/**
+ * Creates a reader capable of handling BOMs.
+ *
+ * @param path The path to read.
+ * @return a new InputStreamReader for UTF-8 bytes.
+ * @throws IOException if an I/O error occurs.
+ */
+public InputStreamReader newReader(final Path path) throws IOException {
+    return new InputStreamReader(BOMInputStream.builder()
+            .setPath(path)
+            .get(), StandardCharsets.UTF_8);
+}
+        
+      
+
+
+

Using Headers

+

+ Apache Commons CSV provides several ways to access record values. The simplest way is to access values by their index in the record. However, columns in + CSV files often have a name, for example: ID, CustomerNo, Birthday, etc. The CSVFormat class provides an API for specifying these header names and + CSVRecord on the other hand has methods to access values by their corresponding header name. +

+
+

Accessing column values by index

+

To access a record value by index, no special configuration of the CSVFormat is necessary:

+
+        
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.parse(in);
+for (CSVRecord record : records) {
+    String columnOne = record.get(0);
+    String columnTwo = record.get(1);
+}
+        
+      
+
+
+

Defining a header manually

+

Indices may not be the most intuitive way to access record values. For this reason it is possible to assign names to each column in the file:

+
+        
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
+  .setHeader("ID", "CustomerNo", "Name")
+  .build()
+  .parse(in);
+for (CSVRecord record : records) {
+    String id = record.get("ID");
+    String customerNo = record.get("CustomerNo");
+    String name = record.get("Name");
+}
+        
+      
+ Note that column values can still be accessed using their index. +
+
+

Using an enum to define a header

+

Using String values all over the code to reference columns can be error-prone. For this reason, it is possible to define an enum to specify header + names. Note that the enum constant names are used to access column values. This may lead to enum constant names which do not follow the Java coding + standard of defining constants in upper case with underscores:

+
+        
+public enum Headers {
+    ID, CustomerNo, Name
+}
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
+  .setHeader(Headers.class)
+  .build()
+  .parse(in);
+for (CSVRecord record : records) {
+    String id = record.get(Headers.ID);
+    String customerNo = record.get(Headers.CustomerNo);
+    String name = record.get(Headers.Name);
+}
+        
+      
+ Again it is possible to access values by their index and by using a String (for example "CustomerNo"). +
+
+

Header auto detection

+

Some CSV files define header names in their first record. If configured, Apache Commons CSV can parse the header names from the first record:

+
+        
+Reader in = new FileReader("path/to/file.csv");
+Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
+  .setHeader()
+  .setSkipHeaderRecord(true)
+  .build()
+  .parse(in);
+for (CSVRecord record : records) {
+    String id = record.get("ID");
+    String customerNo = record.get("CustomerNo");
+    String name = record.get("Name");
+}
+        
+      
+ This will use the values from the first record as header names and skip the first record when iterating. +
+
+
+

Printing with headers

+

To print a CSV file with headers, you specify the headers in the format:

+
+        
+Appendable out = ...;
+CSVPrinter printer = CSVFormat.DEFAULT.builder()
+  .setHeader("H1", "H2")
+  .build()
+  .print(out);
+        
+      
+

To print a CSV file with JDBC column labels, you specify the ResultSet in the format:

+
+        
+try (ResultSet resultSet = ...) {
+    CSVPrinter printer = CSVFormat.DEFAULT.builder()
+      .setHeader(resultSet)
+      .build()
+      .print(out);
+}
+        
+      
+
+
+

Working with JDBC

+
+

Exporting JDBC Result Sets

+

+ To export row data from a JDBC + ResultSet + , use CSVPrinter.printRecords(ResultSet) : +

+
+        
+        final StringWriter sw = new StringWriter();
+        final CSVFormat csvFormat = CSVFormat.DEFAULT;
+        try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:my_test;", "sa", "")) {
+            try (Statement stmt = connection.createStatement();
+                    CSVPrinter printer = new CSVPrinter(sw, csvFormat);
+                    ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT, BIN_DATA from TEST")) {
+                printer.printRecords(resultSet);
+            }
+        }
+        final String csv = sw.toString();
+        System.out.println(csv);
+        
+        
+
+
+

Limiting rows from JDBC Result Sets

+

SQL lets you limit how many rows a SELECT statement returns with the LIMIT clause.

+

+ When you can't or don't want to change the SQL used to generate rows, JDBC lets you limit how many rows a JDBC Statement returns with the Statement.setMaxRows(int) method. +

+

+ When you get a JDBC ResultSet from an API like + DatabaseMetaData.getProcedures(...), there is no SQL or JDBC Statement to use to set a limit, and the ResultSet class does not have an API to limit rows. +

+

+ To simplify limiting ResultSet rows, Commons CSV offers the CSVFormat.Builder.setMaxRows(long) + method. For example: +

+
+        
+        CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
+            .setMaxRows(5_000)
+            .get();
+        try (ResultSet resultSet = ...) {
+            csvFormat.printer().printRecords(resultSet);
+        }
+        
+      
+

+ Using the above, calling CSVPrinter.printRecords(ResultSet) will + limit the row count to the maximum number of rows specified in setMaxRows(). +

+

Note that setMaxRows() works with the other methods that print a sequence of records.

+
+
+ + diff --git a/src/media/commons-logo-component-100.xcf b/src/media/commons-logo-component-100.xcf new file mode 100644 index 0000000000..77d92f2779 Binary files /dev/null and b/src/media/commons-logo-component-100.xcf differ diff --git a/src/media/commons-logo-component.xcf b/src/media/commons-logo-component.xcf new file mode 100644 index 0000000000..3670221da7 Binary files /dev/null and b/src/media/commons-logo-component.xcf differ diff --git a/src/media/logo.png b/src/media/logo.png new file mode 100644 index 0000000000..93bb6c0148 Binary files /dev/null and b/src/media/logo.png differ diff --git a/src/site/resources/images/logo.png b/src/site/resources/images/logo.png index 77e721d2c7..93bb6c0148 100644 Binary files a/src/site/resources/images/logo.png and b/src/site/resources/images/logo.png differ diff --git a/src/site/resources/images/logo.xcf b/src/site/resources/images/logo.xcf deleted file mode 100644 index 98ff21ec1d..0000000000 Binary files a/src/site/resources/images/logo.xcf and /dev/null differ diff --git a/src/site/resources/pmd/pmd-ruleset.xml b/src/site/resources/pmd/pmd-ruleset.xml index 5acc764021..74e41f991d 100644 --- a/src/site/resources/pmd/pmd-ruleset.xml +++ b/src/site/resources/pmd/pmd-ruleset.xml @@ -7,7 +7,7 @@ (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -59,7 +59,6 @@ - diff --git a/src/site/resources/spotbugs/spotbugs-exclude-filter.xml b/src/site/resources/spotbugs/spotbugs-exclude-filter.xml index a7364d40db..79c57d3ae4 100644 --- a/src/site/resources/spotbugs/spotbugs-exclude-filter.xml +++ b/src/site/resources/spotbugs/spotbugs-exclude-filter.xml @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -54,5 +54,12 @@ + + + + + + + diff --git a/src/site/site.xml b/src/site/site.xml index 86c1afa1eb..232c2056c5 100644 --- a/src/site/site.xml +++ b/src/site/site.xml @@ -7,7 +7,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/src/site/xdoc/download_csv.xml b/src/site/xdoc/download_csv.xml index 00b7f3c74c..151c3f69ec 100644 --- a/src/site/xdoc/download_csv.xml +++ b/src/site/xdoc/download_csv.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -56,10 +56,12 @@ limitations under the License. | | +======================================================================+ --> - + Download Apache Commons CSV - Apache Commons Documentation Team + Apache Commons Team
@@ -79,7 +81,7 @@ limitations under the License. mirrors (at the end of the mirrors list) that should be available.

- [if-any logo][end] + [if-any logo]Logo[end]

@@ -113,32 +115,32 @@ limitations under the License.

-
+
- - - + + + - - - + + +
commons-csv-1.13.0-bin.tar.gzsha512pgpcommons-csv-1.14.1-bin.tar.gzsha512pgp
commons-csv-1.13.0-bin.zipsha512pgpcommons-csv-1.14.1-bin.zipsha512pgp
- - - + + + - - - + + +
commons-csv-1.13.0-src.tar.gzsha512pgpcommons-csv-1.14.1-src.tar.gzsha512pgp
commons-csv-1.13.0-src.zipsha512pgpcommons-csv-1.14.1-src.zipsha512pgp
diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml index 7c7c4f3cc7..ac5b8cfa9f 100644 --- a/src/site/xdoc/index.xml +++ b/src/site/xdoc/index.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -20,30 +20,18 @@ limitations under the License. Home Apache Commons Team + + + -

Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.

-

The most common CSV formats are predefined in the CSVFormat class: -

-

-

Custom formats can be created using a fluent style API.

+

Read the documentation starting with the Javadoc Overview.

-

An overview of the functionality is provided in the -user guide. +user guide. Various project reports are also available.

@@ -58,7 +46,6 @@ The git repository can be browsed.

-
-

The latest code can be checked out from our git repository at https://gitbox.apache.org/repos/asf/commons-csv.git. You can build the component using Apache Maven using mvn clean package.

- +
+

+ Apache Commons CSV requires Java 8 or above. +

+ + + + + + + + + + + + + + + +
Commons CSVJavaAndroid
1.10.0+8Android 7.0 (API level 24)
+

The commons developer mailing list is the main channel of communication for contributors. Please remember that the lists are shared between all commons components, so prefix your email by [csv].

@@ -97,7 +103,6 @@ For previous releases, see the TagList report.

If you'd like to offer up pull requests via GitHub rather than applying patches to JIRA, we have a GitHub mirror.

-

The commons mailing lists act as the main support forum. @@ -111,14 +116,12 @@ For previous releases, see the

Commons CSV was started to unify a common and simple interface for reading and writing CSV files under an ASL license. It has been bootstrapped by a code donation from Netcetera in Switzerland. There are three pre-existing BSD compatible CSV parsers which this component will hopefully make redundant (authors willing):

In addition to the code from Netcetera (org.apache.commons.csv), Martin van den Bemt has added an additional writer API.

Other CSV implementations:

@@ -126,7 +129,5 @@ For previous releases, see the Super CSV
- - diff --git a/src/site/xdoc/issue-tracking.xml b/src/site/xdoc/issue-tracking.xml index 3564ef4fdd..3aa64b4042 100644 --- a/src/site/xdoc/issue-tracking.xml +++ b/src/site/xdoc/issue-tracking.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -41,10 +41,12 @@ limitations under the License. | | +======================================================================+ --> - + Apache Commons CSV Issue tracking - Apache Commons Documentation Team + Apache Commons Team @@ -64,6 +66,7 @@ limitations under the License.

If you would like to report a bug, or raise an enhancement request with Apache Commons CSV please do the following: +

  1. Search existing open bugs. If you find your issue listed then please add a comment with your details.
  2. @@ -73,16 +76,15 @@ limitations under the License.
  3. Submit either a bug report or enhancement request.
-

Please also remember these points: +

  • the more information you provide, the better we can help you
  • test cases are vital, particularly for any proposed enhancements
  • the developers of Apache Commons CSV are all unpaid volunteers
-

For more information on creating patches see the @@ -91,12 +93,12 @@ limitations under the License.

You may also find these links useful: +

-

diff --git a/src/site/xdoc/mail-lists.xml b/src/site/xdoc/mail-lists.xml index 727e4a555a..345cef8996 100644 --- a/src/site/xdoc/mail-lists.xml +++ b/src/site/xdoc/mail-lists.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -39,10 +39,12 @@ limitations under the License. | | +======================================================================+ --> - + Apache Commons CSV Mailing Lists - Apache Commons Documentation Team + Apache Commons Team @@ -53,10 +55,10 @@ limitations under the License. To make it easier for people to only read messages related to components they are interested in, the convention in Commons is to prefix the subject line of messages with the component's name, for example: -
    -
  • [csv] Problem with the ...
  • -

+
    +
  • [csv] Problem with the ...
  • +

Questions related to the usage of Apache Commons CSV should be posted to the User List. diff --git a/src/site/xdoc/security.xml b/src/site/xdoc/security.xml index ab00560494..47edf5d116 100644 --- a/src/site/xdoc/security.xml +++ b/src/site/xdoc/security.xml @@ -47,5 +47,10 @@

None.

+
+

+ For information about safe deserialization, please see Safe Deserialization. +

+
\ No newline at end of file diff --git a/src/site/xdoc/user-guide.xml b/src/site/xdoc/user-guide.xml index 3ec3dd9b2d..d5a1f26850 100644 --- a/src/site/xdoc/user-guide.xml +++ b/src/site/xdoc/user-guide.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -21,179 +21,6 @@ limitations under the License. Apache Commons Documentation Team - - -

Apache Commons CSV User Guide

- - - - -
- - Parsing files with Apache Commons CSV is relatively straight forward. - The CSVFormat class provides some commonly used CSV variants: - -
-
DEFAULT
Standard Comma Separated Value format, as for RFC4180 but allowing empty lines.
-
EXCEL
The Microsoft Excel CSV format.
-
INFORMIX_UNLOAD1.3
Informix UNLOAD format used by the UNLOAD TO file_name operation.
-
INFORMIX_UNLOAD_CSV1.3
Informix CSV UNLOAD format used by the UNLOAD TO file_name operation (escaping is disabled.)
-
MONGO_CSV1.7
MongoDB CSV format used by the mongoexport operation.
-
MONGO_TSV1.7
MongoDB TSV format used by the mongoexport operation.
-
MYSQL
The MySQL CSV format.
-
ORACLE1.6
Default Oracle format used by the SQL*Loader utility.
-
POSTGRESSQL_CSV1.5
Default PostgreSQL CSV format used by the COPY operation.
-
POSTGRESSQL_TEXT1.5
Default PostgreSQL text format used by the COPY operation.
-
RFC-4180
The RFC-4180 format defined by RFC-4180.
-
TDF
A tab delimited format.
-
- - -

To parse an Excel CSV file, write:

- Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in); -for (CSVRecord record : records) { - String lastName = record.get("Last Name"); - String firstName = record.get("First Name"); -} - -
- -

- To handle files that start with a Byte Order Mark (BOM) like some Excel CSV files, you need an extra step to - deal with these optional bytes. - You can use the - - BOMInputStream - - class from - Apache Commons IO - for example: -

- -try (Reader reader = new InputStreamReader(BOMInputStream.builder() - .setPath(path) - .get(), "UTF-8"); - CSVParser parser = CSVFormat.EXCEL.builder() - .setHeader() - .get() - .parse(reader)) { - for (final CSVRecord record : parser) { - final String string = record.get("ColumnA"); - // ... - } -} - -

- You might find it handy to create something like this: -

- -/** - * Creates a reader capable of handling BOMs. - * - * @param path The path to read. - * @return a new InputStreamReader for UTF-8 bytes. - * @throws IOException if an I/O error occurs. - */ -public InputStreamReader newReader(final Path path) throws IOException { - return new InputStreamReader(BOMInputStream.builder() - .setPath(path) - .get(), StandardCharsets.UTF_8); -} - -
-
-
- Apache Commons CSV provides several ways to access record values. - The simplest way is to access values by their index in the record. - However, columns in CSV files often have a name, for example: ID, CustomerNo, Birthday, etc. - The CSVFormat class provides an API for specifying these header names and CSVRecord on - the other hand has methods to access values by their corresponding header name. - - To access a record value by index, no special configuration of the CSVFormat is necessary: - Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.parse(in); -for (CSVRecord record : records) { - String columnOne = record.get(0); - String columnTwo = record.get(1); -} - - - - Indices may not be the most intuitive way to access record values. For this reason it is possible to - assign names to each column in the file: - Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.builder() - .setHeader("ID", "CustomerNo", "Name") - .build() - .parse(in); -for (CSVRecord record : records) { - String id = record.get("ID"); - String customerNo = record.get("CustomerNo"); - String name = record.get("Name"); -} - - Note that column values can still be accessed using their index. - - - Using String values all over the code to reference columns can be error prone. For this reason, - it is possible to define an enum to specify header names. Note that the enum constant names are - used to access column values. 
This may lead to enums constant names which do not follow the Java - coding standard of defining constants in upper case with underscores: - public enum Headers { - ID, CustomerNo, Name -} -Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.builder() - .setHeader(Headers.class) - .build() - .parse(in); -for (CSVRecord record : records) { - String id = record.get(Headers.ID); - String customerNo = record.get(Headers.CustomerNo); - String name = record.get(Headers.Name); -} - - Again it is possible to access values by their index and by using a String (for example "CustomerNo"). - - - Some CSV files define header names in their first record. If configured, Apache Commons CSV can parse - the header names from the first record: - Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.builder() - .setHeader() - .setSkipHeaderRecord(true) - .build() - .parse(in); -for (CSVRecord record : records) { - String id = record.get("ID"); - String customerNo = record.get("CustomerNo"); - String name = record.get("Name"); -} - - This will use the values from the first record as header names and skip the first record when iterating. - - -

- To print a CSV file with headers, you specify the headers in the format: -

- final Appendable out = ...; -final CSVPrinter printer = CSVFormat.DEFAULT.builder() - .setHeader("H1", "H2") - .build() - .print(out); - -

- To print a CSV file with JDBC column labels, you specify the ResultSet in the format: -

- try (final ResultSet resultSet = ...) { - final CSVPrinter printer = CSVFormat.DEFAULT.builder() - .setHeader(resultSet) - .build() - .print(out); -} - -
-
+

The User Guide migrated to the Javadoc.

diff --git a/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java b/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java index 124e9efce6..2f518a1206 100644 --- a/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java +++ b/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java @@ -19,13 +19,16 @@ package org.apache.commons.csv; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -34,7 +37,7 @@ * Tests parsing of duplicate column names in a CSV header. * The test verifies that headers are consistently handled by CSVFormat and CSVParser. */ -public class CSVDuplicateHeaderTest { +class CSVDuplicateHeaderTest { /** * Return test cases for duplicate header data for use in CSVFormat. 
@@ -272,7 +275,7 @@ static Stream duplicateHeaderData() { */ @ParameterizedTest @MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"}) - public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode, + void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode, final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final String[] headers, @@ -285,11 +288,11 @@ public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode, .setHeader(headers); if (valid) { final CSVFormat format = builder.get(); - Assertions.assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode"); - Assertions.assertEquals(allowMissingColumnNames, format.getAllowMissingColumnNames(), "AllowMissingColumnNames"); - Assertions.assertArrayEquals(headers, format.getHeader(), "Header"); + assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode"); + assertEquals(allowMissingColumnNames, format.getAllowMissingColumnNames(), "AllowMissingColumnNames"); + assertArrayEquals(headers, format.getHeader(), "Header"); } else { - Assertions.assertThrows(IllegalArgumentException.class, builder::get); + assertThrows(IllegalArgumentException.class, builder::get); } } @@ -305,7 +308,7 @@ public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode, */ @ParameterizedTest @MethodSource(value = {"duplicateHeaderData"}) - public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode, + void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode, final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final String[] headers, @@ -327,10 +330,10 @@ public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode, try (CSVParser parser = CSVParser.parse(input, format)) { // Parser ignores null headers final List expected = Arrays.stream(headers).filter(s -> s != null).collect(Collectors.toList()); - Assertions.assertEquals(expected, parser.getHeaderNames(), 
"HeaderNames"); + assertEquals(expected, parser.getHeaderNames(), "HeaderNames"); } } else { - Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format)); + assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format)); } } } diff --git a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java index fd989779dd..e74d0e6884 100644 --- a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java @@ -40,7 +40,7 @@ /** * Parse tests using test files */ -public class CSVFileParserTest { +class CSVFileParserTest { private static final File BASE_DIR = new File("src/test/resources/org/apache/commons/csv/CSVFileParser"); @@ -59,7 +59,7 @@ private String readTestData(final BufferedReader reader) throws IOException { @ParameterizedTest @MethodSource("generateData") - public void testCSVFile(final File testFile) throws Exception { + void testCSVFile(final File testFile) throws Exception { try (FileReader fr = new FileReader(testFile); BufferedReader testDataReader = new BufferedReader(fr)) { String line = readTestData(testDataReader); assertNotNull("file must contain config line", line); @@ -104,7 +104,7 @@ public void testCSVFile(final File testFile) throws Exception { @ParameterizedTest @MethodSource("generateData") - public void testCSVUrl(final File testFile) throws Exception { + void testCSVUrl(final File testFile) throws Exception { try (FileReader fr = new FileReader(testFile); BufferedReader testData = new BufferedReader(fr)) { String line = readTestData(testData); assertNotNull("file must contain config line", line); diff --git a/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java b/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java index e907d4f17f..dad08cdb1d 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java +++ 
b/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java @@ -26,7 +26,7 @@ /** * Tests {@link CSVFormat.Predefined}. */ -public class CSVFormatPredefinedTest { +class CSVFormatPredefinedTest { private void test(final CSVFormat format, final String enumName) { assertEquals(format, CSVFormat.Predefined.valueOf(enumName).getFormat()); @@ -34,52 +34,52 @@ private void test(final CSVFormat format, final String enumName) { } @Test - public void testDefault() { + void testDefault() { test(CSVFormat.DEFAULT, "Default"); } @Test - public void testExcel() { + void testExcel() { test(CSVFormat.EXCEL, "Excel"); } @Test - public void testMongoDbCsv() { + void testMongoDbCsv() { test(CSVFormat.MONGODB_CSV, "MongoDBCsv"); } @Test - public void testMongoDbTsv() { + void testMongoDbTsv() { test(CSVFormat.MONGODB_TSV, "MongoDBTsv"); } @Test - public void testMySQL() { + void testMySQL() { test(CSVFormat.MYSQL, "MySQL"); } @Test - public void testOracle() { + void testOracle() { test(CSVFormat.ORACLE, "Oracle"); } @Test - public void testPostgreSqlCsv() { + void testPostgreSqlCsv() { test(CSVFormat.POSTGRESQL_CSV, "PostgreSQLCsv"); } @Test - public void testPostgreSqlText() { + void testPostgreSqlText() { test(CSVFormat.POSTGRESQL_TEXT, "PostgreSQLText"); } @Test - public void testRFC4180() { + void testRFC4180() { test(CSVFormat.RFC4180, "RFC4180"); } @Test - public void testTDF() { + void testTDF() { test(CSVFormat.TDF, "TDF"); } } diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java index 9677d8ecc2..ed20898de9 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java @@ -26,6 +26,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static 
org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotSame; import static org.junit.jupiter.api.Assertions.assertNull; @@ -48,13 +49,12 @@ import java.util.Objects; import org.apache.commons.csv.CSVFormat.Builder; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; /** * Tests {@link CSVFormat}. */ -public class CSVFormatTest { +class CSVFormatTest { public enum EmptyEnum { // empty enum. @@ -64,16 +64,16 @@ public enum Header { Name, Email, Phone } - private static void assertNotEquals(final Object right, final Object left) { - Assertions.assertNotEquals(right, left); - Assertions.assertNotEquals(left, right); + private static void assertNotEqualsFlip(final Object right, final Object left) { + assertNotEquals(right, left); + assertNotEquals(left, right); } private static CSVFormat copy(final CSVFormat format) { return format.builder().setDelimiter(format.getDelimiter()).get(); } - private void assertNotEquals(final String name, final String type, final Object left, final Object right) { + private void assertNotEqualsHash(final String name, final String type, final Object left, final Object right) { if (left.equals(right) || right.equals(left)) { fail("Objects must not compare equal for " + name + "(" + type + ")"); } @@ -83,45 +83,65 @@ private void assertNotEquals(final String name, final String type, final Object } @Test - public void testBuildVsGet() { + void testBuildVsGet() { final Builder builder = CSVFormat.DEFAULT.builder(); assertNotSame(builder.get(), builder.build()); } @Test - public void testDelimiterEmptyStringThrowsException1() { + void testDelimiterCharLineBreakCrThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter(Constants.CR).get()); + } + + @Test + void testDelimiterCharLineBreakLfThrowsException1() { + assertThrows(IllegalArgumentException.class, 
() -> CSVFormat.DEFAULT.builder().setDelimiter(Constants.LF).get()); + } + + @Test + void testDelimiterEmptyStringThrowsException1() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter("").get()); } @SuppressWarnings("deprecation") @Test - public void testDelimiterSameAsCommentStartThrowsException_Deprecated() { + void testDelimiterSameAsCommentStartThrowsException_Deprecated() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withCommentMarker('!')); } @Test - public void testDelimiterSameAsCommentStartThrowsException1() { + void testDelimiterSameAsCommentStartThrowsException1() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setCommentMarker('!').get()); } @SuppressWarnings("deprecation") @Test - public void testDelimiterSameAsEscapeThrowsException_Deprecated() { + void testDelimiterSameAsEscapeThrowsException_Deprecated() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withEscape('!')); } @Test - public void testDelimiterSameAsEscapeThrowsException1() { + void testDelimiterSameAsEscapeThrowsException1() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setEscape('!').get()); } @Test - public void testDelimiterSameAsRecordSeparatorThrowsException() { + void testDelimiterSameAsRecordSeparatorThrowsException() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat(CR)); } @Test - public void testDuplicateHeaderElements() { + void testDelimiterStringLineBreakCrThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter(String.valueOf(Constants.CR)).get()); + } + + @Test + void testDelimiterStringLineBreakLfThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter(String.valueOf(Constants.LF)).get()); + } + + @Test + 
void testDuplicateHeaderElements() { final String[] header = { "A", "A" }; final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader(header).get(); assertEquals(2, format.getHeader().length); @@ -130,7 +150,7 @@ public void testDuplicateHeaderElements() { @SuppressWarnings("deprecation") @Test - public void testDuplicateHeaderElements_Deprecated() { + void testDuplicateHeaderElements_Deprecated() { final String[] header = { "A", "A" }; final CSVFormat format = CSVFormat.DEFAULT.withHeader(header); assertEquals(2, format.getHeader().length); @@ -138,48 +158,48 @@ public void testDuplicateHeaderElements_Deprecated() { } @Test - public void testDuplicateHeaderElementsFalse() { + void testDuplicateHeaderElementsFalse() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "A").get()); } @SuppressWarnings("deprecation") @Test - public void testDuplicateHeaderElementsFalse_Deprecated() { + void testDuplicateHeaderElementsFalse_Deprecated() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(false).withHeader("A", "A")); } @Test - public void testDuplicateHeaderElementsTrue() { + void testDuplicateHeaderElementsTrue() { CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("A", "A").get(); } @SuppressWarnings("deprecation") @Test - public void testDuplicateHeaderElementsTrue_Deprecated() { + void testDuplicateHeaderElementsTrue_Deprecated() { CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(true).withHeader("A", "A"); } @Test - public void testDuplicateHeaderElementsTrueContainsEmpty1() { + void testDuplicateHeaderElementsTrueContainsEmpty1() { CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "", "B", "").get(); } @Test - public void testDuplicateHeaderElementsTrueContainsEmpty2() { + void testDuplicateHeaderElementsTrueContainsEmpty2() { 
CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).setHeader("A", "", "B", "").get(); } @Test - public void testDuplicateHeaderElementsTrueContainsEmpty3() { + void testDuplicateHeaderElementsTrueContainsEmpty3() { CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setAllowMissingColumnNames(true).setHeader("A", "", "B", "").get(); } @Test - public void testEquals() { + void testEquals() { final CSVFormat right = CSVFormat.DEFAULT; final CSVFormat left = copy(right); - Assertions.assertNotEquals(null, right); - Assertions.assertNotEquals("A String Instance", right); + assertNotEquals(null, right); + assertNotEquals("A String Instance", right); assertEquals(right, right); assertEquals(right, left); assertEquals(left, right); @@ -188,49 +208,49 @@ public void testEquals() { } @Test - public void testEqualsCommentStart() { + void testEqualsCommentStart() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').setCommentMarker('#').setQuoteMode(QuoteMode.ALL).get(); final CSVFormat left = right.builder().setCommentMarker('!').get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsCommentStart_Deprecated() { + void testEqualsCommentStart_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withCommentMarker('#').withQuoteMode(QuoteMode.ALL); final CSVFormat left = right.withCommentMarker('!'); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsDelimiter() { + void testEqualsDelimiter() { final CSVFormat right = CSVFormat.newFormat('!'); final CSVFormat left = CSVFormat.newFormat('?'); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsEscape() { + void testEqualsEscape() { final CSVFormat right = 
CSVFormat.newFormat('\'').builder().setQuote('"').setCommentMarker('#').setEscape('+').setQuoteMode(QuoteMode.ALL).get(); final CSVFormat left = right.builder().setEscape('!').get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsEscape_Deprecated() { + void testEqualsEscape_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withCommentMarker('#').withEscape('+').withQuoteMode(QuoteMode.ALL); final CSVFormat left = right.withEscape('!'); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsHash() throws Exception { + void testEqualsHash() throws Exception { final Method[] methods = CSVFormat.class.getDeclaredMethods(); for (final Method method : methods) { if (Modifier.isPublic(method.getModifiers())) { @@ -242,49 +262,49 @@ public void testEqualsHash() throws Exception { case "boolean": { final Object defTrue = method.invoke(CSVFormat.DEFAULT, Boolean.TRUE); final Object defFalse = method.invoke(CSVFormat.DEFAULT, Boolean.FALSE); - assertNotEquals(name, type, defTrue, defFalse); + assertNotEqualsHash(name, type, defTrue, defFalse); break; } case "char": { final Object a = method.invoke(CSVFormat.DEFAULT, 'a'); final Object b = method.invoke(CSVFormat.DEFAULT, 'b'); - assertNotEquals(name, type, a, b); + assertNotEqualsHash(name, type, a, b); break; } case "java.lang.Character": { final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { null }); final Object b = method.invoke(CSVFormat.DEFAULT, Character.valueOf('d')); - assertNotEquals(name, type, a, b); + assertNotEqualsHash(name, type, a, b); break; } case "java.lang.String": { final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { null }); final Object b = method.invoke(CSVFormat.DEFAULT, "e"); - assertNotEquals(name, type, a, b); + assertNotEqualsHash(name, type, a, b); break; } case "java.lang.String[]": { final Object a = 
method.invoke(CSVFormat.DEFAULT, new Object[] { new String[] { null, null } }); final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] { new String[] { "f", "g" } }); - assertNotEquals(name, type, a, b); + assertNotEqualsHash(name, type, a, b); break; } case "org.apache.commons.csv.QuoteMode": { final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL); final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL); - assertNotEquals(name, type, a, b); + assertNotEqualsHash(name, type, a, b); break; } case "org.apache.commons.csv.DuplicateHeaderMode": { final Object a = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.ALLOW_ALL); final Object b = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.DISALLOW); - assertNotEquals(name, type, a, b); + assertNotEqualsHash(name, type, a, b); break; } case "java.lang.Object[]": { final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { new Object[] { null, null } }); final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] { new Object[] { new Object(), new Object() } }); - assertNotEquals(name, type, a, b); + assertNotEqualsHash(name, type, a, b); break; } default: @@ -302,81 +322,89 @@ public void testEqualsHash() throws Exception { } @Test - public void testEqualsHeader() { + void testEqualsHeader() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setHeader("One", "Two", "Three") .setIgnoreEmptyLines(true).setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).get(); final CSVFormat left = right.builder().setHeader("Three", "Two", "One").get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsHeader_Deprecated() { + void testEqualsHeader_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withHeader("One", "Two", "Three") 
.withIgnoreEmptyLines().withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL); final CSVFormat left = right.withHeader("Three", "Two", "One"); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsIgnoreEmptyLines() { + void testEqualsIgnoreEmptyLines() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).get(); final CSVFormat left = right.builder().setIgnoreEmptyLines(false).get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsIgnoreEmptyLines_Deprecated() { + void testEqualsIgnoreEmptyLines_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withCommentMarker('#').withEscape('+').withIgnoreEmptyLines().withIgnoreSurroundingSpaces() .withQuote('"').withQuoteMode(QuoteMode.ALL); final CSVFormat left = right.withIgnoreEmptyLines(false); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsIgnoreSurroundingSpaces() { + void testEqualsIgnoreSurroundingSpaces() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setCommentMarker('#').setEscape('+').setIgnoreSurroundingSpaces(true).setQuote('"') .setQuoteMode(QuoteMode.ALL).get(); final CSVFormat left = right.builder().setIgnoreSurroundingSpaces(false).get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsIgnoreSurroundingSpaces_Deprecated() { + void testEqualsIgnoreSurroundingSpaces_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withCommentMarker('#').withEscape('+').withIgnoreSurroundingSpaces().withQuote('"') .withQuoteMode(QuoteMode.ALL); final CSVFormat left = right.withIgnoreSurroundingSpaces(false); - assertNotEquals(right, left); + 
assertNotEqualsFlip(right, left); } @Test - public void testEqualsLeftNoQuoteRightQuote() { + void testEqualsLeftNoQuoteRightQuote() { final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).get(); final CSVFormat right = left.builder().setQuote('#').get(); - assertNotEquals(left, right); + assertNotEqualsFlip(left, right); } @SuppressWarnings("deprecation") @Test - public void testEqualsLeftNoQuoteRightQuote_Deprecated() { + void testEqualsLeftNoQuoteRightQuote_Deprecated() { final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); final CSVFormat right = left.withQuote('#'); - assertNotEquals(left, right); + assertNotEqualsFlip(left, right); } @Test - public void testEqualsNoQuotes() { + void testEqualsMaxRows() { + final CSVFormat right = CSVFormat.DEFAULT.builder().setMaxRows(10).get(); + final CSVFormat left = CSVFormat.DEFAULT.builder().setMaxRows(1000).get(); + assertNotEqualsFlip(right, left); + assertNotEquals(right.hashCode(), left.hashCode()); + } + + @Test + void testEqualsNoQuotes() { final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).get(); final CSVFormat right = left.builder().setQuote(null).get(); @@ -385,7 +413,7 @@ public void testEqualsNoQuotes() { @SuppressWarnings("deprecation") @Test - public void testEqualsNoQuotes_Deprecated() { + void testEqualsNoQuotes_Deprecated() { final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); final CSVFormat right = left.withQuote(null); @@ -393,26 +421,26 @@ public void testEqualsNoQuotes_Deprecated() { } @Test - public void testEqualsNullString() { + void testEqualsNullString() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).setNullString("null").get(); final CSVFormat left = right.builder().setNullString("---").get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, 
left); } @SuppressWarnings("deprecation") @Test - public void testEqualsNullString_Deprecated() { + void testEqualsNullString_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL).withNullString("null"); final CSVFormat left = right.withNullString("---"); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsOne() { + void testEqualsOne() { final CSVFormat csvFormatOne = CSVFormat.INFORMIX_UNLOAD; final CSVFormat csvFormatTwo = CSVFormat.MYSQL; @@ -475,7 +503,7 @@ public void testEqualsOne() { assertFalse(csvFormatTwo.isCommentMarkerSet()); assertNotSame(csvFormatTwo, csvFormatOne); - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); + assertNotEquals(csvFormatTwo, csvFormatOne); assertEquals('\\', (char) csvFormatOne.getEscapeCharacter()); assertNull(csvFormatOne.getQuoteMode()); @@ -534,86 +562,86 @@ public void testEqualsOne() { assertNotSame(csvFormatOne, csvFormatTwo); assertNotSame(csvFormatTwo, csvFormatOne); - Assertions.assertNotEquals(csvFormatOne, csvFormatTwo); - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); + assertNotEquals(csvFormatOne, csvFormatTwo); + assertNotEquals(csvFormatTwo, csvFormatOne); - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); + assertNotEquals(csvFormatTwo, csvFormatOne); } @Test - public void testEqualsQuoteChar() { + void testEqualsQuoteChar() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').get(); final CSVFormat left = right.builder().setQuote('!').get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsQuoteChar_Deprecated() { + void testEqualsQuoteChar_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"'); final CSVFormat left = right.withQuote('!'); - 
assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsQuotePolicy() { + void testEqualsQuotePolicy() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').setQuoteMode(QuoteMode.ALL).get(); final CSVFormat left = right.builder().setQuoteMode(QuoteMode.MINIMAL).get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsQuotePolicy_Deprecated() { + void testEqualsQuotePolicy_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withQuoteMode(QuoteMode.ALL); final CSVFormat left = right.withQuoteMode(QuoteMode.MINIMAL); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsRecordSeparator() { + void testEqualsRecordSeparator() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).get(); final CSVFormat left = right.builder().setRecordSeparator(LF).get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsRecordSeparator_Deprecated() { + void testEqualsRecordSeparator_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL); final CSVFormat left = right.withRecordSeparator(LF); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } - public void testEqualsSkipHeaderRecord() { + void testEqualsSkipHeaderRecord() { final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) 
.setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).setNullString("null").setSkipHeaderRecord(true).get(); final CSVFormat left = right.builder().setSkipHeaderRecord(false).get(); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @SuppressWarnings("deprecation") @Test - public void testEqualsSkipHeaderRecord_Deprecated() { + void testEqualsSkipHeaderRecord_Deprecated() { final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL).withNullString("null").withSkipHeaderRecord(); final CSVFormat left = right.withSkipHeaderRecord(false); - assertNotEquals(right, left); + assertNotEqualsFlip(right, left); } @Test - public void testEqualsWithNull() { + void testEqualsWithNull() { final CSVFormat csvFormat = CSVFormat.POSTGRESQL_TEXT; @@ -671,23 +699,23 @@ public void testEqualsWithNull() { assertNull(csvFormat.getQuoteCharacter()); assertTrue(csvFormat.isNullStringSet()); - Assertions.assertNotEquals(null, csvFormat); + assertNotEquals(null, csvFormat); } @Test - public void testEscapeSameAsCommentStartThrowsException() { + void testEscapeSameAsCommentStartThrowsException() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setEscape('!').setCommentMarker('!').get()); } @SuppressWarnings("deprecation") @Test - public void testEscapeSameAsCommentStartThrowsException_Deprecated() { + void testEscapeSameAsCommentStartThrowsException_Deprecated() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape('!').withCommentMarker('!')); } @Test - public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType() { + void testEscapeSameAsCommentStartThrowsExceptionForWrapperType() { // Cannot assume that callers won't use different Character objects assertThrows(IllegalArgumentException.class, () -> 
CSVFormat.DEFAULT.builder().setEscape(Character.valueOf('!')).setCommentMarker(Character.valueOf('!')).get()); @@ -695,13 +723,13 @@ public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType() { @SuppressWarnings("deprecation") @Test - public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { + void testEscapeSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { // Cannot assume that callers won't use different Character objects assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(Character.valueOf('!')).withCommentMarker(Character.valueOf('!'))); } @Test - public void testFormat() { + void testFormat() { final CSVFormat format = CSVFormat.DEFAULT; assertEquals("", format.format()); @@ -710,7 +738,7 @@ public void testFormat() { } @Test // I assume this to be a defect. - public void testFormatThrowsNullPointerException() { + void testFormatThrowsNullPointerException() { final CSVFormat csvFormat = CSVFormat.MYSQL; @@ -719,7 +747,7 @@ public void testFormatThrowsNullPointerException() { } @Test - public void testFormatToString() { + void testFormatToString() { // @formatter:off final CSVFormat format = CSVFormat.RFC4180 .withEscape('?') @@ -739,7 +767,7 @@ public void testFormatToString() { } @Test - public void testGetAllowDuplicateHeaderNames() { + void testGetAllowDuplicateHeaderNames() { final Builder builder = CSVFormat.DEFAULT.builder(); assertTrue(builder.get().getAllowDuplicateHeaderNames()); assertTrue(builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).get().getAllowDuplicateHeaderNames()); @@ -748,7 +776,7 @@ public void testGetAllowDuplicateHeaderNames() { } @Test - public void testGetDuplicateHeaderMode() { + void testGetDuplicateHeaderMode() { final Builder builder = CSVFormat.DEFAULT.builder(); assertEquals(DuplicateHeaderMode.ALLOW_ALL, builder.get().getDuplicateHeaderMode()); @@ -758,7 +786,7 @@ public void testGetDuplicateHeaderMode() { } @Test - public void 
testGetHeader() { + void testGetHeader() { final String[] header = { "one", "two", "three" }; final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); // getHeader() makes a copy of the header array. @@ -771,7 +799,7 @@ public void testGetHeader() { } @Test - public void testHashCodeAndWithIgnoreHeaderCase() { + void testHashCodeAndWithIgnoreHeaderCase() { final CSVFormat csvFormat = CSVFormat.INFORMIX_UNLOAD_CSV; final CSVFormat csvFormatTwo = csvFormat.withIgnoreHeaderCase(); @@ -781,7 +809,7 @@ public void testHashCodeAndWithIgnoreHeaderCase() { assertTrue(csvFormatTwo.getIgnoreHeaderCase()); // now different assertFalse(csvFormatTwo.getTrailingDelimiter()); - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal assertFalse(csvFormatTwo.getAllowMissingColumnNames()); assertFalse(csvFormatTwo.getTrim()); @@ -789,18 +817,18 @@ public void testHashCodeAndWithIgnoreHeaderCase() { } @Test - public void testJiraCsv236() { + void testJiraCsv236() { CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("CC", "VV", "VV").get(); } @SuppressWarnings("deprecation") @Test - public void testJiraCsv236__Deprecated() { + void testJiraCsv236__Deprecated() { CSVFormat.DEFAULT.withAllowDuplicateHeaderNames().withHeader("CC", "VV", "VV"); } @Test - public void testNewFormat() { + void testNewFormat() { final CSVFormat csvFormat = CSVFormat.newFormat('X'); @@ -861,7 +889,7 @@ public void testNewFormat() { } @Test - public void testNullRecordSeparatorCsv106() { + void testNullRecordSeparatorCsv106() { final CSVFormat format = CSVFormat.newFormat(';').builder().setSkipHeaderRecord(true).setHeader("H1", "H2").get(); final String formatStr = format.format("A", "B"); assertNotNull(formatStr); @@ -870,7 +898,7 @@ public void testNullRecordSeparatorCsv106() { @SuppressWarnings("deprecation") @Test - public void 
testNullRecordSeparatorCsv106__Deprecated() { + void testNullRecordSeparatorCsv106__Deprecated() { final CSVFormat format = CSVFormat.newFormat(';').withSkipHeaderRecord().withHeader("H1", "H2"); final String formatStr = format.format("A", "B"); assertNotNull(formatStr); @@ -878,7 +906,7 @@ public void testNullRecordSeparatorCsv106__Deprecated() { } @Test - public void testPrintRecord() throws IOException { + void testPrintRecord() throws IOException { final Appendable out = new StringBuilder(); final CSVFormat format = CSVFormat.RFC4180; format.printRecord(out, "a", "b", "c"); @@ -886,7 +914,7 @@ public void testPrintRecord() throws IOException { } @Test - public void testPrintRecordEmpty() throws IOException { + void testPrintRecordEmpty() throws IOException { final Appendable out = new StringBuilder(); final CSVFormat format = CSVFormat.RFC4180; format.printRecord(out); @@ -894,7 +922,7 @@ public void testPrintRecordEmpty() throws IOException { } @Test - public void testPrintWithEscapesEndWithCRLF() throws IOException { + void testPrintWithEscapesEndWithCRLF() throws IOException { final Reader in = new StringReader("x,y,x\r\na,?b,c\r\n"); final Appendable out = new StringBuilder(); final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); @@ -903,7 +931,7 @@ public void testPrintWithEscapesEndWithCRLF() throws IOException { } @Test - public void testPrintWithEscapesEndWithoutCRLF() throws IOException { + void testPrintWithEscapesEndWithoutCRLF() throws IOException { final Reader in = new StringReader("x,y,x"); final Appendable out = new StringBuilder(); final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); @@ -912,7 +940,7 @@ public void testPrintWithEscapesEndWithoutCRLF() throws IOException { } @Test - public void testPrintWithoutQuotes() throws IOException { + void testPrintWithoutQuotes() throws IOException { final Reader in = new 
StringReader(""); final Appendable out = new StringBuilder(); final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); @@ -921,7 +949,7 @@ public void testPrintWithoutQuotes() throws IOException { } @Test - public void testPrintWithQuoteModeIsNONE() throws IOException { + void testPrintWithQuoteModeIsNONE() throws IOException { final Reader in = new StringReader("a,b,c"); final Appendable out = new StringBuilder(); final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NONE); @@ -930,7 +958,7 @@ public void testPrintWithQuoteModeIsNONE() throws IOException { } @Test - public void testPrintWithQuotes() throws IOException { + void testPrintWithQuotes() throws IOException { final Reader in = new StringReader("\"a,b,c\r\nx,y,z"); final Appendable out = new StringBuilder(); final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); @@ -938,43 +966,89 @@ public void testPrintWithQuotes() throws IOException { assertEquals("\"\"\"a,b,c\r\nx,y,z\"", out.toString()); } + /** + * Tests CSV-326. 
+ */ @Test - public void testQuoteCharSameAsCommentStartThrowsException() { + void testPrintWithQuotesEscapeBeforeQuote() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder() + .setEscape('\\') + .setQuote('"') + .get(); + final String value = "\\\""; + final Appendable out = new StringBuilder(); + format.print(new StringReader(value), out, true); + try (CSVParser parser = CSVParser.parse(out.toString(), format)) { + assertEquals(value, parser.getRecords().get(0).get(0)); + } + } + + @Test + void testQuoteCharSameAsCommentStartThrowsException() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setCommentMarker('!').get()); } @SuppressWarnings("deprecation") @Test - public void testQuoteCharSameAsCommentStartThrowsException_Deprecated() { + void testQuoteCharSameAsCommentStartThrowsException_Deprecated() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withCommentMarker('!')); } @Test - public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType() { + void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType() { // Cannot assume that callers won't use different Character objects assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote(Character.valueOf('!')).setCommentMarker('!').get()); } @SuppressWarnings("deprecation") @Test - public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { + void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { // Cannot assume that callers won't use different Character objects assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(Character.valueOf('!')).withCommentMarker('!')); } @Test - public void testQuoteCharSameAsDelimiterThrowsException() { + void testQuoteCharSameAsDelimiterThrowsException() { assertThrows(IllegalArgumentException.class, () -> 
CSVFormat.DEFAULT.builder().setQuote('!').setDelimiter('!').get()); } @SuppressWarnings("deprecation") @Test - public void testQuoteCharSameAsDelimiterThrowsException_Deprecated() { + void testQuoteCharSameAsDelimiterThrowsException_Deprecated() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withDelimiter('!')); } @Test - public void testQuoteModeNoneShouldReturnMeaningfulExceptionMessage() { + void testQuotedNullStringTracksQuoteCharacter() throws IOException { + final StringBuilder out = new StringBuilder(); + // @formatter:off + final Builder builder = CSVFormat.DEFAULT.builder(); + final CSVFormat format = builder + .setQuoteMode(QuoteMode.ALL) + .setNullString("NULL") + .get(); + // @formatter:on + format.print(null, out, true); + assertEquals("\"NULL\"", out.toString()); + // set + out.setLength(0); + builder.setQuote('\''); + builder.get().print(null, out, true); + assertEquals("'NULL'", out.toString()); + // reset + out.setLength(0); + builder.setQuote((Character) null); + builder.get().print(null, out, true); + assertEquals("\"NULL\"", out.toString()); + // reset, reverse setter order + out.setLength(0); + builder.setNullString(null).setQuote((Character) null).setNullString("NULL"); + builder.get().print(null, out, true); + assertEquals("\"NULL\"", out.toString()); + } + + @Test + void testQuoteModeNoneShouldReturnMeaningfulExceptionMessage() { final Exception exception = assertThrows(IllegalArgumentException.class, () -> // @formatter:off CSVFormat.DEFAULT.builder() @@ -989,18 +1063,18 @@ public void testQuoteModeNoneShouldReturnMeaningfulExceptionMessage() { } @Test - public void testQuotePolicyNoneWithoutEscapeThrowsException() { + void testQuotePolicyNoneWithoutEscapeThrowsException() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').builder().setQuoteMode(QuoteMode.NONE).get()); } @SuppressWarnings("deprecation") @Test - public void 
testQuotePolicyNoneWithoutEscapeThrowsException_Deprecated() { + void testQuotePolicyNoneWithoutEscapeThrowsException_Deprecated() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').withQuoteMode(QuoteMode.NONE)); } @Test - public void testRFC4180() { + void testRFC4180() { assertNull(RFC4180.getCommentMarker()); assertEquals(',', RFC4180.getDelimiter()); assertNull(RFC4180.getEscapeCharacter()); @@ -1012,7 +1086,7 @@ public void testRFC4180() { @SuppressWarnings("boxing") // no need to worry about boxing here @Test - public void testSerialization() throws Exception { + void testSerialization() throws Exception { final ByteArrayOutputStream out = new ByteArrayOutputStream(); try (ObjectOutputStream oos = new ObjectOutputStream(out)) { @@ -1034,7 +1108,7 @@ public void testSerialization() throws Exception { } @Test - public void testToString() { + void testToString() { final String string = CSVFormat.INFORMIX_UNLOAD.toString(); @@ -1043,7 +1117,7 @@ public void testToString() { } @Test - public void testToStringAndWithCommentMarkerTakingCharacter() { + void testToStringAndWithCommentMarkerTakingCharacter() { final CSVFormat.Predefined csvFormatPredefined = CSVFormat.Predefined.Default; final CSVFormat csvFormat = csvFormatPredefined.getFormat(); @@ -1136,7 +1210,7 @@ public void testToStringAndWithCommentMarkerTakingCharacter() { assertNotSame(csvFormat, csvFormatTwo); assertNotSame(csvFormatTwo, csvFormat); - Assertions.assertNotEquals(csvFormatTwo, csvFormat); + assertNotEquals(csvFormatTwo, csvFormat); assertNull(csvFormat.getEscapeCharacter()); assertTrue(csvFormat.isQuoteCharacterSet()); @@ -1195,16 +1269,16 @@ public void testToStringAndWithCommentMarkerTakingCharacter() { assertNotSame(csvFormat, csvFormatTwo); assertNotSame(csvFormatTwo, csvFormat); - Assertions.assertNotEquals(csvFormat, csvFormatTwo); + assertNotEquals(csvFormat, csvFormatTwo); - Assertions.assertNotEquals(csvFormatTwo, csvFormat); - assertEquals("Delimiter=<,> 
QuoteChar=<\"> CommentStart= " + "RecordSeparator=<\r\n> EmptyLines:ignored SkipHeaderRecord:false", + assertNotEquals(csvFormatTwo, csvFormat); + assertEquals("Delimiter=<,> QuoteChar=<\"> CommentStart= RecordSeparator=<\r\n> EmptyLines:ignored SkipHeaderRecord:false", csvFormatTwo.toString()); } @Test - public void testTrim() throws IOException { + void testTrim() throws IOException { final CSVFormat formatWithTrim = CSVFormat.DEFAULT.withDelimiter(',').withTrim().withQuote(null).withRecordSeparator(CRLF); CharSequence in = "a,b,c"; @@ -1229,29 +1303,29 @@ public void testTrim() throws IOException { } @Test - public void testWithCommentStart() { + void testWithCommentStart() { final CSVFormat formatWithCommentStart = CSVFormat.DEFAULT.withCommentMarker('#'); assertEquals(Character.valueOf('#'), formatWithCommentStart.getCommentMarker()); } @Test - public void testWithCommentStartCRThrowsException() { + void testWithCommentStartCRThrowsException() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withCommentMarker(CR)); } @Test - public void testWithDelimiter() { + void testWithDelimiter() { final CSVFormat formatWithDelimiter = CSVFormat.DEFAULT.withDelimiter('!'); assertEquals('!', formatWithDelimiter.getDelimiter()); } @Test - public void testWithDelimiterLFThrowsException() { + void testWithDelimiterLFThrowsException() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(LF)); } @Test - public void testWithEmptyDuplicates() { + void testWithEmptyDuplicates() { final CSVFormat formatWithEmptyDuplicates = CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).get(); assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode()); @@ -1259,31 +1333,31 @@ public void testWithEmptyDuplicates() { } @Test - public void testWithEmptyEnum() { + void testWithEmptyEnum() { final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(EmptyEnum.class); 
assertEquals(0, formatWithHeader.getHeader().length); } @Test - public void testWithEscape() { + void testWithEscape() { final CSVFormat formatWithEscape = CSVFormat.DEFAULT.withEscape('&'); assertEquals(Character.valueOf('&'), formatWithEscape.getEscapeCharacter()); } @Test - public void testWithEscapeCRThrowsExceptions() { + void testWithEscapeCRThrowsExceptions() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR)); } @Test - public void testWithFirstRecordAsHeader() { + void testWithFirstRecordAsHeader() { final CSVFormat formatWithFirstRecordAsHeader = CSVFormat.DEFAULT.withFirstRecordAsHeader(); assertTrue(formatWithFirstRecordAsHeader.getSkipHeaderRecord()); assertEquals(0, formatWithFirstRecordAsHeader.getHeader().length); } @Test - public void testWithHeader() { + void testWithHeader() { final String[] header = { "one", "two", "three" }; // withHeader() makes a copy of the header array. final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); @@ -1292,7 +1366,7 @@ public void testWithHeader() { } @Test - public void testWithHeaderComments() { + void testWithHeaderComments() { final CSVFormat csvFormat = CSVFormat.DEFAULT; @@ -1383,7 +1457,7 @@ public void testWithHeaderComments() { assertNotSame(csvFormat, csvFormatTwo); assertNotSame(csvFormatTwo, csvFormat); - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal final String string = csvFormatTwo.format(objectArray); @@ -1445,88 +1519,88 @@ public void testWithHeaderComments() { assertNotSame(csvFormatTwo, csvFormat); assertNotNull(string); - Assertions.assertNotEquals(csvFormat, csvFormatTwo); // CSV-244 - should not be equal + assertNotEquals(csvFormat, csvFormatTwo); // CSV-244 - should not be equal - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - 
should not be equal assertEquals(",,,,,,,", string); } @Test - public void testWithHeaderEnum() { + void testWithHeaderEnum() { final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(Header.class); assertArrayEquals(new String[] { "Name", "Email", "Phone" }, formatWithHeader.getHeader()); } @Test - public void testWithHeaderEnumNull() { + void testWithHeaderEnumNull() { final CSVFormat format = CSVFormat.DEFAULT; final Class> simpleName = null; format.withHeader(simpleName); } @Test - public void testWithHeaderResultSetNull() throws SQLException { + void testWithHeaderResultSetNull() throws SQLException { final CSVFormat format = CSVFormat.DEFAULT; final ResultSet resultSet = null; format.withHeader(resultSet); } @Test - public void testWithIgnoreEmptyLines() { + void testWithIgnoreEmptyLines() { assertFalse(CSVFormat.DEFAULT.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); assertTrue(CSVFormat.DEFAULT.withIgnoreEmptyLines().getIgnoreEmptyLines()); } @Test - public void testWithIgnoreSurround() { + void testWithIgnoreSurround() { assertFalse(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); assertTrue(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces().getIgnoreSurroundingSpaces()); } @Test - public void testWithNullString() { + void testWithNullString() { final CSVFormat formatWithNullString = CSVFormat.DEFAULT.withNullString("null"); assertEquals("null", formatWithNullString.getNullString()); } @Test - public void testWithQuoteChar() { + void testWithQuoteChar() { final CSVFormat formatWithQuoteChar = CSVFormat.DEFAULT.withQuote('"'); assertEquals(Character.valueOf('"'), formatWithQuoteChar.getQuoteCharacter()); } @Test - public void testWithQuoteLFThrowsException() { + void testWithQuoteLFThrowsException() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(LF)); } @Test - public void testWithQuotePolicy() { + void testWithQuotePolicy() { final CSVFormat formatWithQuotePolicy = 
CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL); assertEquals(QuoteMode.ALL, formatWithQuotePolicy.getQuoteMode()); } @Test - public void testWithRecordSeparatorCR() { + void testWithRecordSeparatorCR() { final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CR); assertEquals(String.valueOf(CR), formatWithRecordSeparator.getRecordSeparator()); } @Test - public void testWithRecordSeparatorCRLF() { + void testWithRecordSeparatorCRLF() { final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CRLF); assertEquals(CRLF, formatWithRecordSeparator.getRecordSeparator()); } @Test - public void testWithRecordSeparatorLF() { + void testWithRecordSeparatorLF() { final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(LF); assertEquals(String.valueOf(LF), formatWithRecordSeparator.getRecordSeparator()); } @Test - public void testWithSystemRecordSeparator() { + void testWithSystemRecordSeparator() { final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withSystemRecordSeparator(); assertEquals(System.lineSeparator(), formatWithRecordSeparator.getRecordSeparator()); } diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 38d442e55b..6d9bdd9e80 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -22,7 +22,7 @@ import static org.apache.commons.csv.Constants.CR; import static org.apache.commons.csv.Constants.CRLF; import static org.apache.commons.csv.Constants.LF; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.apache.commons.csv.CsvAssertions.assertValuesEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; @@ -33,6 +33,7 @@ import java.io.File; import 
java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.io.PipedReader; import java.io.PipedWriter; @@ -62,14 +63,15 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.ValueSource; /** - * CSVParserTest + * Tests {@link CSVParser}. * * The test are organized in three different sections: The 'setter/getter' section, the lexer section and finally the parser section. In case a test fails, you * should follow a top-down approach for fixing a potential bug (its likely that the parser itself fails if the lexer has problems...). */ -public class CSVParserTest { +class CSVParserTest { private static final CSVFormat EXCEL_WITH_HEADER = CSVFormat.EXCEL.withHeader(); @@ -77,9 +79,13 @@ public class CSVParserTest { private static final String UTF_8_NAME = UTF_8.name(); - private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" + + // @formatter:off + private static final String CSV_INPUT = "a,b,c,d\n" + + " a , b , 1 2 \n" + + "\"foo baar\", b,\n" + // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping + // @formatter:on private static final String CSV_INPUT_1 = "a,b,c,d"; @@ -138,7 +144,7 @@ private void parseFully(final CSVParser parser) { } @Test - public void testBackslashEscaping() throws IOException { + void testBackslashEscaping() throws IOException { // To avoid confusion over the need for escaping chars in java code, // We will test with a forward slash as the escape char, and a single // quote as the encapsulator. 
@@ -169,17 +175,17 @@ public void testBackslashEscaping() throws IOException { try (CSVParser parser = CSVParser.parse(code, format)) { final List records = parser.getRecords(); assertFalse(records.isEmpty()); - Utils.compare("Records do not match expected result", res, records); + Utils.compare("Records do not match expected result", res, records, -1); } } @Test - public void testBackslashEscaping2() throws IOException { + void testBackslashEscaping2() throws IOException { // To avoid confusion over the need for escaping chars in java code, // We will test with a forward slash as the escape char, and a single // quote as the encapsulator. // @formatter:off - final String code = "" + " , , \n" + // 1) + final String code = " , , \n" + // 1) " \t , , \n" + // 2) " // , /, , /,\n" + // 3) ""; @@ -192,15 +198,24 @@ public void testBackslashEscaping2() throws IOException { try (CSVParser parser = CSVParser.parse(code, format)) { final List records = parser.getRecords(); assertFalse(records.isEmpty()); - Utils.compare("", res, records); + Utils.compare("", res, records, -1); } } @Test @Disabled - public void testBackslashEscapingOld() throws IOException { - final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" + "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" + - "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\""; + void testBackslashEscapingOld() throws IOException { + // @formatter:off + final String code = "one,two,three\n" + + "on\\\"e,two\n" + + "on\"e,two\n" + + "one,\"tw\\\"o\"\n" + + "one,\"t\\,wo\"\n" + + "one,two,\"th,ree\"\n" + + "\"a\\\\\"\n" + + "a\\,b\n" + + "\"a\\\\,b\""; + // @formatter:on final String[][] res = { { "one", "two", "three" }, { "on\\\"e", "two" }, { "on\"e", "two" }, { "one", "tw\"o" }, { "one", "t\\,wo" }, // backslash in // quotes only // escapes a @@ -215,14 +230,14 @@ public void testBackslashEscapingOld() throws IOException { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int 
i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } @Test @Disabled("CSV-107") - public void testBOM() throws IOException { + void testBOM() throws IOException { final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv"); try (CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, EXCEL_WITH_HEADER)) { parser.forEach(record -> assertNotNull(record.get("Date"))); @@ -230,7 +245,7 @@ public void testBOM() throws IOException { } @Test - public void testBOMInputStreamParserWithInputStream() throws IOException { + void testBOMInputStreamParserWithInputStream() throws IOException { try (BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"); CSVParser parser = CSVParser.parse(inputStream, UTF_8, EXCEL_WITH_HEADER)) { parser.forEach(record -> assertNotNull(record.get("Date"))); @@ -238,7 +253,7 @@ public void testBOMInputStreamParserWithInputStream() throws IOException { } @Test - public void testBOMInputStreamParserWithReader() throws IOException { + void testBOMInputStreamParserWithReader() throws IOException { try (Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); CSVParser parser = CSVParser.builder() .setReader(reader) @@ -249,7 +264,7 @@ public void testBOMInputStreamParserWithReader() throws IOException { } @Test - public void testBOMInputStreamParseWithReader() throws IOException { + void testBOMInputStreamParseWithReader() throws IOException { try (Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); CSVParser parser = CSVParser.builder() .setReader(reader) @@ -260,7 +275,7 @@ public void testBOMInputStreamParseWithReader() throws IOException { } @Test - public void testCarriageReturnEndings() throws IOException { + void 
testCarriageReturnEndings() throws IOException { final String string = "foo\rbaar,\rhello,world\r,kanu"; try (CSVParser parser = CSVParser.builder().setCharSequence(string).get()) { final List records = parser.getRecords(); @@ -269,7 +284,7 @@ public void testCarriageReturnEndings() throws IOException { } @Test - public void testCarriageReturnLineFeedEndings() throws IOException { + void testCarriageReturnLineFeedEndings() throws IOException { final String string = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; try (CSVParser parser = CSVParser.builder().setCharSequence(string).get()) { final List records = parser.getRecords(); @@ -278,7 +293,7 @@ public void testCarriageReturnLineFeedEndings() throws IOException { } @Test - public void testClose() throws Exception { + void testClose() throws Exception { final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); final Iterator records; try (CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { @@ -290,32 +305,32 @@ public void testClose() throws Exception { } @Test - public void testCSV141CSVFormat_DEFAULT() throws Exception { + void testCSV141CSVFormat_DEFAULT() throws Exception { testCSV141Failure(CSVFormat.DEFAULT, 3); } @Test - public void testCSV141CSVFormat_INFORMIX_UNLOAD() throws Exception { + void testCSV141CSVFormat_INFORMIX_UNLOAD() throws Exception { testCSV141Failure(CSVFormat.INFORMIX_UNLOAD, 1); } @Test - public void testCSV141CSVFormat_INFORMIX_UNLOAD_CSV() throws Exception { + void testCSV141CSVFormat_INFORMIX_UNLOAD_CSV() throws Exception { testCSV141Failure(CSVFormat.INFORMIX_UNLOAD_CSV, 3); } @Test - public void testCSV141CSVFormat_ORACLE() throws Exception { + void testCSV141CSVFormat_ORACLE() throws Exception { testCSV141Failure(CSVFormat.ORACLE, 2); } @Test - public void testCSV141CSVFormat_POSTGRESQL_CSV() throws Exception { + void testCSV141CSVFormat_POSTGRESQL_CSV() throws Exception { testCSV141Failure(CSVFormat.POSTGRESQL_CSV, 3); } @Test - public void 
testCSV141Excel() throws Exception { + void testCSV141Excel() throws Exception { testCSV141Ok(CSVFormat.EXCEL); } @@ -385,12 +400,12 @@ record = parser.nextRecord(); } @Test - public void testCSV141RFC4180() throws Exception { + void testCSV141RFC4180() throws Exception { testCSV141Failure(CSVFormat.RFC4180, 3); } @Test - public void testCSV235() throws IOException { + void testCSV235() throws IOException { final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\""; // "aaa","b""bb","ccc" try (CSVParser parser = CSVFormat.RFC4180.parse(new StringReader(dqString))) { final Iterator records = parser.iterator(); @@ -404,7 +419,7 @@ public void testCSV235() throws IOException { } @Test - public void testCSV57() throws Exception { + void testCSV57() throws Exception { try (CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { final List list = parser.getRecords(); assertNotNull(list); @@ -413,9 +428,9 @@ public void testCSV57() throws Exception { } @Test - public void testDefaultFormat() throws IOException { + void testDefaultFormat() throws IOException { // @formatter:off - final String code = "" + "a,b#\n" + // 1) + final String code = "a,b#\n" + // 1) "\"\n\",\" \",#\n" + // 2) "#,\"\"\n" + // 3) "# Final comment\n" // 4) @@ -428,30 +443,55 @@ public void testDefaultFormat() throws IOException { try (CSVParser parser = CSVParser.parse(code, format)) { final List records = parser.getRecords(); assertFalse(records.isEmpty()); - Utils.compare("Failed to parse without comments", res, records); + Utils.compare("Failed to parse without comments", res, records, -1); format = CSVFormat.DEFAULT.withCommentMarker('#'); } try (CSVParser parser = CSVParser.parse(code, format)) { final List records = parser.getRecords(); - Utils.compare("Failed to parse with comments", resComments, records); + Utils.compare("Failed to parse with comments", resComments, records, -1); } } @Test - public void testDuplicateHeadersAllowedByDefault() throws Exception { + void 
testDuplicateHeadersAllowedByDefault() throws Exception { try (CSVParser parser = CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader())) { // noop } } @Test - public void testDuplicateHeadersNotAllowed() { + void testDuplicateHeadersNotAllowed() { assertThrows(IllegalArgumentException.class, () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false))); } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping + * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value. + */ + @Test + void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (CSVParser parser = CSVParser.parse(" |a", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "", "a" }, records.get(0)); + } + try (CSVParser parser = CSVParser.parse("a | |b", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); + } + try (CSVParser parser = CSVParser.parse("a | |b |", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b", "" }, records.get(0)); + } + } + @Test - public void testEmptyFile() throws Exception { + void testEmptyFile() throws Exception { try (CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8, CSVFormat.DEFAULT)) { assertNull(parser.nextRecord()); @@ -459,7 +499,7 @@ public void testEmptyFile() throws Exception { } @Test 
- public void testEmptyFileHeaderParsing() throws Exception { + void testEmptyFileHeaderParsing() throws Exception { try (CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT.withFirstRecordAsHeader())) { assertNull(parser.nextRecord()); assertTrue(parser.getHeaderNames().isEmpty()); @@ -467,7 +507,7 @@ public void testEmptyFileHeaderParsing() throws Exception { } @Test - public void testEmptyLineBehaviorCSV() throws Exception { + void testEmptyLineBehaviorCSV() throws Exception { final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; final String[][] res = { { "hello", "" } // CSV format ignores empty lines }; @@ -477,14 +517,14 @@ public void testEmptyLineBehaviorCSV() throws Exception { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } } @Test - public void testEmptyLineBehaviorExcel() throws Exception { + void testEmptyLineBehaviorExcel() throws Exception { final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines { "" } }; @@ -494,21 +534,21 @@ public void testEmptyLineBehaviorExcel() throws Exception { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } } @Test - public void testEmptyString() throws Exception { + void testEmptyString() throws Exception { try (CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { assertNull(parser.nextRecord()); } } @Test - public void testEndOfFileBehaviorCSV() throws Exception { + void testEndOfFileBehaviorCSV() throws Exception { final String[] codes = { "hello,\r\n\r\nworld,\r\n", 
"hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" }; final String[][] res = { { "hello", "" }, // CSV format ignores empty lines @@ -519,14 +559,14 @@ public void testEndOfFileBehaviorCSV() throws Exception { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } } @Test - public void testEndOfFileBehaviorExcel() throws Exception { + void testEndOfFileBehaviorExcel() throws Exception { final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" }; final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines @@ -538,15 +578,15 @@ public void testEndOfFileBehaviorExcel() throws Exception { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } } @Test - public void testExcelFormat1() throws IOException { - final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; + void testExcelFormat1() throws IOException { + final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; final String[][] res = { { "value1", "value2", "value3", "value4" }, { "a", "b", "c", "d" }, { " x", "", "", "" }, { "" }, { "\"hello\"", " \"world\"", "abc\ndef", "" } }; try (CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { @@ -554,13 +594,13 @@ public void 
testExcelFormat1() throws IOException { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } @Test - public void testExcelFormat2() throws Exception { + void testExcelFormat2() throws Exception { final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; final String[][] res = { { "foo", "baar" }, { "" }, { "hello", "" }, { "" }, { "world", "" } }; try (CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { @@ -568,7 +608,7 @@ public void testExcelFormat2() throws Exception { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } @@ -577,7 +617,7 @@ public void testExcelFormat2() throws Exception { * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers */ @Test - public void testExcelHeaderCountLessThanData() throws Exception { + void testExcelHeaderCountLessThanData() throws Exception { final String code = "A,B,C,,\r\na,b,c,d,e\r\n"; try (CSVParser parser = CSVParser.parse(code, EXCEL_WITH_HEADER)) { parser.getRecords().forEach(record -> { @@ -589,7 +629,7 @@ public void testExcelHeaderCountLessThanData() throws Exception { } @Test - public void testFirstEndOfLineCr() throws IOException { + void testFirstEndOfLineCr() throws IOException { final String data = "foo\rbaar,\rhello,world\r,kanu"; try (CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { final List records = parser.getRecords(); @@ -599,7 +639,7 @@ public void testFirstEndOfLineCr() throws IOException { } @Test - public void testFirstEndOfLineCrLf() throws IOException { + void testFirstEndOfLineCrLf() throws IOException { final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; try (CSVParser parser = 
CSVParser.parse(data, CSVFormat.DEFAULT)) { final List records = parser.getRecords(); @@ -609,7 +649,7 @@ public void testFirstEndOfLineCrLf() throws IOException { } @Test - public void testFirstEndOfLineLf() throws IOException { + void testFirstEndOfLineLf() throws IOException { final String data = "foo\nbaar,\nhello,world\n,kanu"; try (CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { final List records = parser.getRecords(); @@ -619,7 +659,7 @@ public void testFirstEndOfLineLf() throws IOException { } @Test - public void testForEach() throws Exception { + void testForEach() throws Exception { try (Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); CSVParser parser = CSVFormat.DEFAULT.parse(in)) { final List records = new ArrayList<>(); @@ -627,14 +667,108 @@ public void testForEach() throws Exception { records.add(record); } assertEquals(3, records.size()); - assertArrayEquals(new String[] { "a", "b", "c" }, records.get(0).values()); - assertArrayEquals(new String[] { "1", "2", "3" }, records.get(1).values()); - assertArrayEquals(new String[] { "x", "y", "z" }, records.get(2).values()); + assertValuesEquals(new String[] { "a", "b", "c" }, records.get(0)); + assertValuesEquals(new String[] { "1", "2", "3" }, records.get(1)); + assertValuesEquals(new String[] { "x", "y", "z" }, records.get(2)); + } + } + + @Test + void testGetBytePositionMultiCharacterDelimiter() throws IOException { + final String code = "aa[|]bb\ncc[|]dd\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code)) + .setFormat(format) + .setCharset(StandardCharsets.UTF_8) + .setTrackBytes(true) + .get()) { + final Iterator it = parser.iterator(); + final CSVRecord first = it.next(); + final CSVRecord second = it.next(); + assertEquals(0, first.getBytePosition()); + assertEquals(8, second.getBytePosition()); } } + /** + * Tests CSV-329. 
+ */ @Test - public void testGetHeaderComment_HeaderComment1() throws IOException { + void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() throws IOException { + final String delimiter = "x😀"; + final String code = "ax😀b\ncx😀d\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(delimiter).get(); + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code)) + .setFormat(format) + .setCharset(UTF_8) + .setTrackBytes(true) + .get()) { + final CSVRecord first = parser.nextRecord(); + final CSVRecord second = parser.nextRecord(); + assertNotNull(first); + assertNotNull(second); + assertValuesEquals(new String[] { "a", "b" }, first); + assertValuesEquals(new String[] { "c", "d" }, second); + assertEquals(0, first.getBytePosition()); + assertEquals("ax😀b\n".getBytes(UTF_8).length, second.getBytePosition()); + } + } + + @Test + void testGetBytePositionWithCharacterOffsetAndMultiBytePrefix() throws Exception { + final String row0 = "é,x\n"; + final Charset charset = UTF_8; + // row0 char count is 4 + assertEquals(4, row0.length()); + // row0 byte count is 5 + final int record1ByteOffset = row0.getBytes(charset).length; + assertEquals(5, record1ByteOffset); + final String row1 = "b,c\n"; + final String rows = row0 + row1; + final long record1CharOffset = row0.length(); + final long expectedByteOffset = row0.getBytes(charset).length; + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(row1)) + .setFormat(CSVFormat.DEFAULT) + .setCharset(charset) + .setTrackBytes(true) + .setByteOffset(record1ByteOffset) + .setCharacterOffset(record1CharOffset) + .setRecordNumber(2) // not relevant but a better use case example. 
+ .get()) { + final CSVRecord record = parser.nextRecord(); + assertNotNull(record); + assertEquals(4, record.getCharacterPosition()); + assertEquals(record1CharOffset, record.getCharacterPosition()); + assertEquals(expectedByteOffset, record.getBytePosition()); + } + } + + @Test + void testGetBytePositionWithSingleByteCharset() throws IOException { + // A single-byte charset cannot encode U+FFFF, the char value of the EOF sentinel. + // Byte counting must skip the EOF read so a valid file parses without throwing. + final String code = "a,b\nc,d\n"; + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code)) + .setFormat(CSVFormat.DEFAULT) + .setCharset(StandardCharsets.ISO_8859_1) + .setTrackBytes(true) + .get()) { + final CSVRecord first = parser.nextRecord(); + final CSVRecord second = parser.nextRecord(); + assertNotNull(first); + assertNotNull(second); + assertNull(parser.nextRecord()); + assertEquals(0, first.getBytePosition()); + assertEquals(4, second.getBytePosition()); + } + } + + @Test + void testGetHeaderComment_HeaderComment1() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { parser.getRecords(); // Expect a header comment @@ -644,7 +778,7 @@ public void testGetHeaderComment_HeaderComment1() throws IOException { } @Test - public void testGetHeaderComment_HeaderComment2() throws IOException { + void testGetHeaderComment_HeaderComment2() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { parser.getRecords(); // Expect a header comment @@ -654,7 +788,7 @@ public void testGetHeaderComment_HeaderComment2() throws IOException { } @Test - public void testGetHeaderComment_HeaderComment3() throws IOException { + void testGetHeaderComment_HeaderComment3() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { parser.getRecords(); // Expect no header 
comment - the text "comment" is attached to the first record @@ -664,7 +798,7 @@ public void testGetHeaderComment_HeaderComment3() throws IOException { } @Test - public void testGetHeaderComment_HeaderTrailerComment() throws IOException { + void testGetHeaderComment_HeaderTrailerComment() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { parser.getRecords(); // Expect a header comment @@ -674,7 +808,7 @@ public void testGetHeaderComment_HeaderTrailerComment() throws IOException { } @Test - public void testGetHeaderComment_NoComment1() throws IOException { + void testGetHeaderComment_NoComment1() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) { parser.getRecords(); // Expect no header comment @@ -684,7 +818,7 @@ public void testGetHeaderComment_NoComment1() throws IOException { } @Test - public void testGetHeaderComment_NoComment2() throws IOException { + void testGetHeaderComment_NoComment2() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) { parser.getRecords(); // Expect no header comment @@ -694,7 +828,7 @@ public void testGetHeaderComment_NoComment2() throws IOException { } @Test - public void testGetHeaderComment_NoComment3() throws IOException { + void testGetHeaderComment_NoComment3() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { parser.getRecords(); // Expect no header comment @@ -704,7 +838,7 @@ public void testGetHeaderComment_NoComment3() throws IOException { } @Test - public void testGetHeaderMap() throws Exception { + void testGetHeaderMap() throws Exception { try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { final Map headerMap = parser.getHeaderMap(); final Iterator columnNames = headerMap.keySet().iterator(); @@ -728,7 +862,7 @@ 
public void testGetHeaderMap() throws Exception { } @Test - public void testGetHeaderNames() throws IOException { + void testGetHeaderNames() throws IOException { try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { final Map nameIndexMap = parser.getHeaderMap(); final List headerNames = parser.getHeaderNames(); @@ -742,7 +876,7 @@ public void testGetHeaderNames() throws IOException { } @Test - public void testGetHeaderNamesReadOnly() throws IOException { + void testGetHeaderNamesReadOnly() throws IOException { try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { final List headerNames = parser.getHeaderNames(); assertNotNull(headerNames); @@ -751,10 +885,10 @@ public void testGetHeaderNamesReadOnly() throws IOException { } @Test - public void testGetLine() throws IOException { + void testGetLine() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { for (final String[] re : RESULT) { - assertArrayEquals(re, parser.nextRecord().values()); + assertValuesEquals(re, parser.nextRecord()); } assertNull(parser.nextRecord()); @@ -762,25 +896,25 @@ public void testGetLine() throws IOException { } @Test - public void testGetLineNumberWithCR() throws Exception { + void testGetLineNumberWithCR() throws Exception { validateLineNumbers(String.valueOf(CR)); } @Test - public void testGetLineNumberWithCRLF() throws Exception { + void testGetLineNumberWithCRLF() throws Exception { validateLineNumbers(CRLF); } @Test - public void testGetLineNumberWithLF() throws Exception { + void testGetLineNumberWithLF() throws Exception { validateLineNumbers(String.valueOf(LF)); } @Test - public void testGetOneLine() throws IOException { + void testGetOneLine() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) { final CSVRecord record = parser.getRecords().get(0); - 
assertArrayEquals(RESULT[0], record.values()); + assertValuesEquals(RESULT[0], record); } } @@ -790,26 +924,27 @@ public void testGetOneLine() throws IOException { * @throws IOException when an I/O error occurs. */ @Test - public void testGetOneLineOneParser() throws IOException { + void testGetOneLineOneParser() throws IOException { final CSVFormat format = CSVFormat.DEFAULT; try (PipedWriter writer = new PipedWriter(); + PipedReader origin = new PipedReader(writer); CSVParser parser = CSVParser.builder() - .setReader(new PipedReader(writer)) + .setReader(origin) .setFormat(format) .get()) { writer.append(CSV_INPUT_1); writer.append(format.getRecordSeparator()); final CSVRecord record1 = parser.nextRecord(); - assertArrayEquals(RESULT[0], record1.values()); + assertValuesEquals(RESULT[0], record1); writer.append(CSV_INPUT_2); writer.append(format.getRecordSeparator()); final CSVRecord record2 = parser.nextRecord(); - assertArrayEquals(RESULT[1], record2.values()); + assertValuesEquals(RESULT[1], record2); } } @Test - public void testGetRecordFourBytesRead() throws Exception { + void testGetRecordFourBytesRead() throws Exception { final String code = "id,a,b,c\n" + "1,😊,🤔,😂\n" + "2,😊,🤔,😂\n" + @@ -843,52 +978,83 @@ public void testGetRecordFourBytesRead() throws Exception { } @Test - public void testGetRecordNumberWithCR() throws Exception { + void testGetRecordNumberWithCR() throws Exception { validateRecordNumbers(String.valueOf(CR)); } @Test - public void testGetRecordNumberWithCRLF() throws Exception { + void testGetRecordNumberWithCRLF() throws Exception { validateRecordNumbers(CRLF); } @Test - public void testGetRecordNumberWithLF() throws Exception { + void testGetRecordNumberWithLF() throws Exception { validateRecordNumbers(String.valueOf(LF)); } @Test - public void testGetRecordPositionWithCRLF() throws Exception { + void testGetRecordPositionWithCRLF() throws Exception { validateRecordPosition(CRLF); } @Test - public void testGetRecordPositionWithLF() 
throws Exception { + void testGetRecordPositionWithLF() throws Exception { validateRecordPosition(String.valueOf(LF)); } @Test - public void testGetRecords() throws IOException { + void testGetRecords() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { final List records = parser.getRecords(); assertEquals(RESULT.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < RESULT.length; i++) { - assertArrayEquals(RESULT[i], records.get(i).values()); + assertValuesEquals(RESULT[i], records.get(i)); } } } @Test - public void testGetRecordsFromBrokenInputStream() throws IOException { + void testGetRecordsFromBrokenInputStream() throws IOException { @SuppressWarnings("resource") // We also get an exception on close, which is OK but can't assert in a try. final CSVParser parser = CSVParser.parse(new BrokenInputStream(), UTF_8, CSVFormat.DEFAULT); assertThrows(UncheckedIOException.class, parser::getRecords); } + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, Long.MAX_VALUE }) + void testGetRecordsMaxRows(final long maxRows) throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.builder().setIgnoreSurroundingSpaces(true).setMaxRows(maxRows).get())) { + final List records = parser.getRecords(); + final long expectedLength = maxRows <= 0 || maxRows > RESULT.length ? RESULT.length : maxRows; + assertEquals(expectedLength, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < expectedLength; i++) { + assertValuesEquals(RESULT[i], records.get(i)); + } + } + } + + /** + * Tests CSV-327. 
+ */ + @Test + void testGetRecordsMaxRowsWithRecordNumberOffset() throws IOException { + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader("a,b\nc,d\n")) + .setFormat(CSVFormat.DEFAULT.builder().setMaxRows(1).get()) + .setRecordNumber(2) + .get()) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertEquals(2, records.get(0).getRecordNumber()); + assertValuesEquals(new String[] { "a", "b" }, records.get(0)); + } + } + @Test - public void testGetRecordThreeBytesRead() throws Exception { + void testGetRecordThreeBytesRead() throws Exception { final String code = "id,date,val5,val4\n" + "11111111111111,'4017-09-01',きちんと節分近くには咲いてる~,v4\n" + "22222222222222,'4017-01-01',おはよう私の友人~,v4\n" + @@ -924,7 +1090,7 @@ public void testGetRecordThreeBytesRead() throws Exception { } @Test - public void testGetRecordWithMultiLineValues() throws Exception { + void testGetRecordWithMultiLineValues() throws Exception { try (CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"", CSVFormat.DEFAULT.withRecordSeparator(CRLF))) { CSVRecord record; @@ -949,7 +1115,7 @@ public void testGetRecordWithMultiLineValues() throws Exception { } @Test - public void testGetTrailerComment_HeaderComment1() throws IOException { + void testGetTrailerComment_HeaderComment1() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { parser.getRecords(); assertFalse(parser.hasTrailerComment()); @@ -958,7 +1124,7 @@ public void testGetTrailerComment_HeaderComment1() throws IOException { } @Test - public void testGetTrailerComment_HeaderComment2() throws IOException { + void testGetTrailerComment_HeaderComment2() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { parser.getRecords(); assertFalse(parser.hasTrailerComment()); @@ -967,7 +1133,7 @@ public void 
testGetTrailerComment_HeaderComment2() throws IOException { } @Test - public void testGetTrailerComment_HeaderComment3() throws IOException { + void testGetTrailerComment_HeaderComment3() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { parser.getRecords(); assertFalse(parser.hasTrailerComment()); @@ -976,7 +1142,7 @@ public void testGetTrailerComment_HeaderComment3() throws IOException { } @Test - public void testGetTrailerComment_HeaderTrailerComment1() throws IOException { + void testGetTrailerComment_HeaderTrailerComment1() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { parser.getRecords(); assertTrue(parser.hasTrailerComment()); @@ -985,7 +1151,7 @@ public void testGetTrailerComment_HeaderTrailerComment1() throws IOException { } @Test - public void testGetTrailerComment_HeaderTrailerComment2() throws IOException { + void testGetTrailerComment_HeaderTrailerComment2() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) { parser.getRecords(); assertTrue(parser.hasTrailerComment()); @@ -994,7 +1160,7 @@ public void testGetTrailerComment_HeaderTrailerComment2() throws IOException { } @Test - public void testGetTrailerComment_HeaderTrailerComment3() throws IOException { + void testGetTrailerComment_HeaderTrailerComment3() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { parser.getRecords(); assertTrue(parser.hasTrailerComment()); @@ -1003,7 +1169,7 @@ public void testGetTrailerComment_HeaderTrailerComment3() throws IOException { } @Test - public void testGetTrailerComment_MultilineComment() throws IOException { + void testGetTrailerComment_MultilineComment() throws IOException { try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, 
FORMAT_AUTO_HEADER)) { parser.getRecords(); assertTrue(parser.hasTrailerComment()); @@ -1012,7 +1178,7 @@ public void testGetTrailerComment_MultilineComment() throws IOException { } @Test - public void testHeader() throws Exception { + void testHeader() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { @@ -1031,7 +1197,7 @@ public void testHeader() throws Exception { } @Test - public void testHeaderComment() throws Exception { + void testHeaderComment() throws Exception { final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { final Iterator records = parser.iterator(); @@ -1047,7 +1213,7 @@ public void testHeaderComment() throws Exception { } @Test - public void testHeaderMissing() throws Exception { + void testHeaderMissing() throws Exception { final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { final Iterator records = parser.iterator(); @@ -1062,7 +1228,7 @@ public void testHeaderMissing() throws Exception { } @Test - public void testHeaderMissingWithNull() throws Exception { + void testHeaderMissingWithNull() throws Exception { final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in)) { parser.iterator(); @@ -1070,7 +1236,7 @@ public void testHeaderMissingWithNull() throws Exception { } @Test - public void testHeadersMissing() throws Exception { + void testHeadersMissing() throws Exception { try (Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { parser.iterator(); @@ -1078,19 +1244,19 @@ public void testHeadersMissing() throws 
Exception { } @Test - public void testHeadersMissingException() { + void testHeadersMissingException() { final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); } @Test - public void testHeadersMissingOneColumnException() { + void testHeadersMissingOneColumnException() { final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); } @Test - public void testHeadersWithNullColumnName() throws IOException { + void testHeadersWithNullColumnName() throws IOException { final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("null").withAllowMissingColumnNames().parse(in)) { final Iterator records = parser.iterator(); @@ -1104,7 +1270,7 @@ public void testHeadersWithNullColumnName() throws IOException { } @Test - public void testIgnoreCaseHeaderMapping() throws Exception { + void testIgnoreCaseHeaderMapping() throws Exception { final Reader reader = new StringReader("1,2,3"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase().parse(reader)) { final Iterator records = parser.iterator(); @@ -1116,7 +1282,7 @@ public void testIgnoreCaseHeaderMapping() throws Exception { } @Test - public void testIgnoreEmptyLines() throws IOException { + void testIgnoreEmptyLines() throws IOException { final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; // String code = "world\r\n\n"; // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; @@ -1127,30 +1293,58 @@ public void testIgnoreEmptyLines() throws IOException { } @Test - public void testInvalidFormat() { + void testInvalidFormat() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); } @Test - public void testIterator() throws 
Exception { + void testIterator() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.parse(in)) { final Iterator iterator = parser.iterator(); assertTrue(iterator.hasNext()); assertThrows(UnsupportedOperationException.class, iterator::remove); - assertArrayEquals(new String[] { "a", "b", "c" }, iterator.next().values()); - assertArrayEquals(new String[] { "1", "2", "3" }, iterator.next().values()); + assertValuesEquals(new String[] { "a", "b", "c" }, iterator.next()); + assertValuesEquals(new String[] { "1", "2", "3" }, iterator.next()); assertTrue(iterator.hasNext()); assertTrue(iterator.hasNext()); assertTrue(iterator.hasNext()); - assertArrayEquals(new String[] { "x", "y", "z" }, iterator.next().values()); + assertValuesEquals(new String[] { "x", "y", "z" }, iterator.next()); + assertFalse(iterator.hasNext()); + assertThrows(NoSuchElementException.class, iterator::next); + } + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, 5, Long.MAX_VALUE }) + void testIteratorMaxRows(final long maxRows) throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get().parse(in)) { + final Iterator iterator = parser.iterator(); + assertTrue(iterator.hasNext()); + assertThrows(UnsupportedOperationException.class, iterator::remove); + assertValuesEquals(new String[] { "a", "b", "c" }, iterator.next()); + final boolean noLimit = maxRows <= 0; + final int fixtureLen = 3; + final long expectedLen = noLimit ? 
fixtureLen : Math.min(fixtureLen, maxRows); + if (expectedLen > 1) { + assertTrue(iterator.hasNext()); + assertValuesEquals(new String[] { "1", "2", "3" }, iterator.next()); + } + assertEquals(expectedLen > 2, iterator.hasNext()); + // again + assertEquals(expectedLen > 2, iterator.hasNext()); + if (expectedLen == fixtureLen) { + assertTrue(iterator.hasNext()); + assertValuesEquals(new String[] { "x", "y", "z" }, iterator.next()); + } assertFalse(iterator.hasNext()); assertThrows(NoSuchElementException.class, iterator::next); } } @Test - public void testIteratorSequenceBreaking() throws IOException { + void testIteratorSequenceBreaking() throws IOException { final String fiveRows = "1\n2\n3\n4\n5\n"; // Iterator hasNext() shouldn't break sequence try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { @@ -1205,7 +1399,7 @@ public void testIteratorSequenceBreaking() throws IOException { } @Test - public void testLineFeedEndings() throws IOException { + void testLineFeedEndings() throws IOException { final String code = "foo\nbaar,\nhello,world\n,kanu"; try (CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { final List records = parser.getRecords(); @@ -1214,7 +1408,7 @@ public void testLineFeedEndings() throws IOException { } @Test - public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { + void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { final Reader in = new StringReader("a,b,c\n1,2\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in)) { final Iterator records = parser.iterator(); @@ -1249,7 +1443,7 @@ record = records.next(); @Test @Disabled - public void testMongoDbCsv() throws Exception { + void testMongoDbCsv() throws Exception { try (CSVParser parser = CSVParser.parse("\"a a\",b,c" + LF + "d,e,f", CSVFormat.MONGODB_CSV)) { final Iterator itr1 = parser.iterator(); final Iterator itr2 = parser.iterator(); @@ -1268,7 +1462,7 
@@ public void testMongoDbCsv() throws Exception { @Test // TODO this may lead to strange behavior, throw an exception if iterator() has already been called? - public void testMultipleIterators() throws Exception { + void testMultipleIterators() throws Exception { try (CSVParser parser = CSVParser.parse("a,b,c" + CRLF + "d,e,f", CSVFormat.DEFAULT)) { final Iterator itr1 = parser.iterator(); @@ -1285,24 +1479,24 @@ public void testMultipleIterators() throws Exception { } @Test - public void testNewCSVParserNullReaderFormat() { + void testNewCSVParserNullReaderFormat() { assertThrows(NullPointerException.class, () -> new CSVParser(null, CSVFormat.DEFAULT)); } @Test - public void testNewCSVParserReaderNullFormat() { + void testNewCSVParserReaderNullFormat() { assertThrows(NullPointerException.class, () -> new CSVParser(new StringReader(""), null)); } @Test - public void testNoHeaderMap() throws Exception { + void testNoHeaderMap() throws Exception { try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) { assertNull(parser.getHeaderMap()); } } @Test - public void testNotValueCSV() throws IOException { + void testNotValueCSV() throws IOException { final String source = "#"; final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#'); try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { @@ -1312,9 +1506,8 @@ public void testNotValueCSV() throws IOException { } @Test - public void testParse() throws Exception { - final ClassLoader loader = ClassLoader.getSystemClassLoader(); - final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv"); + void testParse() throws Exception { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/test.csv"); final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader("A", "B", "C", "D").get(); final Charset charset = StandardCharsets.UTF_8; // Reader @@ -1380,47 +1573,86 @@ public void testParse() throws 
Exception { } @Test - public void testParseFileNullFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse(new File("CSVFileParser/test.csv"), Charset.defaultCharset(), null)); + void testParseFileCharsetNullFormat() throws IOException { + final File file = new File("src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv"); + try (CSVParser parser = CSVParser.parse(file, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. + parseFully(parser); + } + } + + @Test + void testParseInputStreamCharsetNullFormat() throws IOException { + try (InputStream in = Files.newInputStream(Paths.get("src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv")); + CSVParser parser = CSVParser.parse(in, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. + parseFully(parser); + } } @Test - public void testParseNullFileFormat() { + void testParseNullFileFormat() { assertThrows(NullPointerException.class, () -> CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); } @Test - public void testParseNullPathFormat() { + void testParseNullPathFormat() { assertThrows(NullPointerException.class, () -> CSVParser.parse((Path) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); } @Test - public void testParseNullStringFormat() { + void testParseNullStringFormat() { assertThrows(NullPointerException.class, () -> CSVParser.parse((String) null, CSVFormat.DEFAULT)); } @Test - public void testParseNullUrlCharsetFormat() { + void testParseNullUrlCharsetFormat() { assertThrows(NullPointerException.class, () -> CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); } @Test - public void testParserUrlNullCharsetFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), null, CSVFormat.DEFAULT)); + void testParsePathCharsetNullFormat() throws IOException { + final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv"); 
+ try (CSVParser parser = CSVParser.parse(path, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. + parseFully(parser); + } + } + + @Test + void testParserUrlNullCharsetFormat() throws IOException { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/test.csv"); + try (CSVParser parser = CSVParser.parse(url, null, CSVFormat.DEFAULT)) { + // null maps to DEFAULT. + parseFully(parser); + } } @Test - public void testParseStringNullFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse("csv data", (CSVFormat) null)); + void testParseStringNullFormat() throws IOException { + try (CSVParser parser = CSVParser.parse("1,2,3", null)) { + // null maps to DEFAULT. + final List records = parser.getRecords(); + assertEquals(1, records.size()); + final CSVRecord record = records.get(0); + assertEquals(3, record.size()); + assertEquals("1", record.get(0)); + assertEquals("2", record.get(1)); + assertEquals("3", record.get(2)); + } } @Test - public void testParseUrlCharsetNullFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), Charset.defaultCharset(), null)); + void testParseUrlCharsetNullFormat() throws IOException { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/test.csv"); + try (CSVParser parser = CSVParser.parse(url, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. 
+ parseFully(parser); + } } @Test - public void testParseWithDelimiterStringWithEscape() throws IOException { + void testParseWithDelimiterStringWithEscape() throws IOException { final String source = "a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz"; final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get(); try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { @@ -1434,7 +1666,7 @@ public void testParseWithDelimiterStringWithEscape() throws IOException { } @Test - public void testParseWithDelimiterStringWithQuote() throws IOException { + void testParseWithDelimiterStringWithQuote() throws IOException { final String source = "'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz"; final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').get(); try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { @@ -1448,7 +1680,7 @@ public void testParseWithDelimiterStringWithQuote() throws IOException { } @Test - public void testParseWithDelimiterWithEscape() throws IOException { + void testParseWithDelimiterWithEscape() throws IOException { final String source = "a!,b!,c,xyz"; final CSVFormat csvFormat = CSVFormat.DEFAULT.withEscape('!'); try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { @@ -1459,7 +1691,7 @@ public void testParseWithDelimiterWithEscape() throws IOException { } @Test - public void testParseWithDelimiterWithQuote() throws IOException { + void testParseWithDelimiterWithQuote() throws IOException { final String source = "'a,b,c',xyz"; final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { @@ -1470,7 +1702,7 @@ public void testParseWithDelimiterWithQuote() throws IOException { } @Test - public void testParseWithQuoteThrowsException() { + void testParseWithQuoteThrowsException() { final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); assertThrows(IOException.class, () -> 
csvFormat.parse(new StringReader("'a,b,c','")).nextRecord()); assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c'abc,xyz")).nextRecord()); @@ -1478,7 +1710,7 @@ public void testParseWithQuoteThrowsException() { } @Test - public void testParseWithQuoteWithEscape() throws IOException { + void testParseWithQuoteWithEscape() throws IOException { final String source = "'a?,b?,c?d',xyz"; final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'').withEscape('?'); try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { @@ -1490,7 +1722,7 @@ public void testParseWithQuoteWithEscape() throws IOException { @ParameterizedTest @EnumSource(CSVFormat.Predefined.class) - public void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format) throws Exception { + void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format) throws Exception { final String[][] lines = { { "a", "b" }, { "", "x" } }; final StringWriter buf = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(buf, format.getFormat())) { @@ -1501,19 +1733,61 @@ public void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format .setFormat(format.getFormat()) .get()) { for (final String[] line : lines) { - assertArrayEquals(line, csvRecords.nextRecord().values()); + assertValuesEquals(line, csvRecords.nextRecord()); } assertNull(csvRecords.nextRecord()); } } + /** + * A truncated escaped multi-character delimiter at EOF must stay literal data and not be completed from a stale + * escape delimiter look-ahead. 
+ */ @Test - public void testProvidedHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get(); + try (CSVParser parser = format.parse(new StringReader("x![!|!]y![!|"))) { + final CSVRecord record = parser.nextRecord(); + assertEquals("x[|]y![!|", record.get(0)); + assertEquals(1, record.size()); + } + } + /** + * Tests CSV-324. + */ + @Test + void testPartialMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + try (CSVParser parser = format.parse(new StringReader("a[|]b[|"))) { + final CSVRecord record = parser.nextRecord(); + assertEquals("a", record.get(0)); + assertEquals("b[|", record.get(1)); + assertEquals(2, record.size()); + } + } + + /** + * A truncated multi-character delimiter at EOF must not be completed from the look-ahead buffer left dirty by an + * earlier non-matching peek in the same token. + */ + @Test + void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + // The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]". 
+ final String recordString = "x[a][|"; + try (CSVParser parser = format.parse(new StringReader(recordString))) { + final CSVRecord record = parser.nextRecord(); + assertEquals(recordString, record.get(0)); + assertEquals(1, record.size()); + } + } + + @Test + void testProvidedHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in)) { final Iterator records = parser.iterator(); - for (int i = 0; i < 3; i++) { assertTrue(records.hasNext()); final CSVRecord record = records.next(); @@ -1525,18 +1799,15 @@ public void testProvidedHeader() throws Exception { assertEquals(record.get(1), record.get("B")); assertEquals(record.get(2), record.get("C")); } - assertFalse(records.hasNext()); } } @Test - public void testProvidedHeaderAuto() throws Exception { + void testProvidedHeaderAuto() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - try (CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { final Iterator records = parser.iterator(); - for (int i = 0; i < 2; i++) { assertTrue(records.hasNext()); final CSVRecord record = records.next(); @@ -1548,13 +1819,12 @@ public void testProvidedHeaderAuto() throws Exception { assertEquals(record.get(1), record.get("b")); assertEquals(record.get(2), record.get("c")); } - assertFalse(records.hasNext()); } } @Test - public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException { + void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException { final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6"); try (CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in)) { final Iterator records = parser.iterator(); @@ -1566,7 +1836,7 @@ public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOExce } @Test - public void testRoundtrip() throws Exception { + void testRoundtrip() throws Exception 
{ final StringWriter out = new StringWriter(); final String data = "a,b,c\r\n1,2,3\r\nx,y,z\r\n"; try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT); @@ -1579,7 +1849,7 @@ public void testRoundtrip() throws Exception { } @Test - public void testSkipAutoHeader() throws Exception { + void testSkipAutoHeader() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { final Iterator records = parser.iterator(); @@ -1591,7 +1861,7 @@ public void testSkipAutoHeader() throws Exception { } @Test - public void testSkipHeaderOverrideDuplicateHeaders() throws Exception { + void testSkipHeaderOverrideDuplicateHeaders() throws Exception { final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) { final Iterator records = parser.iterator(); @@ -1603,7 +1873,7 @@ public void testSkipHeaderOverrideDuplicateHeaders() throws Exception { } @Test - public void testSkipSetAltHeaders() throws Exception { + void testSkipSetAltHeaders() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) { final Iterator records = parser.iterator(); @@ -1615,7 +1885,7 @@ public void testSkipSetAltHeaders() throws Exception { } @Test - public void testSkipSetHeader() throws Exception { + void testSkipSetHeader() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)) { final Iterator records = parser.iterator(); @@ -1628,7 +1898,7 @@ public void testSkipSetHeader() throws Exception { @Test @Disabled - public void testStartWithEmptyLinesThenHeaders() throws Exception { + void testStartWithEmptyLinesThenHeaders() throws Exception { final String[] codes = { 
"\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines { "" } }; @@ -1638,26 +1908,43 @@ public void testStartWithEmptyLinesThenHeaders() throws Exception { assertEquals(res.length, records.size()); assertFalse(records.isEmpty()); for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); + assertValuesEquals(res[i], records.get(i)); } } } } @Test - public void testStream() throws Exception { + void testStream() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.parse(in)) { final List list = parser.stream().collect(Collectors.toList()); assertFalse(list.isEmpty()); - assertArrayEquals(new String[] { "a", "b", "c" }, list.get(0).values()); - assertArrayEquals(new String[] { "1", "2", "3" }, list.get(1).values()); - assertArrayEquals(new String[] { "x", "y", "z" }, list.get(2).values()); + assertValuesEquals(new String[] { "a", "b", "c" }, list.get(0)); + assertValuesEquals(new String[] { "1", "2", "3" }, list.get(1)); + assertValuesEquals(new String[] { "x", "y", "z" }, list.get(2)); + } + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, Long.MAX_VALUE }) + void testStreamMaxRows(final long maxRows) throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get().parse(in)) { + final List list = parser.stream().collect(Collectors.toList()); + assertFalse(list.isEmpty()); + assertValuesEquals(new String[] { "a", "b", "c" }, list.get(0)); + if (maxRows <= 0 || maxRows > 1) { + assertValuesEquals(new String[] { "1", "2", "3" }, list.get(1)); + } + if (maxRows <= 0 || maxRows > 2) { + assertValuesEquals(new String[] { "x", "y", "z" }, list.get(2)); + } } } @Test - public void testThrowExceptionWithLineAndPosition() 
throws IOException { + void testThrowExceptionWithLineAndPosition() throws IOException { final String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\nrec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10"; final StringReader stringReader = new StringReader(csvContent); // @formatter:off @@ -1675,7 +1962,7 @@ public void testThrowExceptionWithLineAndPosition() throws IOException { } @Test - public void testTrailingDelimiter() throws Exception { + void testTrailingDelimiter() throws Exception { final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrailingDelimiter().parse(in)) { final Iterator records = parser.iterator(); @@ -1688,7 +1975,24 @@ public void testTrailingDelimiter() throws Exception { } @Test - public void testTrim() throws Exception { + void testTrailingDelimiterKeepsQuotedEmptyLastField() throws Exception { + final CSVFormat format = CSVFormat.DEFAULT.builder().setTrailingDelimiter(true).get(); + try (CSVParser parser = CSVParser.parse("a,b,\"\"", format)) { + final CSVRecord record = parser.iterator().next(); + assertEquals(3, record.size()); + assertEquals("a", record.get(0)); + assertEquals("b", record.get(1)); + assertEquals("", record.get(2)); + } + // An unquoted trailing delimiter still drops the empty field. 
+ try (CSVParser parser = CSVParser.parse("a,b,", format)) { + final CSVRecord record = parser.iterator().next(); + assertEquals(2, record.size()); + } + } + + @Test + void testTrim() throws Exception { final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z"); try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrim().parse(in)) { final Iterator records = parser.iterator(); diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java index f457460c9b..9ae80c1e51 100644 --- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java @@ -69,17 +69,21 @@ import org.h2.tools.SimpleResultSet; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; /** * Tests {@link CSVPrinter}. */ -public class CSVPrinterTest { +class CSVPrinterTest { private static final int TABLE_RECORD_COUNT = 2; + private static final int TABLE_AND_HEADER_RECORD_COUNT = TABLE_RECORD_COUNT + 1; private static final char DQUOTE_CHAR = '"'; private static final char EURO_CH = '\u20AC'; - private static final int ITERATIONS_FOR_RANDOM_TEST = 50000; + private static final int ITERATIONS_FOR_RANDOM_TEST = 50_000; private static final char QUOTE_CH = '\''; + private static final String RECORD_SEPARATOR = CSVFormat.DEFAULT.getRecordSeparator(); private static String printable(final String s) { final StringBuilder sb = new StringBuilder(); @@ -96,12 +100,16 @@ private static String printable(final String s) { private String longText2; - private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator(); - private void assertInitialState(final CSVPrinter printer) { assertEquals(0, printer.getRecordCount()); } + private void assertRowCount(final CSVFormat format, final String 
resultString, final int rowCount) throws IOException { + try (CSVParser parser = format.parse(new StringReader(resultString))) { + assertEquals(rowCount, parser.getRecords().size()); + } + } + private File createTempFile() throws IOException { return createTempPath().toFile(); } @@ -138,7 +146,7 @@ private void doOneRandom(final CSVFormat format) throws Exception { for (int i = 0; i < expected.length; i++) { expected[i] = expectNulls(expected[i], format); } - Utils.compare("Printer output :" + printable(result), expected, parseResult); + Utils.compare("Printer output :" + printable(result), expected, parseResult, -1); } } @@ -149,8 +157,8 @@ private void doRandom(final CSVFormat format, final int iter) throws Exception { } /** - * Converts an input CSV array into expected output values WRT NULLs. NULL strings are converted to null values because the parser will convert these - * strings to null. + * Converts an input CSV array into expected output values, including NULLs. NULL strings are converted to null values because the parser will convert + * these strings to null. 
*/ private T[] expectNulls(final T[] original, final CSVFormat csvFormat) { final T[] fixed = original.clone(); @@ -257,7 +265,7 @@ private void setUpTable(final Connection connection) throws SQLException { } @Test - public void testCloseBackwardCompatibility() throws IOException { + void testCloseBackwardCompatibility() throws IOException { try (Writer writer = mock(Writer.class)) { final CSVFormat csvFormat = CSVFormat.DEFAULT; try (CSVPrinter printer = new CSVPrinter(writer, csvFormat)) { @@ -269,7 +277,7 @@ public void testCloseBackwardCompatibility() throws IOException { } @Test - public void testCloseWithCsvFormatAutoFlushOff() throws IOException { + void testCloseWithCsvFormatAutoFlushOff() throws IOException { try (Writer writer = mock(Writer.class)) { final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(false); try (CSVPrinter printer = new CSVPrinter(writer, csvFormat)) { @@ -281,7 +289,7 @@ public void testCloseWithCsvFormatAutoFlushOff() throws IOException { } @Test - public void testCloseWithCsvFormatAutoFlushOn() throws IOException { + void testCloseWithCsvFormatAutoFlushOn() throws IOException { // System.out.println("start method"); try (Writer writer = mock(Writer.class)) { final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(true); @@ -294,7 +302,7 @@ public void testCloseWithCsvFormatAutoFlushOn() throws IOException { } @Test - public void testCloseWithFlushOff() throws IOException { + void testCloseWithFlushOff() throws IOException { try (Writer writer = mock(Writer.class)) { final CSVFormat csvFormat = CSVFormat.DEFAULT; @SuppressWarnings("resource") @@ -308,7 +316,7 @@ public void testCloseWithFlushOff() throws IOException { } @Test - public void testCloseWithFlushOn() throws IOException { + void testCloseWithFlushOn() throws IOException { try (Writer writer = mock(Writer.class)) { @SuppressWarnings("resource") final CSVPrinter printer = new CSVPrinter(writer, CSVFormat.DEFAULT); @@ -320,7 +328,7 @@ public void 
testCloseWithFlushOn() throws IOException { } @Test - public void testCRComment() throws IOException { + void testCRComment() throws IOException { final StringWriter sw = new StringWriter(); final Object value = "abc"; try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { @@ -328,14 +336,14 @@ public void testCRComment() throws IOException { printer.print(value); assertEquals(0, printer.getRecordCount()); printer.printComment("This is a comment\r\non multiple lines\rthis is next comment\r"); - assertEquals("abc" + recordSeparator + "# This is a comment" + recordSeparator + "# on multiple lines" + recordSeparator + - "# this is next comment" + recordSeparator + "# " + recordSeparator, sw.toString()); + assertEquals("abc" + RECORD_SEPARATOR + "# This is a comment" + RECORD_SEPARATOR + "# on multiple lines" + RECORD_SEPARATOR + + "# this is next comment" + RECORD_SEPARATOR + "# " + RECORD_SEPARATOR, sw.toString()); assertEquals(0, printer.getRecordCount()); } } @Test - public void testCSV135() throws IOException { + void testCSV135() throws IOException { final List list = new LinkedList<>(); list.add("\"\""); // "" list.add("\\\\"); // \\ @@ -358,7 +366,7 @@ public void testCSV135() throws IOException { } @Test - public void testCSV259() throws IOException { + void testCSV259() throws IOException { final StringWriter sw = new StringWriter(); try (Reader reader = new FileReader("src/test/resources/org/apache/commons/csv/CSV-259/sample.txt"); CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { @@ -369,7 +377,7 @@ public void testCSV259() throws IOException { } @Test - public void testDelimeterQuoted() throws IOException { + void testDelimeterQuoted() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { assertInitialState(printer); @@ -380,7 +388,7 @@ public void testDelimeterQuoted() throws 
IOException { } @Test - public void testDelimeterQuoteNone() throws IOException { + void testDelimeterQuoteNone() throws IOException { final StringWriter sw = new StringWriter(); final CSVFormat format = CSVFormat.DEFAULT.withEscape('!').withQuoteMode(QuoteMode.NONE); try (CSVPrinter printer = new CSVPrinter(sw, format)) { @@ -392,7 +400,7 @@ public void testDelimeterQuoteNone() throws IOException { } @Test - public void testDelimeterStringQuoted() throws IOException { + void testDelimeterStringQuoted() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').get())) { assertInitialState(printer); @@ -403,7 +411,7 @@ public void testDelimeterStringQuoted() throws IOException { } @Test - public void testDelimeterStringQuoteNone() throws IOException { + void testDelimeterStringQuoteNone() throws IOException { final StringWriter sw = new StringWriter(); final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').setQuoteMode(QuoteMode.NONE).get(); try (CSVPrinter printer = new CSVPrinter(sw, format)) { @@ -416,7 +424,7 @@ public void testDelimeterStringQuoteNone() throws IOException { } @Test - public void testDelimiterEscaped() throws IOException { + void testDelimiterEscaped() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { assertInitialState(printer); @@ -427,7 +435,7 @@ public void testDelimiterEscaped() throws IOException { } @Test - public void testDelimiterPlain() throws IOException { + void testDelimiterPlain() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { assertInitialState(printer); @@ -438,7 +446,7 @@ public void testDelimiterPlain() throws IOException { } @Test - public void 
testDelimiterStringEscaped() throws IOException { + void testDelimiterStringEscaped() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("|||").setEscape('!').setQuote(null).get())) { assertInitialState(printer); @@ -449,7 +457,7 @@ public void testDelimiterStringEscaped() throws IOException { } @Test - public void testDisabledComment() throws IOException { + void testDisabledComment() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); @@ -460,17 +468,17 @@ public void testDisabledComment() throws IOException { } @Test - public void testDontQuoteEuroFirstChar() throws IOException { + void testDontQuoteEuroFirstChar() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { assertInitialState(printer); printer.printRecord(EURO_CH, "Deux"); - assertEquals(EURO_CH + ",Deux" + recordSeparator, sw.toString()); + assertEquals(EURO_CH + ",Deux" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testEolEscaped() throws IOException { + void testEolEscaped() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { assertInitialState(printer); @@ -481,7 +489,7 @@ public void testEolEscaped() throws IOException { } @Test - public void testEolPlain() throws IOException { + void testEolPlain() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { assertInitialState(printer); @@ -492,7 +500,7 @@ public void testEolPlain() throws IOException { } @Test - public void testEolQuoted() throws IOException { + void testEolQuoted() throws IOException { final StringWriter sw = new StringWriter(); try 
(CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { assertInitialState(printer); @@ -504,7 +512,7 @@ public void testEolQuoted() throws IOException { @SuppressWarnings("unlikely-arg-type") @Test - public void testEquals() throws IOException { + void testEquals() throws IOException { // Don't use assertNotEquals here assertFalse(CSVFormat.DEFAULT.equals(null)); // Don't use assertNotEquals here @@ -512,7 +520,7 @@ public void testEquals() throws IOException { } @Test - public void testEscapeBackslash1() throws IOException { + void testEscapeBackslash1() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { assertInitialState(printer); @@ -522,7 +530,7 @@ public void testEscapeBackslash1() throws IOException { } @Test - public void testEscapeBackslash2() throws IOException { + void testEscapeBackslash2() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { assertInitialState(printer); @@ -532,7 +540,7 @@ public void testEscapeBackslash2() throws IOException { } @Test - public void testEscapeBackslash3() throws IOException { + void testEscapeBackslash3() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { assertInitialState(printer); @@ -542,7 +550,7 @@ public void testEscapeBackslash3() throws IOException { } @Test - public void testEscapeBackslash4() throws IOException { + void testEscapeBackslash4() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { assertInitialState(printer); @@ -552,7 +560,7 @@ public void testEscapeBackslash4() throws IOException { } @Test - public void testEscapeBackslash5() throws IOException { + void testEscapeBackslash5() 
throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { assertInitialState(printer); @@ -562,7 +570,58 @@ public void testEscapeBackslash5() throws IOException { } @Test - public void testEscapeNull1() throws IOException { + void testEscapeCommentMarkerFirstChar() throws IOException { + // No quoting available in escape mode, so a leading comment marker must be escaped or the + // record reads back as a comment and is dropped. Mirrors the quoting fix for QuoteMode.MINIMAL. + final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote(null).setEscape('\\').setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";comment-like"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord(col1, "b"); + printer.printRecord(new StringReader(col1), new StringReader("b")); + // The marker past the first character does not start a comment and is left alone. + printer.printRecord("a;b", ";c"); + } + final String string = sw.toString(); + assertEquals("\\;comment-like,b" + RECORD_SEPARATOR + + "\\;comment-like,b" + RECORD_SEPARATOR + + "a;b,\\;c" + RECORD_SEPARATOR, string); + // The emitted records must read back as the original values, none parsed as a comment. 
+ try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(3, records.size()); + assertEquals(col1, records.get(0).get(0)); + assertEquals("b", records.get(0).get(1)); + assertEquals(col1, records.get(1).get(0)); + assertEquals("b", records.get(1).get(1)); + assertEquals("a;b", records.get(2).get(0)); + assertEquals(";c", records.get(2).get(1)); + } + } + + @Test + void testEscapeCommentMarkerFirstCharWithQuoteModeNone() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setEscape('\\').setQuoteMode(QuoteMode.NONE).setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";bar"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord(col1, "b"); + printer.printRecord(new StringReader(col1), new StringReader("b")); + } + final String string = sw.toString(); + assertEquals("\\;bar,b" + RECORD_SEPARATOR + "\\;bar,b" + RECORD_SEPARATOR, string); + try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + for (final CSVRecord record : records) { + assertEquals(col1, record.get(0)); + assertEquals("b", record.get(1)); + } + } + } + + @Test + void testEscapeNull1() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { assertInitialState(printer); @@ -572,7 +631,7 @@ public void testEscapeNull1() throws IOException { } @Test - public void testEscapeNull2() throws IOException { + void testEscapeNull2() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { assertInitialState(printer); @@ -582,7 +641,7 @@ public void testEscapeNull2() throws IOException { } @Test - public void testEscapeNull3() throws IOException { + void testEscapeNull3() throws IOException { 
final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { assertInitialState(printer); @@ -592,7 +651,7 @@ public void testEscapeNull3() throws IOException { } @Test - public void testEscapeNull4() throws IOException { + void testEscapeNull4() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { assertInitialState(printer); @@ -602,7 +661,7 @@ public void testEscapeNull4() throws IOException { } @Test - public void testEscapeNull5() throws IOException { + void testEscapeNull5() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { assertInitialState(printer); @@ -612,127 +671,133 @@ public void testEscapeNull5() throws IOException { } @Test - public void testExcelPrintAllArrayOfArrays() throws IOException { + void testExcelPrintAllArrayOfArrays() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords((Object[]) new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } }); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrintAllArrayOfArraysWithFirstEmptyValue2() throws IOException { + void testExcelPrintAllArrayOfArraysWithFirstEmptyValue2() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords((Object[]) new String[][] { { "" } }); - assertEquals("\"\"" + recordSeparator, sw.toString()); + assertEquals("\"\"" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void 
testExcelPrintAllArrayOfArraysWithFirstSpaceValue1() throws IOException { + void testExcelPrintAllArrayOfArraysWithFirstSpaceValue1() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords((Object[]) new String[][] { { " ", "r1c2" } }); - assertEquals("\" \",r1c2" + recordSeparator, sw.toString()); + assertEquals("\" \",r1c2" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrintAllArrayOfArraysWithFirstTabValue1() throws IOException { + void testExcelPrintAllArrayOfArraysWithFirstTabValue1() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords((Object[]) new String[][] { { "\t", "r1c2" } }); - assertEquals("\"\t\",r1c2" + recordSeparator, sw.toString()); + assertEquals("\"\t\",r1c2" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrintAllArrayOfLists() throws IOException { + void testExcelPrintAllArrayOfLists() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords((Object[]) new List[] { Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2") }); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrintAllArrayOfListsWithFirstEmptyValue2() throws IOException { + void testExcelPrintAllArrayOfListsWithFirstEmptyValue2() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords((Object[]) new List[] { Arrays.asList("") }); - assertEquals("\"\"" + recordSeparator, 
sw.toString()); + assertEquals("\"\"" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrintAllIterableOfArrays() throws IOException { + void testExcelPrintAllIterableOfArrays() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords(Arrays.asList(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrintAllIterableOfArraysWithFirstEmptyValue2() throws IOException { + void testExcelPrintAllIterableOfArraysWithFirstEmptyValue2() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords(Arrays.asList(new String[][] { { "" } })); - assertEquals("\"\"" + recordSeparator, sw.toString()); + assertEquals("\"\"" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrintAllIterableOfLists() throws IOException { + void testExcelPrintAllIterableOfLists() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecords(Arrays.asList(Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2"))); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); } } - @Test - public void testExcelPrintAllStreamOfArrays() throws IOException { + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, Long.MAX_VALUE }) + void testExcelPrintAllStreamOfArrays(final long maxRows) throws IOException { final StringWriter sw = new StringWriter(); - 
try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + final CSVFormat format = CSVFormat.EXCEL.builder().setMaxRows(maxRows).get(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { assertInitialState(printer); printer.printRecords(Stream.of(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + String expected = "r1c1,r1c2" + RECORD_SEPARATOR; + if (maxRows != 1) { + expected += "r2c1,r2c2" + RECORD_SEPARATOR; + } + assertEquals(expected, sw.toString()); } } @Test - public void testExcelPrinter1() throws IOException { + void testExcelPrinter1() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); + assertEquals("a,b" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testExcelPrinter2() throws IOException { + void testExcelPrinter2() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { assertInitialState(printer); printer.printRecord("a,b", "b"); - assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); + assertEquals("\"a,b\",b" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testHeader() throws IOException { + void testHeader() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3"))) { assertEquals(1, printer.getRecordCount()); @@ -743,7 +808,7 @@ public void testHeader() throws IOException { } @Test - public void testHeaderCommentExcel() throws IOException { + void testHeaderCommentExcel() throws IOException { final StringWriter sw = new StringWriter(); final Date now = new Date(); final CSVFormat format = CSVFormat.EXCEL; @@ -753,7 
+818,7 @@ public void testHeaderCommentExcel() throws IOException { } @Test - public void testHeaderCommentTdf() throws IOException { + void testHeaderCommentTdf() throws IOException { final StringWriter sw = new StringWriter(); final Date now = new Date(); final CSVFormat format = CSVFormat.TDF; @@ -763,7 +828,7 @@ public void testHeaderCommentTdf() throws IOException { } @Test - public void testHeaderNotSet() throws IOException { + void testHeaderNotSet() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { assertInitialState(printer); @@ -774,12 +839,12 @@ public void testHeaderNotSet() throws IOException { } @Test - public void testInvalidFormat() { + void testInvalidFormat() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); } @Test - public void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLException { + void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLException { final StringWriter sw = new StringWriter(); final CSVFormat csvFormat = CSVFormat.DEFAULT; try (Connection connection = getH2Connection()) { @@ -793,8 +858,8 @@ public void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLExc } } final String csv = sw.toString(); - assertEquals("1,r1,\"long text 1\",\"YmluYXJ5IGRhdGEgMQ==\r\n\"" + recordSeparator + "2,r2,\"" + longText2 + "\",\"YmluYXJ5IGRhdGEgMg==\r\n\"" + - recordSeparator, csv); + assertEquals("1,r1,\"long text 1\",\"YmluYXJ5IGRhdGEgMQ==\"" + RECORD_SEPARATOR + "2,r2,\"" + longText2 + "\",\"YmluYXJ5IGRhdGEgMg==\"" + + RECORD_SEPARATOR, csv); // Round trip the data try (StringReader reader = new StringReader(csv); CSVParser csvParser = csvFormat.parse(reader)) { @@ -803,17 +868,17 @@ public void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLExc assertEquals("1", record.get(0)); assertEquals("r1", record.get(1)); assertEquals("long text 1", 
record.get(2)); - assertEquals("YmluYXJ5IGRhdGEgMQ==\r\n", record.get(3)); + assertEquals("YmluYXJ5IGRhdGEgMQ==", record.get(3)); // Row 2 record = csvParser.nextRecord(); assertEquals("2", record.get(0)); assertEquals("r2", record.get(1)); - assertEquals("YmluYXJ5IGRhdGEgMg==\r\n", record.get(3)); + assertEquals("YmluYXJ5IGRhdGEgMg==", record.get(3)); } } @Test - public void testJdbcPrinterWithFirstEmptyValue2() throws IOException, ClassNotFoundException, SQLException { + void testJdbcPrinterWithFirstEmptyValue2() throws IOException, ClassNotFoundException, SQLException { final StringWriter sw = new StringWriter(); try (Connection connection = getH2Connection()) { try (Statement stmt = connection.createStatement(); @@ -822,65 +887,85 @@ public void testJdbcPrinterWithFirstEmptyValue2() throws IOException, ClassNotFo printer.printRecords(resultSet); } } - assertEquals("EMPTYVALUE" + recordSeparator + "\"\"" + recordSeparator, sw.toString()); + assertEquals("EMPTYVALUE" + RECORD_SEPARATOR + "\"\"" + RECORD_SEPARATOR, sw.toString()); } - @Test - public void testJdbcPrinterWithResultSet() throws IOException, ClassNotFoundException, SQLException { + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, Long.MAX_VALUE }) + void testJdbcPrinterWithResultSet(final long maxRows) throws IOException, ClassNotFoundException, SQLException { final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); try (Connection connection = getH2Connection()) { setUpTable(connection); try (Statement stmt = connection.createStatement(); ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); - CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet).print(sw)) { + CSVPrinter printer = format.withHeader(resultSet).print(sw)) { printer.printRecords(resultSet); } } - assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 + "\"" + 
recordSeparator, - sw.toString()); + final String resultString = sw.toString(); + final String header = "ID,NAME,TEXT"; + final String headerRow1 = header + RECORD_SEPARATOR + "1,r1,\"long text 1\"" + RECORD_SEPARATOR; + final String allRows = headerRow1 + "2,r2,\"" + longText2 + "\"" + RECORD_SEPARATOR; + final int expectedRowsWithHeader; + if (maxRows == 1) { + assertEquals(headerRow1, resultString); + expectedRowsWithHeader = 2; + } else { + assertEquals(allRows, resultString); + expectedRowsWithHeader = TABLE_AND_HEADER_RECORD_COUNT; + } + assertRowCount(CSVFormat.DEFAULT, resultString, expectedRowsWithHeader); } - @Test - public void testJdbcPrinterWithResultSetHeader() throws IOException, ClassNotFoundException, SQLException { + @ParameterizedTest + @ValueSource(longs = { -1, 0, 3, 4, Long.MAX_VALUE }) + void testJdbcPrinterWithResultSetHeader(final long maxRows) throws IOException, ClassNotFoundException, SQLException { final StringWriter sw = new StringWriter(); try (Connection connection = getH2Connection()) { setUpTable(connection); + final CSVFormat format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); try (Statement stmt = connection.createStatement(); - CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + CSVPrinter printer = new CSVPrinter(sw, format)) { try (ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { printer.printRecords(resultSet, true); assertEquals(TABLE_RECORD_COUNT, printer.getRecordCount()); - assertEquals("ID,NAME" + recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, sw.toString()); + assertEquals("ID,NAME" + RECORD_SEPARATOR + "1,r1" + RECORD_SEPARATOR + "2,r2" + RECORD_SEPARATOR, sw.toString()); } + assertRowCount(format, sw.toString(), TABLE_AND_HEADER_RECORD_COUNT); try (ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { printer.printRecords(resultSet, false); assertEquals(TABLE_RECORD_COUNT * 2, printer.getRecordCount()); - assertNotEquals("ID,NAME" 
+ recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, sw.toString()); + assertNotEquals("ID,NAME" + RECORD_SEPARATOR + "1,r1" + RECORD_SEPARATOR + "2,r2" + RECORD_SEPARATOR, sw.toString()); } + assertRowCount(CSVFormat.DEFAULT, sw.toString(), TABLE_AND_HEADER_RECORD_COUNT + TABLE_RECORD_COUNT); } } } - @Test - public void testJdbcPrinterWithResultSetMetaData() throws IOException, ClassNotFoundException, SQLException { + @ParameterizedTest + @ValueSource(longs = { -1, 0, 3, 4, Long.MAX_VALUE }) + void testJdbcPrinterWithResultSetMetaData(final long maxRows) throws IOException, ClassNotFoundException, SQLException { final StringWriter sw = new StringWriter(); try (Connection connection = getH2Connection()) { setUpTable(connection); + final CSVFormat format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); try (Statement stmt = connection.createStatement(); ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); - CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet.getMetaData()).print(sw)) { + CSVPrinter printer = format.withHeader(resultSet.getMetaData()).print(sw)) { // The header is the first record. 
assertEquals(1, printer.getRecordCount()); printer.printRecords(resultSet); assertEquals(3, printer.getRecordCount()); - assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 + "\"" + recordSeparator, + assertEquals("ID,NAME,TEXT" + RECORD_SEPARATOR + "1,r1,\"long text 1\"" + RECORD_SEPARATOR + "2,r2,\"" + longText2 + "\"" + RECORD_SEPARATOR, sw.toString()); } + assertRowCount(format, sw.toString(), TABLE_AND_HEADER_RECORD_COUNT); } } @Test - public void testJira135_part1() throws IOException { + void testJira135_part1() throws IOException { final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); final StringWriter sw = new StringWriter(); final List list = new LinkedList<>(); @@ -896,7 +981,7 @@ public void testJira135_part1() throws IOException { @Test @Disabled - public void testJira135_part2() throws IOException { + void testJira135_part2() throws IOException { final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); final StringWriter sw = new StringWriter(); final List list = new LinkedList<>(); @@ -911,7 +996,7 @@ public void testJira135_part2() throws IOException { } @Test - public void testJira135_part3() throws IOException { + void testJira135_part3() throws IOException { final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); final StringWriter sw = new StringWriter(); final List list = new LinkedList<>(); @@ -927,7 +1012,7 @@ public void testJira135_part3() throws IOException { @Test @Disabled - public void testJira135All() throws IOException { + void testJira135All() throws IOException { final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); final StringWriter sw = new StringWriter(); final List list = new LinkedList<>(); @@ -944,86 +1029,86 @@ public void 
testJira135All() throws IOException { } @Test - public void testMongoDbCsvBasic() throws IOException { + void testMongoDbCsvBasic() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); + assertEquals("a,b" + RECORD_SEPARATOR, sw.toString()); assertEquals(1, printer.getRecordCount()); } } @Test - public void testMongoDbCsvCommaInValue() throws IOException { + void testMongoDbCsvCommaInValue() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { printer.printRecord("a,b", "c"); - assertEquals("\"a,b\",c" + recordSeparator, sw.toString()); + assertEquals("\"a,b\",c" + RECORD_SEPARATOR, sw.toString()); assertEquals(1, printer.getRecordCount()); } } @Test - public void testMongoDbCsvDoubleQuoteInValue() throws IOException { + void testMongoDbCsvDoubleQuoteInValue() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { printer.printRecord("a \"c\" b", "d"); - assertEquals("\"a \"\"c\"\" b\",d" + recordSeparator, sw.toString()); + assertEquals("\"a \"\"c\"\" b\",d" + RECORD_SEPARATOR, sw.toString()); assertEquals(1, printer.getRecordCount()); } } @Test - public void testMongoDbCsvTabInValue() throws IOException { + void testMongoDbCsvTabInValue() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { printer.printRecord("a\tb", "c"); - assertEquals("a\tb,c" + recordSeparator, sw.toString()); + assertEquals("a\tb,c" + RECORD_SEPARATOR, sw.toString()); assertEquals(1, printer.getRecordCount()); } } @Test - public void testMongoDbTsvBasic() throws IOException { + void testMongoDbTsvBasic() throws IOException { final StringWriter sw = new StringWriter(); try 
(CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { printer.printRecord("a", "b"); - assertEquals("a\tb" + recordSeparator, sw.toString()); + assertEquals("a\tb" + RECORD_SEPARATOR, sw.toString()); assertEquals(1, printer.getRecordCount()); } } @Test - public void testMongoDbTsvCommaInValue() throws IOException { + void testMongoDbTsvCommaInValue() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { printer.printRecord("a,b", "c"); - assertEquals("a,b\tc" + recordSeparator, sw.toString()); + assertEquals("a,b\tc" + RECORD_SEPARATOR, sw.toString()); assertEquals(1, printer.getRecordCount()); } } @Test - public void testMongoDbTsvTabInValue() throws IOException { + void testMongoDbTsvTabInValue() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { printer.printRecord("a\tb", "c"); - assertEquals("\"a\tb\"\tc" + recordSeparator, sw.toString()); + assertEquals("\"a\tb\"\tc" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testMultiLineComment() throws IOException { + void testMultiLineComment() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { printer.printComment("This is a comment\non multiple lines"); - assertEquals("# This is a comment" + recordSeparator + "# on multiple lines" + recordSeparator, sw.toString()); + assertEquals("# This is a comment" + RECORD_SEPARATOR + "# on multiple lines" + RECORD_SEPARATOR, sw.toString()); assertEquals(0, printer.getRecordCount()); } } @Test - public void testMySqlNullOutput() throws IOException { + void testMySqlNullOutput() throws IOException { Object[] s = new String[] { "NULL", null }; CSVFormat format = CSVFormat.MYSQL.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.NON_NUMERIC); StringWriter writer = new 
StringWriter(); @@ -1125,39 +1210,39 @@ public void testMySqlNullOutput() throws IOException { } @Test - public void testMySqlNullStringDefault() { + void testMySqlNullStringDefault() { assertEquals("\\N", CSVFormat.MYSQL.getNullString()); } @Test - public void testNewCsvPrinterAppendableNullFormat() { + void testNewCsvPrinterAppendableNullFormat() { assertThrows(NullPointerException.class, () -> new CSVPrinter(new StringWriter(), null)); } @Test - public void testNewCsvPrinterNullAppendableFormat() { + void testNewCsvPrinterNullAppendableFormat() { assertThrows(NullPointerException.class, () -> new CSVPrinter(null, CSVFormat.DEFAULT)); } @Test - public void testNotFlushable() throws IOException { + void testNotFlushable() throws IOException { final Appendable out = new StringBuilder(); try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) { printer.printRecord("a", "b", "c"); - assertEquals("a,b,c" + recordSeparator, out.toString()); + assertEquals("a,b,c" + RECORD_SEPARATOR, out.toString()); printer.flush(); } } @Test - public void testParseCustomNullValues() throws IOException { + void testParseCustomNullValues() throws IOException { final StringWriter sw = new StringWriter(); final CSVFormat format = CSVFormat.DEFAULT.withNullString("NULL"); try (CSVPrinter printer = new CSVPrinter(sw, format)) { printer.printRecord("a", null, "b"); } final String csvString = sw.toString(); - assertEquals("a,NULL,b" + recordSeparator, csvString); + assertEquals("a,NULL,b" + RECORD_SEPARATOR, csvString); try (CSVParser iterable = format.parse(new StringReader(csvString))) { final Iterator iterator = iterable.iterator(); final CSVRecord record = iterator.next(); @@ -1169,7 +1254,7 @@ public void testParseCustomNullValues() throws IOException { } @Test - public void testPlainEscaped() throws IOException { + void testPlainEscaped() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, 
CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { printer.print("abc"); @@ -1179,7 +1264,7 @@ public void testPlainEscaped() throws IOException { } @Test - public void testPlainPlain() throws IOException { + void testPlainPlain() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { printer.print("abc"); @@ -1189,7 +1274,7 @@ public void testPlainPlain() throws IOException { } @Test - public void testPlainQuoted() throws IOException { + void testPlainQuoted() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { printer.print("abc"); @@ -1199,7 +1284,7 @@ public void testPlainQuoted() throws IOException { @Test @Disabled - public void testPostgreSqlCsvNullOutput() throws IOException { + void testPostgreSqlCsvNullOutput() throws IOException { Object[] s = new String[] { "NULL", null }; CSVFormat format = CSVFormat.POSTGRESQL_CSV.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); StringWriter writer = new StringWriter(); @@ -1302,7 +1387,7 @@ public void testPostgreSqlCsvNullOutput() throws IOException { @Test @Disabled - public void testPostgreSqlCsvTextOutput() throws IOException { + void testPostgreSqlCsvTextOutput() throws IOException { Object[] s = new String[] { "NULL", null }; CSVFormat format = CSVFormat.POSTGRESQL_TEXT.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); StringWriter writer = new StringWriter(); @@ -1404,27 +1489,27 @@ public void testPostgreSqlCsvTextOutput() throws IOException { } @Test - public void testPostgreSqlNullStringDefaultCsv() { + void testPostgreSqlNullStringDefaultCsv() { assertEquals("", CSVFormat.POSTGRESQL_CSV.getNullString()); } @Test - public void testPostgreSqlNullStringDefaultText() { + void testPostgreSqlNullStringDefaultText() { assertEquals("\\N", 
CSVFormat.POSTGRESQL_TEXT.getNullString()); } @Test - public void testPrint() throws IOException { + void testPrint() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = CSVFormat.DEFAULT.print(sw)) { assertInitialState(printer); printer.printRecord("a", "b\\c"); - assertEquals("a,b\\c" + recordSeparator, sw.toString()); + assertEquals("a,b\\c" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrintCSVParser() throws IOException { + void testPrintCSVParser() throws IOException { // @formatter:off final String code = "a1,b1\n" + // 1) "a2,b2\n" + // 2) @@ -1442,12 +1527,12 @@ public void testPrintCSVParser() throws IOException { try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { final List records = parser.getRecords(); assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); + Utils.compare("Fail", res, records, -1); } } @Test - public void testPrintCSVRecord() throws IOException { + void testPrintCSVRecord() throws IOException { // @formatter:off final String code = "a1,b1\n" + // 1) "a2,b2\n" + // 2) @@ -1470,20 +1555,21 @@ public void testPrintCSVRecord() throws IOException { try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { final List records = parser.getRecords(); assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); + Utils.compare("Fail", res, records, -1); } } - @Test - public void testPrintCSVRecords() throws IOException { + @ParameterizedTest + @ValueSource(longs = { -1, 0, 3, 4, Long.MAX_VALUE }) + void testPrintCSVRecords(final long maxRows) throws IOException { // @formatter:off final String code = "a1,b1\n" + // 1) "a2,b2\n" + // 2) "a3,b3\n" + // 3) "a4,b4\n"; // 4) // @formatter:on - final String[][] res = { { "a1", "b1" }, { "a2", "b2" }, { "a3", "b3" }, { "a4", "b4" } }; - final CSVFormat format = CSVFormat.DEFAULT; + final String[][] expected = { { "a1", "b1" }, { "a2", "b2" }, { "a3", "b3" }, { "a4", "b4" } }; + final CSVFormat 
format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); final StringWriter sw = new StringWriter(); try (CSVPrinter printer = format.print(sw); CSVParser parser = CSVParser.parse(code, format)) { @@ -1493,103 +1579,103 @@ public void testPrintCSVRecords() throws IOException { try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { final List records = parser.getRecords(); assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); + Utils.compare("Fail", expected, records, maxRows); } } @Test - public void testPrintCustomNullValues() throws IOException { + void testPrintCustomNullValues() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withNullString("NULL"))) { assertInitialState(printer); printer.printRecord("a", null, "b"); - assertEquals("a,NULL,b" + recordSeparator, sw.toString()); + assertEquals("a,NULL,b" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrinter1() throws IOException { + void testPrinter1() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a", "b"); assertEquals(1, printer.getRecordCount()); - assertEquals("a,b" + recordSeparator, sw.toString()); + assertEquals("a,b" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrinter2() throws IOException { + void testPrinter2() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a,b", "b"); - assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); + assertEquals("\"a,b\",b" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrinter3() throws IOException { + void testPrinter3() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, 
CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a, b", "b "); - assertEquals("\"a, b\",\"b \"" + recordSeparator, sw.toString()); + assertEquals("\"a, b\",\"b \"" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrinter4() throws IOException { + void testPrinter4() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a", "b\"c"); - assertEquals("a,\"b\"\"c\"" + recordSeparator, sw.toString()); + assertEquals("a,\"b\"\"c\"" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrinter5() throws IOException { + void testPrinter5() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a", "b\nc"); - assertEquals("a,\"b\nc\"" + recordSeparator, sw.toString()); + assertEquals("a,\"b\nc\"" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrinter6() throws IOException { + void testPrinter6() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a", "b\r\nc"); - assertEquals("a,\"b\r\nc\"" + recordSeparator, sw.toString()); + assertEquals("a,\"b\r\nc\"" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrinter7() throws IOException { + void testPrinter7() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a", "b\\c"); - assertEquals("a,b\\c" + recordSeparator, sw.toString()); + assertEquals("a,b\\c" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrintNullValues() throws IOException { + void testPrintNullValues() throws IOException { final StringWriter sw = new 
StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { assertInitialState(printer); printer.printRecord("a", null, "b"); - assertEquals("a,,b" + recordSeparator, sw.toString()); + assertEquals("a,,b" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testPrintOnePositiveInteger() throws IOException { + void testPrintOnePositiveInteger() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL))) { assertInitialState(printer); @@ -1609,7 +1695,7 @@ public void testPrintOnePositiveInteger() throws IOException { * @throws IOException Not expected to happen */ @Test - public void testPrintReaderWithoutQuoteToAppendable() throws IOException { + void testPrintReaderWithoutQuoteToAppendable() throws IOException { final StringBuilder sb = new StringBuilder(); final String content = "testValue"; try (CSVPrinter printer = new CSVPrinter(sb, CSVFormat.DEFAULT.withQuote(null))) { @@ -1630,7 +1716,7 @@ public void testPrintReaderWithoutQuoteToAppendable() throws IOException { * @throws IOException Not expected to happen */ @Test - public void testPrintReaderWithoutQuoteToWriter() throws IOException { + void testPrintReaderWithoutQuoteToWriter() throws IOException { final StringWriter sw = new StringWriter(); final String content = "testValue"; try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { @@ -1641,7 +1727,7 @@ public void testPrintReaderWithoutQuoteToWriter() throws IOException { } @Test - public void testPrintRecordStream() throws IOException { + void testPrintRecordStream() throws IOException { // @formatter:off final String code = "a1,b1\n" + // 1) "a2,b2\n" + // 2) @@ -1662,12 +1748,12 @@ public void testPrintRecordStream() throws IOException { try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { final List records = parser.getRecords(); assertFalse(records.isEmpty()); - 
Utils.compare("Fail", res, records); + Utils.compare("Fail", res, records, -1); } } @Test - public void testPrintRecordsWithCSVRecord() throws IOException { + void testPrintRecordsWithCSVRecord() throws IOException { final String[] values = { "A", "B", "C" }; final String rowData = StringUtils.join(values, ','); final CharArrayWriter charArrayWriter = new CharArrayWriter(0); @@ -1684,7 +1770,7 @@ public void testPrintRecordsWithCSVRecord() throws IOException { } @Test - public void testPrintRecordsWithEmptyVector() throws IOException { + void testPrintRecordsWithEmptyVector() throws IOException { final PrintStream out = System.out; try { System.setOut(new PrintStream(NullOutputStream.INSTANCE)); @@ -1702,7 +1788,7 @@ public void testPrintRecordsWithEmptyVector() throws IOException { } @Test - public void testPrintRecordsWithObjectArray() throws IOException { + void testPrintRecordsWithObjectArray() throws IOException { final CharArrayWriter charArrayWriter = new CharArrayWriter(0); final Object[] objectArray = new Object[6]; try (CSVPrinter printer = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { @@ -1716,7 +1802,7 @@ public void testPrintRecordsWithObjectArray() throws IOException { } @Test - public void testPrintRecordsWithResultSetOneRow() throws IOException, SQLException { + void testPrintRecordsWithResultSetOneRow() throws IOException, SQLException { try (CSVPrinter printer = CSVFormat.MYSQL.printer()) { try (ResultSet resultSet = new SimpleResultSet()) { assertInitialState(printer); @@ -1728,119 +1814,169 @@ public void testPrintRecordsWithResultSetOneRow() throws IOException, SQLExcepti } @Test - public void testPrintToFileWithCharsetUtf16Be() throws IOException { + void testPrintToFileWithCharsetUtf16Be() throws IOException { final File file = createTempFile(); try (CSVPrinter printer = CSVFormat.DEFAULT.print(file, StandardCharsets.UTF_16BE)) { printer.printRecord("a", "b\\c"); } - assertEquals("a,b\\c" + recordSeparator, 
FileUtils.readFileToString(file, StandardCharsets.UTF_16BE)); + assertEquals("a,b\\c" + RECORD_SEPARATOR, FileUtils.readFileToString(file, StandardCharsets.UTF_16BE)); } @Test - public void testPrintToFileWithDefaultCharset() throws IOException { + void testPrintToFileWithDefaultCharset() throws IOException { final File file = createTempFile(); try (CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { printer.printRecord("a", "b\\c"); } - assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, Charset.defaultCharset())); + assertEquals("a,b\\c" + RECORD_SEPARATOR, FileUtils.readFileToString(file, Charset.defaultCharset())); } @Test - public void testPrintToPathWithDefaultCharset() throws IOException { + void testPrintToPathWithDefaultCharset() throws IOException { final Path file = createTempPath(); try (CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { printer.printRecord("a", "b\\c"); } - assertEquals("a,b\\c" + recordSeparator, new String(Files.readAllBytes(file), Charset.defaultCharset())); + assertEquals("a,b\\c" + RECORD_SEPARATOR, new String(Files.readAllBytes(file), Charset.defaultCharset())); } @Test - public void testQuoteAll() throws IOException { + void testQuoteAll() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL))) { printer.printRecord("a", "b\nc", "d"); - assertEquals("\"a\",\"b\nc\",\"d\"" + recordSeparator, sw.toString()); + assertEquals("\"a\",\"b\nc\",\"d\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testQuoteCharEscapedWithQuoteModeNone() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote('"').setEscape('?').setQuoteMode(QuoteMode.NONE).get(); + final StringWriter sw = new StringWriter(); + final String col1 = "\"abc"; + final String col2 = "x\"y"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + 
printer.printRecord(col1, col2); + printer.printRecord(new StringReader(col1), new StringReader(col2)); + } + assertEquals("?\"abc,x?\"y" + RECORD_SEPARATOR + "?\"abc,x?\"y" + RECORD_SEPARATOR, sw.toString()); + // The emitted records must read back as the original values. + try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + for (final CSVRecord record : records) { + assertEquals(col1, record.get(0)); + assertEquals(col2, record.get(1)); + } } } @Test - public void testQuoteCommaFirstChar() throws IOException { + void testQuoteCommaFirstChar() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { printer.printRecord(","); - assertEquals("\",\"" + recordSeparator, sw.toString()); + assertEquals("\",\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testQuoteCommentMarkerFirstChar() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";comment-like"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + // A real comment is written with the marker, unquoted. + printer.printComment("a real comment"); + // A value starting with the marker is quoted, so it does not read back as a comment. + printer.printRecord(col1, "b"); + // The marker past the first character does not start a comment, so only the leading-marker value is quoted. + printer.printRecord("a;b", ";c"); + } + final String string = sw.toString(); + assertEquals("; a real comment" + RECORD_SEPARATOR + + "\";comment-like\",b" + RECORD_SEPARATOR + + "a;b,\";c\"" + RECORD_SEPARATOR, string); + // The comment is dropped on read; both data records survive intact. 
+ try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + assertEquals(col1, records.get(0).get(0)); + assertEquals("b", records.get(0).get(1)); + assertEquals("a;b", records.get(1).get(0)); + assertEquals(";c", records.get(1).get(1)); } } @Test - public void testQuoteNonNumeric() throws IOException { + void testQuoteNonNumeric() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.NON_NUMERIC))) { printer.printRecord("a", "b\nc", Integer.valueOf(1)); - assertEquals("\"a\",\"b\nc\",1" + recordSeparator, sw.toString()); + assertEquals("\"a\",\"b\nc\",1" + RECORD_SEPARATOR, sw.toString()); } } @Test - public void testRandomDefault() throws Exception { + void testRandomDefault() throws Exception { doRandom(CSVFormat.DEFAULT, ITERATIONS_FOR_RANDOM_TEST); } @Test - public void testRandomExcel() throws Exception { + void testRandomExcel() throws Exception { doRandom(CSVFormat.EXCEL, ITERATIONS_FOR_RANDOM_TEST); } @Test @Disabled - public void testRandomMongoDbCsv() throws Exception { + void testRandomMongoDbCsv() throws Exception { doRandom(CSVFormat.MONGODB_CSV, ITERATIONS_FOR_RANDOM_TEST); } @Test - public void testRandomMySql() throws Exception { + void testRandomMySql() throws Exception { doRandom(CSVFormat.MYSQL, ITERATIONS_FOR_RANDOM_TEST); } @Test @Disabled - public void testRandomOracle() throws Exception { + void testRandomOracle() throws Exception { doRandom(CSVFormat.ORACLE, ITERATIONS_FOR_RANDOM_TEST); } @Test @Disabled - public void testRandomPostgreSqlCsv() throws Exception { + void testRandomPostgreSqlCsv() throws Exception { doRandom(CSVFormat.POSTGRESQL_CSV, ITERATIONS_FOR_RANDOM_TEST); } @Test - public void testRandomPostgreSqlText() throws Exception { + void testRandomPostgreSqlText() throws Exception { doRandom(CSVFormat.POSTGRESQL_TEXT, ITERATIONS_FOR_RANDOM_TEST); } 
@Test - public void testRandomRfc4180() throws Exception { + void testRandomRfc4180() throws Exception { doRandom(CSVFormat.RFC4180, ITERATIONS_FOR_RANDOM_TEST); } @Test - public void testRandomTdf() throws Exception { + void testRandomTdf() throws Exception { doRandom(CSVFormat.TDF, ITERATIONS_FOR_RANDOM_TEST); } @Test - public void testSingleLineComment() throws IOException { + void testSingleLineComment() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { printer.printComment("This is a comment"); - assertEquals("# This is a comment" + recordSeparator, sw.toString()); + assertEquals("# This is a comment" + RECORD_SEPARATOR, sw.toString()); assertEquals(0, printer.getRecordCount()); } } @Test - public void testSingleQuoteQuoted() throws IOException { + void testSingleQuoteQuoted() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { printer.print("a'b'c"); @@ -1850,7 +1986,7 @@ public void testSingleQuoteQuoted() throws IOException { } @Test - public void testSkipHeaderRecordFalse() throws IOException { + void testSkipHeaderRecordFalse() throws IOException { // functionally identical to testHeader, used to test CSV-153 final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(false))) { @@ -1861,7 +1997,7 @@ public void testSkipHeaderRecordFalse() throws IOException { } @Test - public void testSkipHeaderRecordTrue() throws IOException { + void testSkipHeaderRecordTrue() throws IOException { // functionally identical to testHeaderNotSet, used to test CSV-153 final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(true))) { @@ -1872,7 +2008,7 
@@ public void testSkipHeaderRecordTrue() throws IOException { } @Test - public void testTrailingDelimiterOnTwoColumns() throws IOException { + void testTrailingDelimiterOnTwoColumns() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrailingDelimiter())) { printer.printRecord("A", "B"); @@ -1881,7 +2017,7 @@ public void testTrailingDelimiterOnTwoColumns() throws IOException { } @Test - public void testTrimOffOneColumn() throws IOException { + void testTrimOffOneColumn() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim(false))) { printer.print(" A "); @@ -1890,7 +2026,7 @@ public void testTrimOffOneColumn() throws IOException { } @Test - public void testTrimOnOneColumn() throws IOException { + void testTrimOnOneColumn() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { printer.print(" A "); @@ -1899,7 +2035,7 @@ public void testTrimOnOneColumn() throws IOException { } @Test - public void testTrimOnTwoColumns() throws IOException { + void testTrimOnTwoColumns() throws IOException { final StringWriter sw = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { printer.print(" A "); diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index f9fedc4b98..94060d62b2 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -45,7 +45,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -public class CSVRecordTest { +class CSVRecordTest { private enum EnumFixture { UNKNOWN_COLUMN @@ -86,7 +86,7 @@ record = parser.iterator().next(); } @Test - public void testCSVRecordNULLValues() throws IOException 
{ + void testCSVRecordNULLValues() throws IOException { try (CSVParser parser = CSVParser.parse("A,B\r\nONE,TWO", CSVFormat.DEFAULT.withHeader())) { final CSVRecord csvRecord = new CSVRecord(parser, null, null, 0L, 0L, 0L); assertEquals(0, csvRecord.size()); @@ -95,7 +95,7 @@ public void testCSVRecordNULLValues() throws IOException { } @Test - public void testDuplicateHeaderGet() throws IOException { + void testDuplicateHeaderGet() throws IOException { final String csv = "A,A,B,B\n1,2,5,6\n"; final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().get(); @@ -110,7 +110,7 @@ public void testDuplicateHeaderGet() throws IOException { } @Test - public void testDuplicateHeaderToMap() throws IOException { + void testDuplicateHeaderToMap() throws IOException { final String csv = "A,A,B,B\n1,2,5,6\n"; final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().get(); @@ -126,64 +126,64 @@ public void testDuplicateHeaderToMap() throws IOException { } @Test - public void testGetInt() { + void testGetInt() { assertEquals(values[0], record.get(0)); assertEquals(values[1], record.get(1)); assertEquals(values[2], record.get(2)); } @Test - public void testGetNullEnum() { + void testGetNullEnum() { assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get((Enum) null)); } @Test - public void testGetString() { + void testGetString() { assertEquals(values[0], recordWithHeader.get(EnumHeader.FIRST.name())); assertEquals(values[1], recordWithHeader.get(EnumHeader.SECOND.name())); assertEquals(values[2], recordWithHeader.get(EnumHeader.THIRD.name())); } @Test - public void testGetStringInconsistentRecord() { + void testGetStringInconsistentRecord() { headerMap.put("fourth", Integer.valueOf(4)); assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get("fourth")); } @Test - public void testGetStringNoHeader() { + void testGetStringNoHeader() { assertThrows(IllegalStateException.class, () -> record.get("first")); } @Test - public void 
testGetUnmappedEnum() { + void testGetUnmappedEnum() { assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get(EnumFixture.UNKNOWN_COLUMN)); } @Test - public void testGetUnmappedName() { + void testGetUnmappedName() { assertThrows(IllegalArgumentException.class, () -> assertNull(recordWithHeader.get("fourth"))); } @Test - public void testGetUnmappedNegativeInt() { + void testGetUnmappedNegativeInt() { assertThrows(ArrayIndexOutOfBoundsException.class, () -> recordWithHeader.get(Integer.MIN_VALUE)); } @Test - public void testGetUnmappedPositiveInt() { + void testGetUnmappedPositiveInt() { assertThrows(ArrayIndexOutOfBoundsException.class, () -> recordWithHeader.get(Integer.MAX_VALUE)); } @Test - public void testGetWithEnum() { + void testGetWithEnum() { assertEquals(recordWithHeader.get("FIRST"), recordWithHeader.get(EnumHeader.FIRST)); assertEquals(recordWithHeader.get("SECOND"), recordWithHeader.get(EnumHeader.SECOND)); assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get(EnumFixture.UNKNOWN_COLUMN)); } @Test - public void testIsConsistent() { + void testIsConsistent() { assertTrue(record.isConsistent()); assertTrue(recordWithHeader.isConsistent()); final Map map = recordWithHeader.getParser().getHeaderMap(); @@ -193,7 +193,7 @@ public void testIsConsistent() { } @Test - public void testIsInconsistent() throws IOException { + void testIsInconsistent() throws IOException { final String[] headers = { "first", "second", "third" }; final String rowData = StringUtils.join(values, ','); try (CSVParser parser = CSVFormat.DEFAULT.withHeader(headers).parse(new StringReader(rowData))) { @@ -205,14 +205,14 @@ public void testIsInconsistent() throws IOException { } @Test - public void testIsMapped() { + void testIsMapped() { assertFalse(record.isMapped("first")); assertTrue(recordWithHeader.isMapped(EnumHeader.FIRST.name())); assertFalse(recordWithHeader.isMapped("fourth")); } @Test - public void testIsSetInt() { + void testIsSetInt() { 
assertFalse(record.isSet(-1)); assertTrue(record.isSet(0)); assertTrue(record.isSet(2)); @@ -222,14 +222,14 @@ public void testIsSetInt() { } @Test - public void testIsSetString() { + void testIsSetString() { assertFalse(record.isSet("first")); assertTrue(recordWithHeader.isSet(EnumHeader.FIRST.name())); assertFalse(recordWithHeader.isSet("DOES NOT EXIST")); } @Test - public void testIterator() { + void testIterator() { int i = 0; for (final String value : record) { assertEquals(values[i], value); @@ -238,7 +238,7 @@ public void testIterator() { } @Test - public void testPutInMap() { + void testPutInMap() { final Map map = new ConcurrentHashMap<>(); this.recordWithHeader.putIn(map); validateMap(map, false); @@ -248,7 +248,7 @@ public void testPutInMap() { } @Test - public void testRemoveAndAddColumns() throws IOException { + void testRemoveAndAddColumns() throws IOException { // do: try (CSVPrinter printer = new CSVPrinter(new StringBuilder(), CSVFormat.DEFAULT)) { final Map map = recordWithHeader.toMap(); @@ -263,7 +263,7 @@ public void testRemoveAndAddColumns() throws IOException { } @Test - public void testSerialization() throws IOException, ClassNotFoundException { + void testSerialization() throws IOException, ClassNotFoundException { final CSVRecord shortRec; try (CSVParser parser = CSVParser.parse("A,B\n#my comment\nOne,Two", CSVFormat.DEFAULT.withHeader().withCommentMarker('#'))) { shortRec = parser.iterator().next(); @@ -296,7 +296,7 @@ public void testSerialization() throws IOException, ClassNotFoundException { } @Test - public void testStream() { + void testStream() { final AtomicInteger i = new AtomicInteger(); record.stream().forEach(value -> { assertEquals(values[i.get()], value); @@ -305,7 +305,7 @@ public void testStream() { } @Test - public void testToListAdd() { + void testToListAdd() { final String[] expected = values.clone(); final List list = record.toList(); list.add("Last"); @@ -315,7 +315,7 @@ public void testToListAdd() { } @Test - public 
void testToListFor() { + void testToListFor() { int i = 0; for (final String value : record.toList()) { assertEquals(values[i], value); @@ -324,7 +324,7 @@ public void testToListFor() { } @Test - public void testToListForEach() { + void testToListForEach() { final AtomicInteger i = new AtomicInteger(); record.toList().forEach(e -> { assertEquals(values[i.getAndIncrement()], e); @@ -332,7 +332,7 @@ public void testToListForEach() { } @Test - public void testToListSet() { + void testToListSet() { final String[] expected = values.clone(); final List list = record.toList(); list.set(list.size() - 1, "Last"); @@ -342,13 +342,13 @@ public void testToListSet() { } @Test - public void testToMap() { + void testToMap() { final Map map = this.recordWithHeader.toMap(); validateMap(map, true); } @Test - public void testToMapWithNoHeader() throws Exception { + void testToMapWithNoHeader() throws Exception { try (CSVParser parser = CSVParser.parse("a,b", CSVFormat.newFormat(','))) { final CSVRecord shortRec = parser.iterator().next(); final Map map = shortRec.toMap(); @@ -358,7 +358,7 @@ public void testToMapWithNoHeader() throws Exception { } @Test - public void testToMapWithShortRecord() throws Exception { + void testToMapWithShortRecord() throws Exception { try (CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { final CSVRecord shortRec = parser.iterator().next(); shortRec.toMap(); @@ -366,7 +366,7 @@ public void testToMapWithShortRecord() throws Exception { } @Test - public void testToString() { + void testToString() { assertNotNull(recordWithHeader.toString()); assertTrue(recordWithHeader.toString().contains("comment=")); assertTrue(recordWithHeader.toString().contains("recordNumber=")); diff --git a/src/test/java/org/apache/commons/csv/CsvAssertions.java b/src/test/java/org/apache/commons/csv/CsvAssertions.java new file mode 100644 index 0000000000..b6c2b5d9cd --- /dev/null +++ b/src/test/java/org/apache/commons/csv/CsvAssertions.java 
@@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; + +public class CsvAssertions { + + public static void assertValuesEquals(final String[] expected, final CSVRecord actual) { + assertArrayEquals(expected, actual.values()); + } +} diff --git a/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java b/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java index b7db39f529..b8d9b9f198 100644 --- a/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java +++ b/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java @@ -26,13 +26,14 @@ import static org.junit.jupiter.api.Assertions.assertNull; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import org.junit.jupiter.api.Test; /** * Test {@link ExtendedBufferedReader}. 
*/ -public class ExtendedBufferedReaderTest { +class ExtendedBufferedReaderTest { static final String LF = "\n"; static final String CR = "\r"; @@ -44,7 +45,7 @@ private ExtendedBufferedReader createBufferedReader(final String s) { } @Test - public void testEmptyInput() throws Exception { + void testEmptyInput() throws Exception { try (ExtendedBufferedReader br = createBufferedReader("")) { assertEquals(EOF, br.read()); assertEquals(EOF, br.peek()); @@ -58,7 +59,7 @@ public void testEmptyInput() throws Exception { * Test to illustrate https://issues.apache.org/jira/browse/CSV-75 */ @Test - public void testReadChar() throws Exception { + void testReadChar() throws Exception { final String test = "a" + LF + "b" + CR + "c" + LF + LF + "d" + CR + CR + "e" + LFCR + "f " + CRLF; // EOL eol EOL EOL eol eol EOL+CR EOL final int eolCount = 9; @@ -94,7 +95,7 @@ public void testReadChar() throws Exception { } @Test - public void testReadingInDifferentBuffer() throws Exception { + void testReadingInDifferentBuffer() throws Exception { final char[] tmp1 = new char[2]; final char[] tmp2 = new char[4]; try (ExtendedBufferedReader reader = createBufferedReader("1\r\n2\r\n")) { @@ -105,7 +106,20 @@ public void testReadingInDifferentBuffer() throws Exception { } @Test - public void testReadLine() throws Exception { + void testReadingSupplementaryCharacterTracksBytes() throws Exception { + final String input = "😀"; + final char[] buffer = new char[input.length()]; + try (ExtendedBufferedReader reader = new ExtendedBufferedReader(new StringReader(input), StandardCharsets.UTF_8, true)) { + assertEquals(input.length(), reader.read(buffer, 0, buffer.length)); + assertArrayEquals(input.toCharArray(), buffer); + assertEquals(input.getBytes(StandardCharsets.UTF_8).length, reader.getBytesRead()); + assertEquals(input.length(), reader.getPosition()); + assertEquals(input.charAt(input.length() - 1), reader.getLastChar()); + } + } + + @Test + void testReadLine() throws Exception { try 
(ExtendedBufferedReader br = createBufferedReader("")) { assertNull(br.readLine()); } @@ -148,7 +162,7 @@ public void testReadLine() throws Exception { } @Test - public void testReadLookahead1() throws Exception { + void testReadLookahead1() throws Exception { try (ExtendedBufferedReader br = createBufferedReader("1\n2\r3\n")) { assertEquals(0, br.getLineNumber()); assertEquals('1', br.peek()); @@ -207,7 +221,7 @@ public void testReadLookahead1() throws Exception { } @Test - public void testReadLookahead2() throws Exception { + void testReadLookahead2() throws Exception { final char[] ref = new char[5]; final char[] res = new char[5]; diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java index cff3a77294..aaf8e206b3 100644 --- a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java +++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.apache.commons.csv; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -27,34 +28,48 @@ import org.junit.jupiter.api.Test; -public class JiraCsv196Test { +class JiraCsv196Test { private Reader getTestInput(final String path) { return new InputStreamReader(ClassLoader.getSystemClassLoader().getResourceAsStream(path)); } @Test - public void testParseFourBytes() throws IOException { + void testParseFourBytes() throws IOException { final CSVFormat format = CSVFormat.Builder.create().setDelimiter(',').setQuote('\'').get(); - try (CSVParser parser = new CSVParser.Builder().setFormat(format).setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv")) - .setCharset(StandardCharsets.UTF_8).setTrackBytes(true).get()) { + // @formatter:off + try (@SuppressWarnings("resource") // parser closes the reader. 
+ CSVParser parser = new CSVParser.Builder() + .setFormat(format) + .setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv")) + .setCharset(StandardCharsets.UTF_8) + .setTrackBytes(true) + .get()) { + // @formatter:on final long[] charByteKey = { 0, 84, 701, 1318, 1935 }; int idx = 0; - for (CSVRecord record : parser) { - assertEquals(charByteKey[idx++], record.getBytePosition(), "index " + idx); + for (final CSVRecord record : parser) { + assertEquals(charByteKey[idx++], record.getBytePosition(), "At index " + idx); } } } @Test - public void testParseThreeBytes() throws IOException { + void testParseThreeBytes() throws IOException { final CSVFormat format = CSVFormat.Builder.create().setDelimiter(',').setQuote('\'').get(); - try (CSVParser parser = new CSVParser.Builder().setFormat(format).setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv")) - .setCharset(StandardCharsets.UTF_8).setTrackBytes(true).get()) { + // @formatter:off + try (@SuppressWarnings("resource") // parser closes the reader. + CSVParser parser = new CSVParser.Builder() + .setFormat(format) + .setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv")) + .setCharset(StandardCharsets.UTF_8) + .setTrackBytes(true) + .get()) { + // @formatter:on final long[] charByteKey = { 0, 89, 242, 395 }; int idx = 0; - for (CSVRecord record : parser) { - assertEquals(charByteKey[idx++], record.getBytePosition(), "index " + idx); + for (final CSVRecord record : parser) { + assertEquals(charByteKey[idx++], record.getBytePosition(), "At index " + idx); } } } diff --git a/src/test/java/org/apache/commons/csv/JiraCsv318Test.java b/src/test/java/org/apache/commons/csv/JiraCsv318Test.java new file mode 100644 index 0000000000..984509e87d --- /dev/null +++ b/src/test/java/org/apache/commons/csv/JiraCsv318Test.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.apache.commons.io.function.IOConsumer; +import org.apache.commons.io.function.IOStream; +import org.apache.commons.lang3.ArrayUtils; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/projects/CSV/issues/CSV-318?filter=allopenissues + * + * @see CSVPrinter + */ +class JiraCsv318Test { + + private void checkOutput(final ByteArrayOutputStream baos) { + checkOutput(baos.toString()); + } + + private void checkOutput(final String string) { + assertEquals("col a,col b,col c", string.trim()); + } + + private Stream newParallelStream() { + // returned stream is intermediate + return newStream().parallel(); + } + + private CSVPrinter newPrinter(final ByteArrayOutputStream baos) throws IOException { + return new CSVPrinter(new PrintWriter(baos), CSVFormat.DEFAULT); + } + + private Stream newSequentialStream() { + // returned stream is intermediate + return newStream().sequential(); + } + + private Stream newStream() { + return Stream.of("col a", "col b", 
"col c"); + } + + @Test + void testDefaultStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + printer.printRecord(newStream()); + } + checkOutput(baos); + } + + @SuppressWarnings("resource") + @Test + void testParallelIOStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + IOStream.adapt(newParallelStream()).forEachOrdered(printer::print); + } + // No EOR marker in this test intentionally, so checkOutput will trim. + checkOutput(baos); + } + + @SuppressWarnings("resource") + @Test + void testParallelIOStreamSynchronizedPrinterNotUsed() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + synchronized (printer) { + IOStream.adapt(newParallelStream()).forEachOrdered(IOConsumer.noop()); + } + } + final List list = new ArrayList<>(); + try (CSVPrinter printer = newPrinter(baos)) { + synchronized (printer) { + IOStream.adapt(newParallelStream()).forEachOrdered(list::add); + } + } + // No EOR marker in this test intentionally, so checkOutput will trim. 
+ checkOutput(String.join(",", list.toArray(ArrayUtils.EMPTY_STRING_ARRAY))); + } + + @Test + void testParallelStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + printer.printRecord(newParallelStream()); + } + checkOutput(baos); + } + + @Test + void testSequentialStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + printer.printRecord(newSequentialStream()); + } + checkOutput(baos); + } +} diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java index 38ab125530..a76f6e513b 100644 --- a/src/test/java/org/apache/commons/csv/LexerTest.java +++ b/src/test/java/org/apache/commons/csv/LexerTest.java @@ -41,7 +41,7 @@ /** */ -public class LexerTest { +class LexerTest { private static void assertContent(final String expectedContent, final Token actualToken) { assertEquals(expectedContent, actualToken.content.toString()); @@ -71,7 +71,7 @@ public void setUp() { // simple token with escaping enabled @Test - public void testBackslashWithEscaping() throws IOException { + void testBackslashWithEscaping() throws IOException { /* * file: a,\,,b \,, */ @@ -91,7 +91,7 @@ public void testBackslashWithEscaping() throws IOException { // simple token with escaping not enabled @Test - public void testBackslashWithoutEscaping() throws IOException { + void testBackslashWithoutEscaping() throws IOException { /* * file: a,\,,b \,, */ @@ -113,16 +113,22 @@ public void testBackslashWithoutEscaping() throws IOException { } @Test - public void testBackspace() throws Exception { + void testBackspace() throws Exception { try (Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) { assertNextToken("character" + BACKSPACE + "NotEscaped", lexer); } } @Test - public void testComments() throws IOException { - final String 
code = "first,line,\n" + "second,line,tokenWith#no-comment\n" + "# comment line \n" + - "third,line,#no-comment\n" + "# penultimate comment\n" + "# Final comment\n"; + void testComments() throws IOException { + // @formatter:off + final String code = "first,line,\n" + + "second,line,tokenWith#no-comment\n" + + "# comment line \n" + + "third,line,#no-comment\n" + + "# penultimate comment\n" + + "# Final comment\n"; + // @formatter:on final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#'); try (Lexer lexer = createLexer(code, format)) { assertNextToken(TOKEN, "first", lexer); @@ -143,7 +149,7 @@ public void testComments() throws IOException { } @Test - public void testCommentsAndEmptyLines() throws IOException { + void testCommentsAndEmptyLines() throws IOException { final String code = "1,2,3,\n" + // 1 "\n" + // 1b "\n" + // 1c @@ -189,7 +195,7 @@ public void testCommentsAndEmptyLines() throws IOException { } @Test - public void testCR() throws Exception { + void testCR() throws Exception { try (Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) { assertNextToken("character", lexer); assertNextToken("NotEscaped", lexer); @@ -198,7 +204,7 @@ public void testCR() throws Exception { // From CSV-1 @Test - public void testDelimiterIsWhitespace() throws IOException { + void testDelimiterIsWhitespace() throws IOException { final String code = "one\ttwo\t\tfour \t five\t six"; try (Lexer lexer = createLexer(code, CSVFormat.TDF)) { assertNextToken(TOKEN, "one", lexer); @@ -210,8 +216,27 @@ public void testDelimiterIsWhitespace() throws IOException { } } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the + * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped. 
+ */ @Test - public void testEOFWithoutClosingQuote() throws Exception { + void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (Lexer lexer = createLexer(" |a", format)) { + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "a", lexer); + } + try (Lexer lexer = createLexer("a | |b", format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "b", lexer); + } + } + + @Test + void testEOFWithoutClosingQuote() throws Exception { final String code = "a,\"b"; try (Lexer lexer = createLexer(code, CSVFormat.Builder.create().setLenientEof(true).get())) { assertNextToken(TOKEN, "a", lexer); @@ -224,21 +249,21 @@ public void testEOFWithoutClosingQuote() throws Exception { } @Test // TODO is this correct? Do we expect BACKSPACE to be unescaped? - public void testEscapedBackspace() throws Exception { + void testEscapedBackspace() throws Exception { try (Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) { assertNextToken("character" + BACKSPACE + "Escaped", lexer); } } @Test - public void testEscapedCharacter() throws Exception { + void testEscapedCharacter() throws Exception { try (Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) { assertNextToken("character\\aEscaped", lexer); } } @Test - public void testEscapedControlCharacter() throws Exception { + void testEscapedControlCharacter() throws Exception { // we are explicitly using an escape different from \ here try (Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) { assertNextToken("character" + CR + "Escaped", lexer); @@ -246,35 +271,35 @@ public void testEscapedControlCharacter() throws Exception { } @Test - public void testEscapedControlCharacter2() throws Exception { + void testEscapedControlCharacter2() throws Exception 
{ try (Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) { assertNextToken("character" + CR + "Escaped", lexer); } } @Test - public void testEscapedCR() throws Exception { + void testEscapedCR() throws Exception { try (Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) { assertNextToken("character" + CR + "Escaped", lexer); } } @Test // TODO is this correct? Do we expect FF to be unescaped? - public void testEscapedFF() throws Exception { + void testEscapedFF() throws Exception { try (Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) { assertNextToken("character" + FF + "Escaped", lexer); } } @Test - public void testEscapedLF() throws Exception { + void testEscapedLF() throws Exception { try (Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) { assertNextToken("character" + LF + "Escaped", lexer); } } @Test - public void testEscapedMySqlNullValue() throws Exception { + void testEscapedMySqlNullValue() throws Exception { // MySQL uses \N to symbolize null values. We have to restore this try (Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) { assertNextToken("character\\NEscaped", lexer); @@ -282,7 +307,7 @@ public void testEscapedMySqlNullValue() throws Exception { } @Test // TODO is this correct? Do we expect TAB to be unescaped? 
- public void testEscapedTab() throws Exception { + void testEscapedTab() throws Exception { try (Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) { assertNextToken("character" + TAB + "Escaped", lexer); } @@ -290,7 +315,7 @@ public void testEscapedTab() throws Exception { } @Test - public void testEscapingAtEOF() throws Exception { + void testEscapingAtEOF() throws Exception { final String code = "escaping at EOF is evil\\"; try (Lexer lexer = createLexer(code, formatWithEscaping)) { assertThrows(IOException.class, () -> lexer.nextToken(new Token())); @@ -298,16 +323,29 @@ public void testEscapingAtEOF() throws Exception { } @Test - public void testFF() throws Exception { + void testFF() throws Exception { try (Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) { assertNextToken("character" + FF + "NotEscaped", lexer); } } @Test - public void testIgnoreEmptyLines() throws IOException { - final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" + - "\n" + "last, line \n" + "\n" + "\n" + "\n"; + void testIgnoreEmptyLines() throws IOException { + // @formatter:off + final String code = "first,line,\n" + + "\n" + + "\n" + + "second,line\n" + + "\n" + + "\n" + + "third line \n" + + "\n" + + "\n" + + "last, line \n" + + "\n" + + "\n" + + "\n"; + // @formatter:on final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(); try (Lexer lexer = createLexer(code, format)) { assertNextToken(TOKEN, "first", lexer); @@ -324,7 +362,7 @@ public void testIgnoreEmptyLines() throws IOException { } @Test - public void testIsMetaCharCommentStart() throws IOException { + void testIsMetaCharCommentStart() throws IOException { try (Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) { final int ch = lexer.readEscape(); assertEquals('#', ch); @@ -332,7 +370,7 @@ public void testIsMetaCharCommentStart() throws IOException { } @Test - public void testLF() 
throws Exception { + void testLF() throws Exception { try (Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) { assertNextToken("character", lexer); assertNextToken("NotEscaped", lexer); @@ -341,7 +379,7 @@ public void testLF() throws Exception { // encapsulator tokenizer (single line) @Test - public void testNextToken4() throws IOException { + void testNextToken4() throws IOException { /* * file: a,"foo",b a, " foo",b a,"foo " ,b // whitespace after closing encapsulator a, " foo " ,b */ @@ -365,7 +403,7 @@ public void testNextToken4() throws IOException { // encapsulator tokenizer (multi line, delimiter in string) @Test - public void testNextToken5() throws IOException { + void testNextToken5() throws IOException { final String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\""; try (Lexer lexer = createLexer(code, CSVFormat.DEFAULT)) { assertNextToken(TOKEN, "a", lexer); @@ -378,7 +416,7 @@ public void testNextToken5() throws IOException { // change delimiters, comment, encapsulater @Test - public void testNextToken6() throws IOException { + void testNextToken6() throws IOException { /* * file: a;'b and \' more ' !comment;;;; ;; */ @@ -390,8 +428,46 @@ public void testNextToken6() throws IOException { } } + /** + * A truncated escaped multi-character delimiter at EOF must not be accepted by reusing the previous escape delimiter + * look-ahead in {@link Lexer#isEscapeDelimiter()}. + */ + @Test + void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get(); + try (Lexer lexer = createLexer("x![!|!]y![!|", format)) { + assertNextToken(EOF, "x[|]y![!|", lexer); + } + } + + /** + * Tests CSV-324. 
+ */ + @Test + void testPartialMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + try (Lexer lexer = createLexer("a[|]b[|", format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(EOF, "b[|", lexer); + } + } + + /** + * A truncated multi-character delimiter at EOF must not be accepted by reusing the look-ahead buffer left dirty by an + * earlier non-matching peek in the same token (CSV-324 only cleared the buffer once per token). + */ + @Test + void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + // The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]". + final String recordString = "x[a][|"; + try (Lexer lexer = createLexer(recordString, format)) { + assertNextToken(EOF, recordString, lexer); + } + } + @Test - public void testReadEscapeBackspace() throws IOException { + void testReadEscapeBackspace() throws IOException { try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) { final int ch = lexer.readEscape(); assertEquals(BACKSPACE, ch); @@ -399,7 +475,7 @@ public void testReadEscapeBackspace() throws IOException { } @Test - public void testReadEscapeFF() throws IOException { + void testReadEscapeFF() throws IOException { try (Lexer lexer = createLexer("f", CSVFormat.DEFAULT.withEscape('\f'))) { final int ch = lexer.readEscape(); assertEquals(FF, ch); @@ -407,7 +483,7 @@ public void testReadEscapeFF() throws IOException { } @Test - public void testReadEscapeTab() throws IOException { + void testReadEscapeTab() throws IOException { try (Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) { final int ch = lexer.readEscape(); assertNextToken(EOF, "", lexer); @@ -416,7 +492,7 @@ public void testReadEscapeTab() throws IOException { } @Test - public void testSurroundingSpacesAreDeleted() throws 
IOException { + void testSurroundingSpacesAreDeleted() throws IOException { final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,"; try (Lexer lexer = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { assertNextToken(TOKEN, "noSpaces", lexer); @@ -430,7 +506,7 @@ public void testSurroundingSpacesAreDeleted() throws IOException { } @Test - public void testSurroundingTabsAreDeleted() throws IOException { + void testSurroundingTabsAreDeleted() throws IOException { final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,"; try (Lexer lexer = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { assertNextToken(TOKEN, "noTabs", lexer); @@ -444,14 +520,14 @@ public void testSurroundingTabsAreDeleted() throws IOException { } @Test - public void testTab() throws Exception { + void testTab() throws Exception { try (Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) { assertNextToken("character" + TAB + "NotEscaped", lexer); } } @Test - public void testTrailingTextAfterQuote() throws Exception { + void testTrailingTextAfterQuote() throws Exception { final String code = "\"a\" b,\"a\" \" b,\"a\" b \"\""; try (Lexer lexer = createLexer(code, CSVFormat.Builder.create().setTrailingData(true).get())) { assertNextToken(TOKEN, "a b", lexer); @@ -464,7 +540,7 @@ public void testTrailingTextAfterQuote() throws Exception { } @Test - public void testTrimTrailingSpacesZeroLength() throws Exception { + void testTrimTrailingSpacesZeroLength() throws Exception { final StringBuilder buffer = new StringBuilder(""); try (Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT)) { lexer.trimTrailingSpaces(buffer); diff --git a/src/test/java/org/apache/commons/csv/PerformanceTest.java b/src/test/java/org/apache/commons/csv/PerformanceTest.java index bf0d483897..9284828e6c 100644 --- a/src/test/java/org/apache/commons/csv/PerformanceTest.java +++ 
b/src/test/java/org/apache/commons/csv/PerformanceTest.java @@ -44,7 +44,7 @@ * Basic test harness. */ @SuppressWarnings("boxing") -public class PerformanceTest { +class PerformanceTest { @FunctionalInterface private interface CSVParserFactory { diff --git a/src/test/java/org/apache/commons/csv/TokenTest.java b/src/test/java/org/apache/commons/csv/TokenTest.java new file mode 100644 index 0000000000..075c1b1d9c --- /dev/null +++ b/src/test/java/org/apache/commons/csv/TokenTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +/** + * Tests {@link Token}. 
+ */ +class TokenTest { + + @ParameterizedTest + @EnumSource(Token.Type.class) + void testToString(final Token.Type type) { + // Should never blow up + final Token token = new Token(); + final String resetName = Token.Type.INVALID.name(); + assertTrue(token.toString().contains(resetName)); + token.reset(); + assertTrue(token.toString().contains(resetName)); + token.type = null; + assertFalse(token.toString().isEmpty()); + token.reset(); + token.type = type; + assertTrue(token.toString().contains(type.name())); + token.content.setLength(1000); + assertTrue(token.toString().contains(type.name())); + } +} diff --git a/src/test/java/org/apache/commons/csv/UserGuideTest.java b/src/test/java/org/apache/commons/csv/UserGuideTest.java index 6b97ccded9..6cd8c72d7f 100644 --- a/src/test/java/org/apache/commons/csv/UserGuideTest.java +++ b/src/test/java/org/apache/commons/csv/UserGuideTest.java @@ -35,7 +35,7 @@ /** * Tests for the user guide. */ -public class UserGuideTest { +class UserGuideTest { @TempDir Path tempDir; @@ -54,7 +54,7 @@ public InputStreamReader newReader(final Path path) throws IOException { } @Test - public void testBomFull() throws UnsupportedEncodingException, IOException { + void testBomFull() throws UnsupportedEncodingException, IOException { final Path path = tempDir.resolve("test1.csv"); Files.copy(Utils.createUtf8Input("ColumnA, ColumnB, ColumnC\r\nA, B, C\r\n".getBytes(StandardCharsets.UTF_8), true), path); // @formatter:off @@ -74,7 +74,7 @@ public void testBomFull() throws UnsupportedEncodingException, IOException { } @Test - public void testBomUtil() throws UnsupportedEncodingException, IOException { + void testBomUtil() throws UnsupportedEncodingException, IOException { final Path path = tempDir.resolve("test2.csv"); Files.copy(Utils.createUtf8Input("ColumnA, ColumnB, ColumnC\r\nA, B, C\r\n".getBytes(StandardCharsets.UTF_8), true), path); try (Reader reader = newReader(path); diff --git a/src/test/java/org/apache/commons/csv/Utils.java 
b/src/test/java/org/apache/commons/csv/Utils.java index c99b77aca9..5b5a05e043 100644 --- a/src/test/java/org/apache/commons/csv/Utils.java +++ b/src/test/java/org/apache/commons/csv/Utils.java @@ -37,9 +37,10 @@ final class Utils { * @param message the message to be displayed * @param expected the 2d array of expected results * @param actual the List of {@link CSVRecord} entries, each containing an array of values + * @param maxRows the maximum number of rows expected, less than or equal to zero means no limit. */ - public static void compare(final String message, final String[][] expected, final List actual) { - final int expectedLength = expected.length; + public static void compare(final String message, final String[][] expected, final List actual, final long maxRows) { + final long expectedLength = maxRows > 0 ? Math.min(maxRows, expected.length) : expected.length; assertEquals(expectedLength, actual.size(), message + " - outer array size"); for (int i = 0; i < expectedLength; i++) { assertArrayEquals(expected[i], actual.get(i).values(), message + " (entry " + i + ")"); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java index 71b056834c..67f1b785d5 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java @@ -24,10 +24,10 @@ import org.apache.commons.csv.QuoteMode; import org.junit.jupiter.api.Test; -public class JiraCsv148Test { +class JiraCsv148Test { @Test - public void testWithIgnoreSurroundingSpacesEmpty() { + void testWithIgnoreSurroundingSpacesEmpty() { // @formatter:off final CSVFormat format = CSVFormat.DEFAULT.builder() .setQuoteMode(QuoteMode.ALL) @@ -47,7 +47,7 @@ public void testWithIgnoreSurroundingSpacesEmpty() { * quotation marks, while withIgnoreSurroundingSpace() cannot The same point: you can remove the leading and trailing spaces, tabs and other symbols. 
*/ @Test - public void testWithTrimEmpty() { + void testWithTrimEmpty() { // @formatter:off final CSVFormat format = CSVFormat.DEFAULT.builder() .setQuoteMode(QuoteMode.ALL) @@ -55,7 +55,7 @@ public void testWithTrimEmpty() { .get(); // @formatter:on assertEquals( - "\"\",\"\",\"Single space on the left\",\"Single space on the right\"," + "\"Single spaces on both sides\",\"Multiple spaces on the left\"," + + "\"\",\"\",\"Single space on the left\",\"Single space on the right\",\"Single spaces on both sides\",\"Multiple spaces on the left\"," + "\"Multiple spaces on the right\",\"Multiple spaces on both sides\"", format.format("", " ", " Single space on the left", "Single space on the right ", " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ", " Multiple spaces on both sides ")); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java index 5b3d40c9f0..b32e965665 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java @@ -29,12 +29,12 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv149Test { +class JiraCsv149Test { private static final String CR_LF = "\r\n"; @Test - public void testJiraCsv149EndWithEOL() throws IOException { + void testJiraCsv149EndWithEOL() throws IOException { testJiraCsv149EndWithEolAtEof(true); } @@ -61,7 +61,7 @@ private void testJiraCsv149EndWithEolAtEof(final boolean eolAtEof) throws IOExce } @Test - public void testJiraCsv149EndWithoutEOL() throws IOException { + void testJiraCsv149EndWithoutEOL() throws IOException { testJiraCsv149EndWithEolAtEof(false); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv150Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv150Test.java index f10477292c..eec91d52d0 100644 --- 
a/src/test/java/org/apache/commons/csv/issues/JiraCsv150Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv150Test.java @@ -27,7 +27,7 @@ import org.apache.commons.csv.CSVParser; import org.junit.jupiter.api.Test; -public class JiraCsv150Test { +class JiraCsv150Test { private void testDisable(final CSVFormat format, final StringReader reader) throws IOException { try (CSVParser csvParser = CSVParser.builder().setReader(reader).setFormat(format).get()) { @@ -36,19 +36,19 @@ private void testDisable(final CSVFormat format, final StringReader reader) thro } @Test - public void testDisableComment() throws IOException { + void testDisableComment() throws IOException { final StringReader stringReader = new StringReader("\"66\u2441\",,\"\",\"DeutscheBK\ufffe\",\"000\"\r\n"); testDisable(CSVFormat.DEFAULT.builder().setCommentMarker(null).get(), stringReader); } @Test - public void testDisableEncapsulation() throws IOException { + void testDisableEncapsulation() throws IOException { final StringReader stringReader = new StringReader("66\u2441,,\"\",\ufffeDeutscheBK,\"000\"\r\n"); testDisable(CSVFormat.DEFAULT.builder().setQuote(null).get(), stringReader); } @Test - public void testDisableEscaping() throws IOException { + void testDisableEscaping() throws IOException { final StringReader stringReader = new StringReader("\"66\u2441\",,\"\",\"DeutscheBK\ufffe\",\"000\"\r\n"); testDisable(CSVFormat.DEFAULT.builder().setEscape(null).get(), stringReader); } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java index c045cdd269..90d657fcd1 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java @@ -26,10 +26,10 @@ import org.apache.commons.csv.CSVPrinter; import org.junit.jupiter.api.Test; -public class JiraCsv154Test { +class JiraCsv154Test { @Test - public void 
testJiraCsv154_withCommentMarker() throws IOException { + void testJiraCsv154_withCommentMarker() throws IOException { final String comment = "This is a header comment"; // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() @@ -48,7 +48,7 @@ public void testJiraCsv154_withCommentMarker() throws IOException { } @Test - public void testJiraCsv154_withHeaderComments() throws IOException { + void testJiraCsv154_withHeaderComments() throws IOException { final String comment = "This is a header comment"; // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java index b7e3bae858..607d0cf2a3 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java @@ -31,7 +31,7 @@ import org.apache.commons.csv.QuoteMode; import org.junit.jupiter.api.Test; -public class JiraCsv167Test { +class JiraCsv167Test { private Reader getTestReader() { return new InputStreamReader( @@ -39,7 +39,7 @@ private Reader getTestReader() { } @Test - public void testParse() throws IOException { + void testParse() throws IOException { int totcomment = 0; int totrecs = 0; try (Reader reader = getTestReader(); BufferedReader br = new BufferedReader(reader)) { @@ -77,7 +77,7 @@ public void testParse() throws IOException { int comments = 0; int records = 0; try (Reader reader = getTestReader(); CSVParser parser = format.parse(reader)) { - for (CSVRecord csvRecord : parser) { + for (final CSVRecord csvRecord : parser) { records++; if (csvRecord.hasComment()) { comments++; diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java index 641797fe88..1117c12ac9 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java +++ 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java @@ -31,7 +31,7 @@ import org.apache.commons.csv.CSVParser; import org.junit.jupiter.api.Test; -public class JiraCsv198Test { +class JiraCsv198Test { // @formatter:off private static final CSVFormat CSV_FORMAT = CSVFormat.EXCEL.builder() @@ -42,7 +42,7 @@ public class JiraCsv198Test { // @formatter:on @Test - public void test() throws UnsupportedEncodingException, IOException { + void test() throws UnsupportedEncodingException, IOException { final InputStream pointsOfReference = getClass().getResourceAsStream("/org/apache/commons/csv/CSV-198/optd_por_public.csv"); assertNotNull(pointsOfReference); try (@SuppressWarnings("resource") diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java index 8eee041560..2c9226506c 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java @@ -29,10 +29,10 @@ * JIRA: withNullString value is printed without quotes when * QuoteMode.ALL is specified */ -public class JiraCsv203Test { +class JiraCsv203Test { @Test - public void testQuoteModeAll() throws Exception { + void testQuoteModeAll() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") @@ -48,7 +48,7 @@ public void testQuoteModeAll() throws Exception { } @Test - public void testQuoteModeAllNonNull() throws Exception { + void testQuoteModeAllNonNull() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") @@ -64,7 +64,7 @@ public void testQuoteModeAllNonNull() throws Exception { } @Test - public void testQuoteModeMinimal() throws Exception { + void testQuoteModeMinimal() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") @@ -80,7 +80,7 @@ public void testQuoteModeMinimal() 
throws Exception { } @Test - public void testQuoteModeNonNumeric() throws Exception { + void testQuoteModeNonNumeric() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") @@ -96,7 +96,7 @@ public void testQuoteModeNonNumeric() throws Exception { } @Test - public void testWithEmptyValues() throws Exception { + void testWithEmptyValues() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") @@ -113,7 +113,7 @@ public void testWithEmptyValues() throws Exception { } @Test - public void testWithoutNullString() throws Exception { + void testWithoutNullString() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() //.setNullString("N/A") @@ -129,7 +129,7 @@ public void testWithoutNullString() throws Exception { } @Test - public void testWithoutQuoteMode() throws Exception { + void testWithoutQuoteMode() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java index 3b69b173ba..2fecd10f16 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java @@ -30,10 +30,10 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv206Test { +class JiraCsv206Test { @Test - public void testJiraCsv206MultipleCharacterDelimiter() throws IOException { + void testJiraCsv206MultipleCharacterDelimiter() throws IOException { // Read with multiple character delimiter final String source = "FirstName[|]LastName[|]Address\r\nJohn[|]Smith[|]123 Main St."; final StringReader reader = new StringReader(source); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java index a4e3960c96..28b559d1e1 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java @@ -28,10 +28,10 @@ import org.apache.commons.csv.CSVParser; import org.junit.jupiter.api.Test; -public class JiraCsv211Test { +class JiraCsv211Test { @Test - public void testJiraCsv211Format() throws IOException { + void testJiraCsv211Format() throws IOException { // @formatter:off final CSVFormat printFormat = CSVFormat.DEFAULT.builder() .setDelimiter('\t') diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java index d700843f47..90f5da4c5a 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java @@ -38,7 +38,7 @@ * you want, you need to open a new CSVParser. *

*/ -public class JiraCsv213Test { +class JiraCsv213Test { private void createEndChannel(final File csvFile) { // @formatter:off @@ -64,7 +64,7 @@ private void createEndChannel(final File csvFile) { } @Test - public void test() { + void test() { createEndChannel(new File("src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv")); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv227Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv227Test.java new file mode 100644 index 0000000000..2b9e335a8f --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv227Test.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.QuoteMode; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/browse/CSV-227 + */ +class JiraCsv227Test { + + @Test + public void test() throws IOException { + final StringBuilder out = new StringBuilder(); + try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL))) { + printer.printRecord("ㅁㅎㄷㄹ", "ㅁㅎㄷㄹ", "", "test2"); + printer.printRecord("한글3", "hello3", "3한글3", "test3"); + printer.printRecord("", "hello4", "", "test4"); + } + // ㅁㅎㄷㄹ,ㅁㅎㄷㄹ,,test2 + // 한글3,hello3,3한글3,test3 + // "",hello4,,test4 + assertEquals("ㅁㅎㄷㄹ,ㅁㅎㄷㄹ,,test2\r\n한글3,hello3,3한글3,test3\r\n\"\",hello4,,test4\r\n", out.toString()); + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java index 85d9676fde..c2d9ac5910 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java @@ -34,10 +34,10 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv247Test { +class JiraCsv247Test { @Test - public void testHeadersMissingOneColumnWhenAllowingMissingColumnNames() throws Exception { + void testHeadersMissingOneColumnWhenAllowingMissingColumnNames() throws Exception { final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setAllowMissingColumnNames(true).get(); assertTrue(format.getAllowMissingColumnNames(), "We should allow missing column names"); @@ -63,7 +63,7 @@ record = iterator.next(); } @Test - public void testHeadersMissingThrowsWhenNotAllowingMissingColumnNames() { + void testHeadersMissingThrowsWhenNotAllowingMissingColumnNames() { final 
CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().get(); assertFalse(format.getAllowMissingColumnNames(), "By default we should not allow missing column names"); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java index a629a080fd..480a9dffa9 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java @@ -33,14 +33,14 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv248Test { +class JiraCsv248Test { private static InputStream getTestInput() { return ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin"); } /** - * Test deserialisation of a CSVRecord created using version 1.6. + * Test deserialization of a CSVRecord created using version 1.6. * *

* This test asserts that serialization from 1.8 onwards is consistent with previous versions. Serialization was @@ -50,7 +50,7 @@ private static InputStream getTestInput() { * @throws ClassNotFoundException If the CSVRecord cannot be deserialized */ @Test - public void testJiraCsv248() throws IOException, ClassNotFoundException { + void testJiraCsv248() throws IOException, ClassNotFoundException { // Record was originally created using CSV version 1.6 with the following code: // try (CSVParser parser = CSVParser.parse("A,B\n#my comment\nOne,Two", // CSVFormat.DEFAULT.builder().setHeader().setCommentMarker('#'))) { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java index f5fe47ca6a..4034b04bd7 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java @@ -32,10 +32,10 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv249Test { +class JiraCsv249Test { @Test - public void testJiraCsv249() throws IOException { + void testJiraCsv249() throws IOException { final CSVFormat format = CSVFormat.DEFAULT.builder().setEscape('\\').get(); final StringWriter stringWriter = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(stringWriter, format)) { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java index 5b266a3e3a..13bb6a8270 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java @@ -18,7 +18,7 @@ */ package org.apache.commons.csv.issues; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.apache.commons.csv.CsvAssertions.assertValuesEquals; import java.io.IOException; import java.io.StringReader; @@ -33,24 +33,24 @@ /** * Setting 
QuoteMode:ALL_NON_NULL or NON_NUMERIC can distinguish between empty string columns and absent value columns. */ -public class JiraCsv253Test { - - private void assertArrayEqual(final String[] expected, final CSVRecord actual) { - for (int i = 0; i < expected.length; i++) { - assertEquals(expected[i], actual.get(i)); - } - } +class JiraCsv253Test { @Test - public void testHandleAbsentValues() throws IOException { - final String source = "\"John\",,\"Doe\"\n" + ",\"AA\",123\n" + "\"John\",90,\n" + "\"\",,90"; + void testHandleAbsentValues() throws IOException { + // @formatter:off + final String source = + "\"John\",,\"Doe\"\n" + + ",\"AA\",123\n" + + "\"John\",90,\n" + + "\"\",,90"; + // @formatter:on final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).get(); try (CSVParser parser = csvFormat.parse(new StringReader(source))) { final Iterator csvRecords = parser.iterator(); - assertArrayEqual(new String[] {"John", null, "Doe"}, csvRecords.next()); - assertArrayEqual(new String[] {null, "AA", "123"}, csvRecords.next()); - assertArrayEqual(new String[] {"John", "90", null}, csvRecords.next()); - assertArrayEqual(new String[] {"", null, "90"}, csvRecords.next()); + assertValuesEquals(new String[] {"John", null, "Doe"}, csvRecords.next()); + assertValuesEquals(new String[] {null, "AA", "123"}, csvRecords.next()); + assertValuesEquals(new String[] {"John", "90", null}, csvRecords.next()); + assertValuesEquals(new String[] {"", null, "90"}, csvRecords.next()); } } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv254Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv254Test.java new file mode 100644 index 0000000000..629b42ee6b --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv254Test.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv.issues; + +import static org.apache.commons.csv.CsvAssertions.assertValuesEquals; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Iterator; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/browse/CSV-254. 
+ */ +class JiraCsv254Test { + + @Test + void test() throws IOException { + final CSVFormat csvFormat = CSVFormat.POSTGRESQL_CSV; + try (BufferedReader reader = Files.newBufferedReader(Paths.get("src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv"), + StandardCharsets.UTF_8); CSVParser parser = csvFormat.parse(reader)) { + final Iterator csvRecords = parser.iterator(); + assertValuesEquals(new String[] { "AA", "33", null }, csvRecords.next()); + assertValuesEquals(new String[] { "AA", null, "" }, csvRecords.next()); + assertValuesEquals(new String[] { null, "33", "CC" }, csvRecords.next()); + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv257Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv257Test.java new file mode 100644 index 0000000000..4234a7a0fa --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv257Test.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/browse/CSV-257 + */ +class JiraCsv257Test { + + private static final String INPUT = ","; + + @Test + void testHeaderBuilder() throws IOException { + // @formatter:off + final CSVFormat format = CSVFormat.RFC4180.builder() + .setDelimiter(INPUT.charAt(0)) + .setHeader() + .setSkipHeaderRecord(true) + .setIgnoreSurroundingSpaces(true) + .get(); + // @formatter:on + // Document the current behavior: Throws an IllegalArgumentException if a header name is missing. + assertThrows(IllegalArgumentException.class, () -> { + try (CSVParser parser = CSVParser.parse(INPUT, format)) { + // empty + } + }); + } + + @Test + void testHeaderDepreacted() throws IOException { + // @formatter:off + final CSVFormat format = CSVFormat.RFC4180 + .withDelimiter(INPUT.charAt(0)) + .withFirstRecordAsHeader() + .withIgnoreSurroundingSpaces(); + // @formatter:on + // Document the current behavior: Throws an IllegalArgumentException if a header name is missing.
+ assertThrows(IllegalArgumentException.class, () -> { + try (CSVParser parser = new CSVParser(new StringReader(INPUT), format)) { + // empty + } + }); + } + + @Test + void testNoHeaderBuilder() throws IOException { + // @formatter:off + final CSVFormat format = CSVFormat.RFC4180.builder() + .setDelimiter(INPUT.charAt(0)) + .setIgnoreSurroundingSpaces(true) + .get(); + // @formatter:on + try (CSVParser parser = CSVParser.parse(INPUT, format)) { + // empty + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java index abae0ea2f5..18bb9580a3 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java @@ -32,10 +32,10 @@ /** * Tests [CSV-263] Print from Reader with embedded quotes generates incorrect output. */ -public class JiraCsv263Test { +class JiraCsv263Test { @Test - public void testPrintFromReaderWithQuotes() throws IOException { + void testPrintFromReaderWithQuotes() throws IOException { // @formatter:off final CSVFormat format = CSVFormat.RFC4180.builder() .setDelimiter(',') diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java index d910ef5828..857e42cb8f 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java @@ -35,7 +35,7 @@ * * @see Jira Ticker */ -public class JiraCsv264Test { +class JiraCsv264Test { private static final String CSV_STRING = "\"\",\"B\",\"\"\n" + "\"1\",\"2\",\"3\"\n" + @@ -49,7 +49,7 @@ public class JiraCsv264Test { "\"6\",\"7\",\"\",\"\",\"10\""; @Test - public void testJiraCsv264() { + void testJiraCsv264() { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() @@ -62,7 +62,7 @@ public void testJiraCsv264() { } @Test - public void 
testJiraCsv264WithGapAllowEmpty() throws IOException { + void testJiraCsv264WithGapAllowEmpty() throws IOException { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() @@ -75,7 +75,7 @@ public void testJiraCsv264WithGapAllowEmpty() throws IOException { } @Test - public void testJiraCsv264WithGapDisallow() { + void testJiraCsv264WithGapDisallow() { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java index 4853672dc0..1bccad702f 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java @@ -33,10 +33,10 @@ /** * Tests [CSV-265] {@link CSVRecord#getCharacterPosition()} returns the correct position after encountering a comment. */ -public class JiraCsv265Test { +class JiraCsv265Test { @Test - public void testCharacterPositionWithComments() throws IOException { + void testCharacterPositionWithComments() throws IOException { // @formatter:off final String csv = "# Comment1\n" + @@ -62,7 +62,7 @@ public void testCharacterPositionWithComments() throws IOException { } @Test - public void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException { + void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException { // @formatter:off final String csv = "# Comment1\n" + diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java index 5ee5e0a01e..0269dec5d1 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java @@ -29,10 +29,10 @@ import org.apache.commons.csv.CSVPrinter; import org.junit.jupiter.api.Test; -public class JiraCsv271Test { +class JiraCsv271Test { @Test - public void testJiraCsv271_withArray() 
throws IOException { + void testJiraCsv271_withArray() throws IOException { final CSVFormat csvFormat = CSVFormat.DEFAULT; final StringWriter stringWriter = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(stringWriter, csvFormat)) { @@ -43,7 +43,7 @@ public void testJiraCsv271_withArray() throws IOException { } @Test - public void testJiraCsv271_withList() throws IOException { + void testJiraCsv271_withList() throws IOException { final CSVFormat csvFormat = CSVFormat.DEFAULT; final StringWriter stringWriter = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(stringWriter, csvFormat)) { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java index 0be6a52f81..065ee6bb37 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java @@ -31,7 +31,7 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv288Test { +class JiraCsv288Test { private void print(final CSVRecord csvRecord, final CSVPrinter csvPrinter) throws IOException { for (final String value : csvRecord) { @@ -42,7 +42,7 @@ private void print(final CSVRecord csvRecord, final CSVPrinter csvPrinter) throw @Test // Before fix: // expected: but was: - public void testParseWithABADelimiter() throws Exception { + void testParseWithABADelimiter() throws Exception { final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -57,7 +57,7 @@ public void testParseWithABADelimiter() throws Exception { @Test // Before fix: // expected: but was: - public void testParseWithDoublePipeDelimiter() throws Exception { + void testParseWithDoublePipeDelimiter() throws Exception { final Reader in = new StringReader("a||b||c||d||||f"); final StringBuilder 
stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -72,7 +72,7 @@ public void testParseWithDoublePipeDelimiter() throws Exception { @Test // Regression, already passed before fix - public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { + void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { final Reader in = new StringReader("a||bb||cc||dd||f"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -87,7 +87,7 @@ public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { @Test // Before fix: // expected: but was: - public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception { + void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception { final Reader in = new StringReader("a||b||c||d||||f||"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -102,7 +102,7 @@ public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception @Test // Before fix: // expected: but was: - public void testParseWithDoublePipeDelimiterQuoted() throws Exception { + void testParseWithDoublePipeDelimiterQuoted() throws Exception { final Reader in = new StringReader("a||\"b||c\"||d||||f"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -116,7 +116,7 @@ public void testParseWithDoublePipeDelimiterQuoted() throws Exception { @Test // Regression, already passed before fix - public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception { + void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception { final Reader in = new StringReader("a|b|c|d||f|"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter 
csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -131,7 +131,7 @@ public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception @Test // Before fix: // expected: but was: - public void testParseWithTriplePipeDelimiter() throws Exception { + void testParseWithTriplePipeDelimiter() throws Exception { final Reader in = new StringReader("a|||b|||c|||d||||||f"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -145,7 +145,7 @@ public void testParseWithTriplePipeDelimiter() throws Exception { @Test // Regression, already passed before fix - public void testParseWithTwoCharDelimiter1() throws Exception { + void testParseWithTwoCharDelimiter1() throws Exception { final Reader in = new StringReader("a~|b~|c~|d~|~|f"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -159,7 +159,7 @@ public void testParseWithTwoCharDelimiter1() throws Exception { @Test // Regression, already passed before fix - public void testParseWithTwoCharDelimiter2() throws Exception { + void testParseWithTwoCharDelimiter2() throws Exception { final Reader in = new StringReader("a~|b~|c~|d~|~|f~"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -173,7 +173,7 @@ public void testParseWithTwoCharDelimiter2() throws Exception { @Test // Regression, already passed before fix - public void testParseWithTwoCharDelimiter3() throws Exception { + void testParseWithTwoCharDelimiter3() throws Exception { final Reader in = new StringReader("a~|b~|c~|d~|~|f|"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -187,7 +187,7 @@ public void testParseWithTwoCharDelimiter3() throws Exception { @Test // Regression, already passed before fix - 
public void testParseWithTwoCharDelimiter4() throws Exception { + void testParseWithTwoCharDelimiter4() throws Exception { final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); @@ -202,7 +202,7 @@ public void testParseWithTwoCharDelimiter4() throws Exception { @Test // Before fix: // expected: but was: - public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception { + void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception { final Reader in = new StringReader("a~|b~|c~|d~|~|f~|"); final StringBuilder stringBuilder = new StringBuilder(); try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java index f9dd6e9530..f251eeb7a5 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java @@ -58,7 +58,7 @@ // 2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b // 3 a b,c,d "quoted" e // -public class JiraCsv290Test { +class JiraCsv290Test { private void testHelper(final String fileName, final CSVFormat format) throws Exception { List> content = new ArrayList<>(); @@ -84,17 +84,17 @@ private void testHelper(final String fileName, final CSVFormat format) throws Ex } @Test - public void testPostgresqlCsv() throws Exception { + void testPostgresqlCsv() throws Exception { testHelper("psql.csv", CSVFormat.POSTGRESQL_CSV); } @Test - public void testPostgresqlText() throws Exception { + void testPostgresqlText() throws Exception { testHelper("psql.tsv", CSVFormat.POSTGRESQL_TEXT); } @Test - public void testWriteThenRead() throws Exception { + void testWriteThenRead() throws Exception { final StringWriter sw = new StringWriter(); final CSVFormat format = 
CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).get(); try (CSVPrinter printer = new CSVPrinter(sw, format)) { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv294Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv294Test.java index 2cc62628e7..0e5de0751b 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv294Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv294Test.java @@ -36,7 +36,7 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv294Test { +class JiraCsv294Test { private static void testInternal(final CSVFormat format, final String expectedSubstring) throws IOException { final ByteArrayOutputStream bos = new ByteArrayOutputStream(); @@ -58,24 +58,24 @@ private static void testInternal(final CSVFormat format, final String expectedSu } @Test - public void testDefaultCsvFormatWithBackslashEscapeWorks() throws IOException { + void testDefaultCsvFormatWithBackslashEscapeWorks() throws IOException { testInternal(CSVFormat.Builder.create().setEscape('\\').get(), ",\"b \\\"\\\"\","); } @Test - public void testDefaultCsvFormatWithNullEscapeWorks() throws IOException { + void testDefaultCsvFormatWithNullEscapeWorks() throws IOException { testInternal(CSVFormat.Builder.create().setEscape(null).get(), ",\"b \"\"\"\"\","); } @Test - public void testDefaultCsvFormatWithQuoteEscapeWorks() throws IOException { + void testDefaultCsvFormatWithQuoteEscapeWorks() throws IOException { // this one doesn't actually work but should behave like setEscape(null) // Printer is writing the expected content but Parser is unable to consume it testInternal(CSVFormat.Builder.create().setEscape('"').get(), ",\"b \"\"\"\"\","); } @Test - public void testDefaultCsvFormatWorks() throws IOException { + void testDefaultCsvFormatWorks() throws IOException { testInternal(CSVFormat.Builder.create().get(), ",\"b \"\"\"\"\","); } } diff --git 
a/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java index e34a8b02f6..7816412265 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java @@ -43,7 +43,7 @@ * Jira CSV-253 to a certain extent. *

*/ -public class JiraCsv93Test { +class JiraCsv93Test { private static Object[] objects1 = {"abc", "", null, "a,b,c", 123}; private static Object[] objects2 = {"abc", "NULL", null, "a,b,c", 123}; @@ -61,7 +61,7 @@ private void every(final CSVFormat csvFormat, final Object[] objects, final Stri } @Test - public void testWithNotSetNullString() throws IOException { + void testWithNotSetNullString() throws IOException { // @formatter:off every(CSVFormat.DEFAULT, objects1, @@ -91,7 +91,7 @@ public void testWithNotSetNullString() throws IOException { } @Test - public void testWithSetNullStringEmptyString() throws IOException { + void testWithSetNullStringEmptyString() throws IOException { // @formatter:off every(CSVFormat.DEFAULT.builder().setNullString("").get(), objects1, @@ -121,7 +121,7 @@ public void testWithSetNullStringEmptyString() throws IOException { } @Test - public void testWithSetNullStringNULL() throws IOException { + void testWithSetNullStringNULL() throws IOException { // @formatter:off every(CSVFormat.DEFAULT.builder().setNullString("NULL").get(), objects2, diff --git a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java index efdb8f0418..bead12378d 100644 --- a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java @@ -43,7 +43,7 @@ * * To run this test, use: mvn test -Dtest=PerformanceTest */ -public class PerformanceTest { +class PerformanceTest { private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; @@ -110,7 +110,7 @@ public long testParseBigFile(final boolean traverseColumns) throws Exception { } @Test - public void testParseBigFileRepeat() throws Exception { + void testParseBigFileRepeat() throws Exception { long bestTime = Long.MAX_VALUE; for (int i = 0; i < this.max; i++) { bestTime = Math.min(testParseBigFile(false), bestTime); @@ -119,7 +119,7 @@ public 
void testParseBigFileRepeat() throws Exception { } @Test - public void testReadBigFile() throws Exception { + void testReadBigFile() throws Exception { long bestTime = Long.MAX_VALUE; long count; for (int i = 0; i < this.max; i++) { diff --git a/src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv b/src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv new file mode 100644 index 0000000000..e7d2972c5a --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv @@ -0,0 +1,3 @@ +AA,33, +AA,,"" +,33,CC