diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 0000000000..cdb8cd101b --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +github: + description: "Apache Commons CSV" + homepage: https://commons.apache.org/csv/ + +notifications: + commits: commits@commons.apache.org + issues: issues@commons.apache.org + pullrequests: issues@commons.apache.org + jira_options: link label + jobs: notifications@commons.apache.org + # commits_bot_dependabot: dependabot@commons.apache.org + issues_bot_dependabot: dependabot@commons.apache.org + pullrequests_bot_dependabot: dependabot@commons.apache.org + issues_bot_codecov-commenter: notifications@commons.apache.org + pullrequests_bot_codecov-commenter: notifications@commons.apache.org diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..f42866e4bd --- /dev/null +++ b/.gitattributes @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +* text=auto diff --git a/.github/GH-ROBOTS.txt b/.github/GH-ROBOTS.txt new file mode 100644 index 0000000000..64a88674fe --- /dev/null +++ b/.github/GH-ROBOTS.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Keeps on creating FUD PRs in test code +# Does not follow Apache disclosure policies +User-agent: JLLeitschuh/security-research +Disallow: * diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9ebcd0ebb1..90ec55f742 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. 
You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -18,10 +18,8 @@ updates: - package-ecosystem: "maven" directory: "/" schedule: - interval: "weekly" - day: "friday" + interval: "quarterly" - package-ecosystem: "github-actions" directory: "/" schedule: - interval: "weekly" - day: "friday" + interval: "quarterly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..9ff35c83e7 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,30 @@ + + +Thanks for your contribution to [Apache Commons](https://commons.apache.org/)! Your help is appreciated! + +Before you push a pull request, review this list: + +- [ ] Read the [contribution guidelines](CONTRIBUTING.md) for this project. +- [ ] Read the [ASF Generative Tooling Guidance](https://www.apache.org/legal/generative-tooling.html) if you use Artificial Intelligence (AI). +- [ ] I used AI to create any part of, or all of, this pull request. Which AI tool was used to create this pull request, and to what extent did it contribute? +- [ ] Run a successful build using the default [Maven](https://maven.apache.org/) goal with `mvn`; that's `mvn` on the command line by itself. +- [ ] Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible, but it is a best practice. +- [ ] Write a pull request description that is detailed enough to understand what the pull request does, how, and why. +- [ ] Each commit in the pull request should have a meaningful subject line and body. Note that a maintainer may squash commits during the merge process. 
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 87325a9f5b..cca38e5121 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,80 +1,86 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: "CodeQL" - -on: - push: - branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] - schedule: - - cron: '33 9 * * 4' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'java' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://git.io/codeql-language-support - - steps: - - name: Checkout repository - uses: actions/checkout@v3.0.2 - - uses: actions/cache@v3.0.8 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- - - # Initializes the CodeQL tools for scanning. 
- - name: Initialize CodeQL - uses: github/codeql-action/init@v2 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v2 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl - - # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - #- run: | - # make bootstrap - # make release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: "CodeQL" + +on: + push: + branches: [ master ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '33 9 * * 4' + +permissions: + contents: read + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + max-parallel: 20 + fail-fast: false + matrix: + language: [ 'java' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Learn more about CodeQL language support at https://git.io/codeql-language-support + + steps: + - name: Checkout repository + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false + - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 + + # ℹ️ Command-line programs to run using the OS shell. 
+ # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index 8529d27866..0000000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,50 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: Coverage - -on: [push, pull_request] - -permissions: - contents: read - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - java: [ 8 ] - - steps: - - uses: actions/checkout@v3.0.2 - - uses: actions/cache@v3.0.8 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 - with: - distribution: 'temurin' - java-version: ${{ matrix.java }} - - name: Build with Maven - run: mvn -V test jacoco:report --file pom.xml --no-transfer-progress - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - files: ./target/site/jacoco/jacoco.xml diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml new file mode 100644 index 0000000000..7bc02bdd23 --- /dev/null +++ b/.github/workflows/dependency-review.yml @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: 'Dependency Review' +on: [pull_request] + +permissions: + contents: read + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - name: 'Checkout Repository' + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + - name: 'Dependency Review PR' + uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294 # v5.0.0 diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 3a265157b4..17ba7dd386 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -1,47 +1,61 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: Java CI - -on: [push, pull_request] - -jobs: - build: - - runs-on: ubuntu-latest - continue-on-error: ${{ matrix.experimental }} - strategy: - matrix: - java: [ 8, 11, 17 ] - experimental: [false] -# include: -# - java: 18-ea -# experimental: true - - steps: - - uses: actions/checkout@v3.0.2 - - uses: actions/cache@v3.0.8 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 - with: - distribution: 'temurin' - java-version: ${{ matrix.java }} - - name: Build with Maven - run: mvn --file pom.xml --no-transfer-progress +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Java CI + +on: + push: + branches: + - 'master' + pull_request: {} + +permissions: + contents: read + +jobs: + build: + + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.experimental }} + strategy: + max-parallel: 20 + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + java: [ 8, 11, 17, 21, 25, 26 ] + experimental: [false] + # Keep the same parameter order as the matrix above + include: + - os: ubuntu-latest + java: 27-ea + experimental: true + + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false + - uses: actions/cache@55cc8345863c7cc4c66a329aec7e433d2d1c52a9 #v6.1.0 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@1bcf9fb12cf4aa7d266a90ae39939e61372fe520 # v5.4.0 + with: + distribution: ${{ runner.os == 'macOS' && matrix.java == '8' && 'zulu' || 'temurin' }} + java-version: ${{ matrix.java }} + - name: Build with Maven + run: mvn -Ddoclint=all --show-version --batch-mode --no-transfer-progress diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml index abd6992715..e1868cb462 100644 --- a/.github/workflows/scorecards-analysis.yml +++ b/.github/workflows/scorecards-analysis.yml @@ -5,7 +5,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -31,19 +31,21 @@ jobs: name: "Scorecards analysis" runs-on: ubuntu-latest permissions: - security-events: write # Needed to upload the results to the code-scanning dashboard. 
+ # Needed to upload the results to the code-scanning dashboard. + security-events: write actions: read - contents: read + id-token: write # This is required for requesting the JWT + contents: read # This is required for actions/checkout steps: - name: "Checkout code" - uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b # 3.0.2 + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # 7.0.0 with: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@ce330fde6b1a5c9c75b417e7efc510b822a35564 # 1.1.2 + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # 2.4.3 with: results_file: results.sarif results_format: sarif @@ -55,13 +57,13 @@ jobs: publish_results: true - name: "Upload artifact" - uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # 3.1.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: SARIF file path: results.sarif retention-days: 5 - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@b398f525a5587552e573b247ac661067fafa920b # 2.1.22 + uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 with: sarif_file: results.sarif diff --git a/.gitignore b/.gitignore index 4b377d5762..2ff17ae4a8 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,8 @@ buildNumber.properties *.iml /.vscode/ +/.DS_Store + +# NetBeans files +nb-configuration.xml +nbactions.xml diff --git a/BENCHMARK.md b/BENCHMARK.md index e8b579b2e2..c45918a289 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 3ed501501d..b4342f33ca 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 636aff79ef..3423e18ad2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -41,38 +41,40 @@ Contributing to Apache Commons CSV ====================== -You have found a bug or you have an idea for a cool new feature? Contributing code is a great way to give something back to -the open source community. Before you dig right into the code there are a few guidelines that we need contributors to -follow so that we can have a chance of keeping on top of things. +Have you found a bug or have an idea for a cool new feature? Contributing code is a great way to give something back to the open-source community. +Before you dig right into the code, we need contributors to follow a few guidelines to have a chance of keeping on top of things. 
Getting Started --------------- + Make sure you have a [JIRA account](https://issues.apache.org/jira/). -+ Make sure you have a [GitHub account](https://github.com/signup/free). ++ Make sure you have a [GitHub account](https://github.com/signup). This is not essential, but makes providing patches much easier. + If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons CSV's scope. + Submit a [Jira Ticket][jira] for your issue, assuming one does not already exist. + Clearly describe the issue including steps to reproduce when it is a bug. + Make sure you fill in the earliest version that you know has the issue. + Find the corresponding [repository on GitHub](https://github.com/apache/?query=commons-), -[fork](https://help.github.com/articles/fork-a-repo/) and check out your forked repository. +[fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) and check out your forked repository. If you don't have a GitHub account, you can still clone the Commons repository. Making Changes -------------- + Create a _topic branch_ for your isolated work. - * Usually you should base your branch on the `master` branch. - * A good topic branch name can be the JIRA bug id plus a keyword, e.g. `CSV-123-InputStream`. + * Usually you should base your branch from the `master` branch. + * A good topic branch name can be the JIRA bug ID plus a keyword, e.g. `CSV-123-InputStream`. * If you have submitted multiple JIRA issues, try to maintain separate branches and pull requests. + Make commits of logical units. * Make sure your commit messages are meaningful and in the proper format. Your commit message should contain the key of the JIRA issue. - * e.g. 
`CSV-123: Close input stream earlier` + * For example, `[CSV-123] Close input stream sooner` + Respect the original code style: - + Only use spaces for indentation. + + Only use spaces for indentation; you can check for unnecessary whitespace with `git diff` before committing. + Create minimal diffs - disable _On Save_ actions like _Reformat Source Code_ or _Organize Imports_. If you feel the source code should be reformatted create a separate PR for this change first. - + Check for unnecessary whitespace with `git diff` -- check before committing. -+ Make sure you have added the necessary tests for your changes, typically in `src/test/java`. -+ Run all the tests with `mvn clean verify` to assure nothing else was accidentally broken. ++ Write unit tests that match behavioral changes, where the tests fail if the changes to the runtime are not applied. This may not always be possible but is a best practice. +Unit tests are typically in the `src/test/java` directory. ++ Run a successful build using the default [Maven](https://maven.apache.org/) goal with `mvn`; that's `mvn` on the command line by itself. ++ Write a pull request description that is detailed enough to understand what the pull request does, how, and why. ++ Each commit in the pull request should have a meaningful subject line and body. Note that commits might be squashed by a maintainer on merge. + Making Trivial Changes ---------------------- @@ -80,7 +82,7 @@ Making Trivial Changes The JIRA tickets are used to generate the changelog for the next release. For changes of a trivial nature to comments and documentation, it is not always necessary to create a new ticket in JIRA. -In this case, it is appropriate to start the first line of a commit with '(doc)' instead of a ticket number. +In this case, it is appropriate to start the first line of a commit with '[doc]' or '[javadoc]' instead of a ticket number. 
Submitting Changes @@ -106,10 +108,9 @@ Additional Resources + [Contributing patches](https://commons.apache.org/patches.html) + [Apache Commons CSV JIRA project page][jira] + [Contributor License Agreement][cla] -+ [General GitHub documentation](https://help.github.com/) -+ [GitHub pull request documentation](https://help.github.com/articles/creating-a-pull-request/) ++ [General GitHub documentation](https://docs.github.com/) ++ [GitHub pull request documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) + [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) -+ `#apache-commons` IRC channel on `irc.freenode.net` [cla]:https://www.apache.org/licenses/#clas [jira]:https://issues.apache.org/jira/browse/CSV diff --git a/LICENSE.txt b/LICENSE.txt index d645695673..ff9ad4530f 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -193,7 +193,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/NOTICE.txt b/NOTICE.txt index ae103a0ad1..06d3824a28 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache Commons CSV -Copyright 2005-2022 The Apache Software Foundation +Copyright 2005-2026 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (https://www.apache.org/). 
diff --git a/README.md b/README.md index 38c3d4268e..f30de4b9c9 100644 --- a/README.md +++ b/README.md @@ -1,106 +1,119 @@ - - -Apache Commons CSV -=================== - -[![GitHub Actions Status](https://github.com/apache/commons-csv/workflows/Java%20CI/badge.svg)](https://github.com/apache/commons-csv/actions) -[![Coverage Status](https://codecov.io/gh/apache/commons-csv/branch/master/graph/badge.svg)](https://app.codecov.io/gh/apache/commons-csv/branch/master) -[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-csv/badge.svg?gav=true)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-csv/?gav=true) -[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-csv/1.9.0.svg)](https://javadoc.io/doc/org.apache.commons/commons-csv/1.9.0) -[![CodeQL](https://github.com/apache/commons-csv/workflows/CodeQL/badge.svg)](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml?query=workflow%3ACodeQL) - -The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. - -Documentation -------------- - -More information can be found on the [Apache Commons CSV homepage](https://commons.apache.org/proper/commons-csv). -The [Javadoc](https://commons.apache.org/proper/commons-csv/apidocs) can be browsed. -Questions related to the usage of Apache Commons CSV should be posted to the [user mailing list][ml]. - -Where can I get the latest release? ------------------------------------ -You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-csv/download_csv.cgi). - -Alternatively you can pull it from the central Maven repositories: - -```xml - - org.apache.commons - commons-csv - 1.9.0 - -``` - -Contributing ------------- - -We accept Pull Requests via GitHub. The [developer mailing list][ml] is the main channel of communication for contributors. 
-There are some guidelines which will make applying PRs easier for us: -+ No tabs! Please use spaces for indentation. -+ Respect the code style. -+ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change. -+ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn```. - -If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas). -You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md). - -License -------- -This code is under the [Apache Licence v2](https://www.apache.org/licenses/LICENSE-2.0). - -See the `NOTICE.txt` file for required notices and attributions. - -Donations ---------- -You like Apache Commons CSV? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support the development. 
- -Additional Resources --------------------- - -+ [Apache Commons Homepage](https://commons.apache.org/) -+ [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/CSV) -+ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) -+ `#apache-commons` IRC channel on `irc.freenode.org` - -[ml]:https://commons.apache.org/mail-lists.html + + +Apache Commons CSV +=================== + +[![Java CI](https://github.com/apache/commons-csv/actions/workflows/maven.yml/badge.svg)](https://github.com/apache/commons-csv/actions/workflows/maven.yml) +[![Maven Central](https://img.shields.io/maven-central/v/org.apache.commons/commons-csv?label=Maven%20Central)](https://search.maven.org/artifact/org.apache.commons/commons-csv) +[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-csv/1.14.1.svg)](https://javadoc.io/doc/org.apache.commons/commons-csv/1.14.1) +[![CodeQL](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/apache/commons-csv/badge)](https://api.securityscorecards.dev/projects/github.com/apache/commons-csv) + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +Documentation +------------- + +More information can be found on the [Apache Commons CSV homepage](https://commons.apache.org/proper/commons-csv). +The [Javadoc](https://commons.apache.org/proper/commons-csv/apidocs) can be browsed. +Questions related to the usage of Apache Commons CSV should be posted to the [user mailing list](https://commons.apache.org/mail-lists.html). + +Getting the latest release +-------------------------- +You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-csv/download_csv.cgi). 
+ +Alternatively, you can pull it from the central Maven repositories: + +```xml +<dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-csv</artifactId> + <version>1.14.1</version> +</dependency> +``` + +Building +-------- + +Building requires a Java JDK and [Apache Maven](https://maven.apache.org/). +The required Java version is found in the `pom.xml` as the `maven.compiler.source` property. + +From a command shell, run `mvn` without arguments to invoke the default Maven goal to run all tests and checks. + +Contributing +------------ + +We accept Pull Requests via GitHub. The [developer mailing list](https://commons.apache.org/mail-lists.html) is the main channel of communication for contributors. +There are some guidelines which will make applying PRs easier for us: ++ No tabs! Please use spaces for indentation. ++ Respect the existing code style for each file. ++ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change. ++ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running `mvn`. ++ Before pushing a PR, run `mvn` (by itself); this runs the default goal, which contains all build checks. ++ To see the code coverage report, regardless of coverage failures, run `mvn clean site -Dcommons.jacoco.haltOnFailure=false -Pjacoco` + +If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas). +You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md). + +License +------- +This code is licensed under the [Apache License v2](https://www.apache.org/licenses/LICENSE-2.0). + +See the `NOTICE.txt` file for required notices and attributions. + +Donating +-------- +You like Apache Commons CSV? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support development.
+ +Additional Resources +-------------------- + ++ [Apache Commons Homepage](https://commons.apache.org/) ++ [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/CSV) ++ [Apache Commons Slack Channel](https://the-asf.slack.com/archives/C60NVB8AD) ++ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) + +Apache Commons Components +------------------------- + +Please see the [list of components](https://commons.apache.org/components.html) diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt index e427967ab0..bfeb4bb8de 100644 --- a/RELEASE-NOTES.txt +++ b/RELEASE-NOTES.txt @@ -1,100 +1,104 @@ - Apache Commons CSV - Version 1.9.0 - Release Notes +Apache Commons CSV 1.14.1 Release Notes +--------------------------------------- +The Apache Commons CSV team is pleased to announce the release of Apache Commons CSV 1.14.1. -INTRODUCTION: -This document contains the release notes for the 1.9.0 version of Apache Commons CSV. +This document contains the release notes for the 1.14.1 version of Apache Commons CSV. +Commons CSV reads and writes files in Comma Separated Value (CSV) format variations. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +This is a feature and maintenance release. Java 8 or later is required. + +Changes in this version include: + + +Fixed Bugs +---------- + +* CSV-318: CSVPrinter.printRecord(Stream) hangs if given a parallel stream. Thanks to Joseph Shraibman, Gary Gregory. +* CSV-318: CSVPrinter now uses an internal lock instead of synchronized methods. Thanks to Joseph Shraibman, Gary Gregory. +* org.apache.commons.csv.CSVPrinter.printRecords(ResultSet) now writes one record at a time using a lock. Thanks to Gary Gregory. + +Changes +------- + +* Bump org.apache.commons:commons-parent from 81 to 85 #542. Thanks to Gary Gregory, Dependabot. +* Bump commons-io:commons-io from 2.18.0 to 2.20.0. Thanks to Gary Gregory. 
+* Bump com.opencsv:opencsv from 5.10 to 5.11.2 #545, #551, #553. Thanks to Gary Gregory, Dependabot. +* Bump org.apache.commons:commons-lang3 from 3.17.0 to 3.18.0 #556. Thanks to Gary Gregory, Dependabot. +* Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. Thanks to Gary Gregory. + + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ + +Apache Commons CSV 1.14.0 Release Notes +--------------------------------------- + +This document contains the release notes for the 1.14.0 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. Commons CSV requires at least Java 8. The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. -Feature and bug fix release (Java 8) +This is a feature and maintenance release. Java 8 or later is required. Changes in this version include: -NEW FEATURES -============ - -o CSV-275: Make CSVRecord#toList() public. Thanks to Michael Wyraz, Gary Gregory. -o Add CSVRecord#stream(). Thanks to Gary Gregory. -o Add CSVParser#stream(). Thanks to Gary Gregory. -o CSV-184: Make the method CSVRecord.putIn(Map) public. Thanks to Gaurav Agarwal, M. Steiger, Gary Gregory. -o Add test cases for CSVRecord with get(Enum) and toString. #54. Thanks to dota17. -o Add and use CSVFormat.Builder, deprecated CSVFormat#with methods, based on #73. Thanks to Gary Gregory, dota17. -o CSV-206: Add support for String delimiters #76. Thanks to Gary Gregory, dota17. 
- -FIXED BUGS -========== - -o Replace FindBugs with SpotBugs #56. Thanks to Amey Jadiye. -o Javadoc typo in CSVFormat let's -> lets #57. Thanks to Chen. -o CSV-259: CSVFormat.printWithEscapes throws StringIndexOutOfBoundsException when value is Reader #61. Thanks to Chen. -o Improve CSVFormat test coverage #63. Thanks to Chen. -o Fix CSVFileParserTest.java to allow for a null return value from record.getComment() #62. Thanks to Chen. -o Improve test coverage in CSVFormatTest #65. Thanks to Chen. -o Removed invalid Javadoc markup for CSVFormat EXCEL #64. Thanks to Chen. -o Improve CSVRecord and CSVPrinter code coverage #66. Thanks to Chen. -o Improve lexer and token coverage #67. Thanks to Chen. -o CSV-211: CSVFormat.format trims last delimiter if the delimiter is a white space #71. Thanks to Alpesh Kulkarni, Chen. -o Replace org.apache.commons.csv.Assertions.notNull() with Objects.requireNonNull(). Thanks to Gary Gregory. -o CSV-149: Line number is not proper at EOF. Thanks to Kranthi, Gary Gregory, Brent Worden, dota17. -o CSV-195: Parser iterates over the last CSV Record twice. Thanks to Rodolfo Duldulao, Rodolfo Duldulao, Michael Vitz, dota17. -o CSV-267: Minor improvements #126, #127, #130. Thanks to Arturo Bernal. -o CSV-123: Add possibility to use ResultSet header meta data as CSV header #11. Thanks to Emmanuel Bourg, Benedikt Ritter, shivakrishnaah, Gary Gregory. -o CSV-148: Add test cases for withIgnoreSurroundingSpaces() and withTrim() #70. Thanks to dota17. -o Update CSVParser.parse(File, Charset, CSVFormat) from IO to NIO. Thanks to Gary Gregory. -o CSV-271: Missing separator with print(object) followed by printRecord(Object[]) #157. Thanks to Amar Prakash Pandey. -o CSV-158: Fix EOL checking for read array in ExtendedBufferedReader #5. Thanks to Alexander Bondarev, Benedikt Ritter, Gary Gregory, Chen. -o CSV-263: Print from Reader with embedded quotes generates incorrect output #78. Thanks to Jason A. Guild, Gary Gregory. 
-o Replace JUnit assert by simpler but equivalent calls. #159. Thanks to Arturo Bernal. -o Update gitignore to ignore idea and vscode #160. Thanks to Seth Falco. -o CSV-281: Update CSVBenchmark #165. Thanks to belugabehr. -o CSV-283: Remove Whitespace Check Determines Delimiter Twice #167. Thanks to belugabehr. -o CSV-283: Document and Automate CSV Benchmark Harness #166. Thanks to belugabehr. -o CSV-279: Optimize Lexer Delimiter Check for One Character Delimiter #163. Thanks to belugabehr. -o SpotBugs Error: Medium: org.apache.commons.csv.CSVParser.getHeaderNames() may expose internal representation by returning CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 599] EI_EXPOSE_REP. Thanks to Gary Gregory. -o SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.format [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 433] EI_EXPOSE_REP2. Thanks to Gary Gregory. -o SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerMap [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 437] EI_EXPOSE_REP2. Thanks to Gary Gregory. -o SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 438] EI_EXPOSE_REP2. Thanks to Gary Gregory. -o SpotBugs Error: Medium: new org.apache.commons.csv.CSVPrinter(Appendable, CSVFormat) may expose internal representation by storing an externally mutable object into CSVPrinter.format [org.apache.commons.csv.CSVPrinter] At CSVPrinter.java:[line 100] EI_EXPOSE_REP2. Thanks to Gary Gregory. -o CSV-284: Formalize PerformanceTest #168. Thanks to belugabehr. 
-o CSV-278: Reuse Buffers in Lexer for Delimiter Detection #162. Thanks to belugabehr. -o CSV-286: Cleanup and Document Performance Test Harness #170. Thanks to belugabehr. -o CSV-265: Update buffer position when reading line comment #120. Thanks to belugabehr. - -CHANGES -======= - -o Update org.junit.jupiter:junit-jupiter from 5.6.0 to 5.7.0, #84 #109 Thanks to Gary Gregory. -o Update tests from Apache Commons Lang 3.9 to 3.12.0. Thanks to Gary Gregory. -o Update tests from commons-io:commons-io 2.6 to 2.11.0, #108. Thanks to Gary Gregory. -o Bump actions/checkout from v1 to v2.3.4, #79, #92, #121. Thanks to Dependabot. -o Bump commons-parent from 50 to 51 #80. Thanks to Dependabot. -o Bump tests from opencsv from 3.1 to 5.5.1 #81, #137, #158. Thanks to Dependabot, Gary Gregory. -o Update tests from super-csv from 2.2.1 to 2.4.0 #86. Thanks to Gary Gregory. -o Bump build actions/setup-java from v1.4.0 to v2, #101, #113. Thanks to Dependabot, Gary Gregory. -o Bump maven-pmd-plugin from 3.13.0 to 3.14.0 #122. Thanks to Dependabot. -o Bump tests from org.mockito:mockito-core 3.2.4 -> 3.11.2; #88, #107, #110, #123, #128, #129, #156. Thanks to Gary Gregory, Dependabot. -o Bump actions/cache from v2 to v2.1.6 #132, #153. Thanks to Dependabot. -o Bump maven-checkstyle-plugin from 3.0.0 to 3.1.2 #131. Thanks to Gary Gregory, Dependabot. -o Bump checkstyle from 8.29 to 8.44. Thanks to Gary Gregory. -o Bump junit-jupiter from 5.7.0 to 5.8.0-M1 #133, #149. Thanks to Dependabot, Gary Gregory. -o Bump commons.jacoco.version from 0.8.5 to 0.8.7 (Java 16). Thanks to Dependabot. -o Bump commons.spotbugs.version from 4.0.4 to 4.3.0 (Java 16). Thanks to Dependabot. -o Bump maven-javadoc-plugin from 3.2.0 to 3.3.0. Thanks to Gary Gregory. -o Bump jmh-generator-annprocess from 1.5.2 to 1.32 #151. Thanks to Dependabot. -o Bump PMD core from 6.29.0 to 6.36.0. Thanks to Dependabot. -o Bump biz.aQute.bnd:biz.aQute.bndlib from 5.1.2 to 5.3.0. Thanks to Dependabot. 
- - -Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +New Features +------------ + +* Define and use Maven property commons.jmh.version. Thanks to Gary Gregory. +* Add CSVFormat.Builder.setMaxRows(long). Thanks to Gary Gregory. +* Add CSVFormat.getMaxRows(). Thanks to Gary Gregory. +* CSVPrinter.printRecords(ResultSet) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVPrinter.printRecords(Iterable) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVPrinter.printRecords(Stream) knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVParser.stream() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVParser.getRecords() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. +* CSVParser.iterator() knows how to use CSVFormat's maxRows. Thanks to Gary Gregory. + +Fixed Bugs +---------- + +* CSV-317: Release history link changed from changes-report.html to changes.html #516. Thanks to Filipe Roque. +* Remove -nouses directive from maven-bundle-plugin. OSGi package imports now state 'uses' definitions for package imports, this doesn't affect JPMS (from org.apache.commons:commons-parent:80). Thanks to Gary Gregory. +* CSVParser.parse(URL, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(String, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(File, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(Path, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. 
+* CSVParser.parse(InputStream, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). Thanks to Gary Gregory. +* CSVParser.parse(*) methods with a null Charset maps to Charset.defaultCharset(). Thanks to Gary Gregory. +* Fix possible NullPointerException in Token.toString(). Thanks to Gary Gregory. + +Changes +------- + +* Bump com.opencsv:opencsv from 5.9 to 5.10. Thanks to Gary Gregory. +* Bump commons-codec:commons-codec from 1.17.2 to 1.18.0 #522. Thanks to Gary Gregory. +* Bump org.apache.commons:commons-parent from 79 to 81. Thanks to Gary Gregory. + + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -105,12 +109,364 @@ Have fun! ------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.8 - Release Notes +Apache Commons CSV 1.13.0 Release Notes +--------------------------------------- + +This document contains the release notes for the 1.13.0 version of Apache Commons CSV. +Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +This is a feature and maintenance release. Java 8 or later is required. + +Changes in this version include: + +New Features +------------ + +* CSV-313: Add CSVPrinter.getRecordCount(). Thanks to Gary Gregory. +* Add and use CSVParser.Builder and builder() and deprecate CSVParser constructors. Thanks to Gary Gregory. +* CSVFormat.Builder implements Supplier. Thanks to Gary Gregory. 
+* Deprecate CSVFormat.Builder.build() for get(). Thanks to Gary Gregory. +* CSV-196: Track byte position #502. Thanks to Yuzhan Jiang, Gary Gregory. +Fixed Bugs +---------- + +* CSV-314: Required OSGi Import-Package version numbers in MANIFEST.MF #504. Thanks to Gary Gregory. +* CSV-314: CSVParser.nextRecord() should throw CSVException (an IOException subclass) instead of IOException and IllegalStateException, no method signature changes needed. Thanks to Gary Gregory. + +Changes +------- + +* Bump org.apache.commons:commons-parent from 76 to 78 #486, #495. Thanks to Gary Gregory, Dependabot. +* Bump org.codehaus.mojo:taglist-maven-plugin from 3.1.0 to 3.2.1 #493. Thanks to Gary Gregory, Dependabot. +* Bump commons-io:commons-io from 2.17.0 to 2.18.0 #505. Thanks to Gary Gregory, Dependabot. +* Bump commons-codec:commons-codec from 1.17.1 to 1.17.2. Thanks to Gary Gregory. +* Bump org.apache.commons:commons-parent from 78 to 79. Thanks to Gary Gregory. + + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ + +Apache Commons CSV 1.12.0 Release Notes +--------------------------------------- + +This document contains the release notes for the 1.12.0 version of Apache Commons CSV. +Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. 
+ +Feature and bug fix release (Java 8 or above) + +Changes in this version include: + +New Features +------------ + +* CSV-270: Add CSVException that extends IOException thrown on invalid input instead of IOException. Thanks to Thomas Kamps, Gary Gregory. + +Fixed Bugs +---------- + +* Fix PMD issues for port to PMD 7.1.0. Thanks to Gary Gregory. +* Fix some Javadoc links #442. Thanks to Dávid Szigecsán, Gary Gregory. +* Extract duplicated code into a method #444. Thanks to Dávid Szigecsán. +* Migrate CSVFormat#print(File, Charset) to NIO #445. Thanks to Dávid Szigecsán. +* Fix documentation for CSVFormat private constructor #466. Thanks to Dávid Szigecsán. +* CSV-294: CSVFormat does not support explicit " as escape char. Thanks to Joern Huxhorn, Gary Gregory. +* CSV-150: Escaping is not disableable. Thanks to dota17, Gary Gregory, Jörn Huxhorn. +* Fix Javadoc warnings on Java 23. Thanks to Gary Gregory. +* Improve parser performance by up to 20%, YMMV. Thanks to Gary Gregory. + +Changes +------- + +* Bump commons-codec:commons-codec from 1.16.1 to 1.17.1 #422, #449. Thanks to Dependabot. +* Bump org.apache.commons:commons-parent from 69 to 75 #435, #452, #465, #468, #475. Thanks to Gary Gregory. +* Bump org.codehaus.mojo:taglist-maven-plugin from 3.0.0 to 3.1.0 #441. Thanks to Gary Gregory. +* Bump org.apache.commons:commons-lang3 from 3.14.0 to 3.17.0 #450, #459, #470. Thanks to Gary Gregory. +* Bump org.hamcrest:hamcrest from 2.2 to 3.0 #455. Thanks to Gary Gregory. +* Bump commons-io:commons-io from 2.16.1 to 2.17.0 #476. Thanks to Gary Gregory, Dependabot.
+ + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ -INTRODUCTION: +Apache Commons CSV 1.11.0 Release Notes +--------------------------------------- + +This document contains the release notes for the 1.11.0 version of Apache Commons CSV. +Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +Feature and bug fix release (Java 8 or above) + +Changes in this version include: + +New Features +------------ + +* CSV-308: [Javadoc] Add example to CSVFormat#setHeaderComments() #344. Thanks to Buddhi De Silva, Gary Gregory. +* Add and use CSVFormat#setTrailingData(boolean) in CSVFormat.EXCEL for Excel compatibility #303. Thanks to DamjanJovanovic, Gary Gregory. +* Add and use CSVFormat#setLenientEof(boolean) in CSVFormat.EXCEL for Excel compatibility #303. Thanks to DamjanJovanovic, Gary Gregory. + +Fixed Bugs +---------- + +* CSV-306: Replace deprecated method in user guide, update external link #324, #325. Thanks to Sam Ng, Bruno P. Kinoshita. +* Document duplicate header behavior #309. Thanks to Seth Falco, Bruno P. Kinoshita. +* Add missing docs #328. Thanks to jkbkupczyk. +* [StepSecurity] CI: Harden GitHub Actions #329, #330. Thanks to step-security-bot. +* CSV-147: Better error message during faulty CSV record read #347. 
Thanks to Steven Peterson, Benedikt Ritter, Gary Gregory, Joerg Schaible, Buddhi De Silva, Elliotte Rusty Harold. +* CSV-310: Misleading error message when QuoteMode set to None #352. Thanks to Buddhi De Silva. +* CSV-311: OutOfMemory for very long rows despite using column value of type Reader. Thanks to Christian Feuersaenger, Gary Gregory. +* Use try-with-resources to manage JDBC Clob in CSVPrinter.printRecords(ResultSet). Thanks to Gary Gregory. +* JDBC Blob columns are now output as Base64 instead of Object#toString(), which usually is InputStream#toString(). Thanks to Gary Gregory. +* Support unusual Excel use cases: Add support for trailing data after the closing quote, and EOF without a final closing quote #303. Thanks to DamjanJovanovic, Gary Gregory. +* MongoDB CSV empty first column parsing fix #412. Thanks to Igor Kamyshnikov, Gary Gregory. + +Changes +------- + +* Bump commons-io:commons-io from 2.11.0 to 2.16.1 #408, #413. Thanks to Gary Gregory. +* Bump commons-parent from 57 to 69 #410. Thanks to Gary Gregory, Dependabot. +* Bump h2 from 2.1.214 to 2.2.224 #333, #349, #359. Thanks to Dependabot. +* Bump commons-lang3 from 3.12.0 to 3.14.0. Thanks to Gary Gregory. +* Update exception message in CSVRecord#getNextRecord() #348. Thanks to Buddhi De Silva, Michael Osipov, Gary Gregory. +* Bump tests using com.opencsv:opencsv from 5.8 to 5.9 #373. Thanks to Dependabot. + + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun!
+-Apache Commons CSV team + +------------------------------------------------------------------------------ + +Apache Commons CSV 1.10.0 Release Notes +--------------------------------------- + +This document contains the release notes for the 1.10.0 version of Apache Commons CSV. +Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +Feature and bug fix release (Java 8 or above) + +Changes in this version include: + +New Features +------------ + +* CSV-291: Make CSVRecord#values() public. Thanks to Gary Gregory. +* CSV-264: Add DuplicateHeaderMode for flexibility with header strictness. #114. Thanks to Sagar Tiwari, Seth Falco, Alex Herbert, Gary Gregory. +* CSV-295: Support for parallelism in CSVPrinter. Thanks to Gary Gregory. +* CSV-295: Add CSVPrinter.printRecord[s](Stream). Thanks to Gary Gregory. +* CSV-304: Add accessors for header/trailer comments #257. Thanks to Peter Hull, Bruno P. Kinoshita, Gary Gregory. +* Add github/codeql-action. + +Fixed Bugs +---------- + +* Minor changes #172. Thanks to Arturo Bernal. +* CSV-292: No Automatic-Module-Name prevents usage in JPMS projects without repacking the JAR. Thanks to Rob Vesse. +* CSV-288: Fix for multi-char delimiter not working as expected #218. Thanks to Santhsoh, Angus. +* CSV-269: CSVRecord.get(Enum) should use Enum.name() instead of Enum.toString(). Thanks to Auke te Winkel, Gary Gregory. +* Allow org.apache.commons.csv.IOUtils.copy(Reader, Appendable, CharBuffer) to compile on Java 11 and run on Java 8. Thanks to Gary Gregory. +* CSV-300: CSVRecord.toList() does not give write access to the new List. Thanks to Markus Spann, Gary Gregory. +* CSVParser.getRecords() now throws UncheckedIOException instead of IOException. Thanks to Gary Gregory. +* CSV-274: Add comments to iterator() and stream() #270. 
Thanks to Peter Hull, Bruno P. Kinoshita, Gary Gregory. +* CSV-290: Fix wrong assumptions in PostgreSQL formats #265. Thanks to angusdev, Gary Gregory. +* Validate input to setDelimiter(String) for empty string #266. Thanks to Mykola Faryma. +* Bump CSVFormat#serialVersionUID from 1 to 2. Thanks to Dependabot. +* CSVParser: Identify duplicates in null, empty and blank header names #279. Thanks to Alex Herbert. + +Changes +------- + +* Bump actions/cache from 2.1.6 to 3.0.10 #196, #233, #243, #267, #271. Thanks to Dependabot, Gary Gregory. +* Bump actions/checkout from 2.3.4 to 3.1.0 #188, #195, #220, #272. Thanks to Dependabot, Gary Gregory. +* Bump actions/setup-java from 2 to 3.5.1. Thanks to Gary Gregory. +* Bump actions/upload-artifact from 3.1.0 to 3.1.1 #280. Thanks to Dependabot. +* Bump commons-parent from 52 to 56 #264, #288, #298. Thanks to Gary Gregory. +* Bump checkstyle from 8.44 to 9.2.1 #180, #190, #194, #202, #207. Thanks to Dependabot. +* Bump junit-jupiter from 5.8.0-M1 to 5.9.1 #179, #186, #201, #244, #263. Thanks to Dependabot. +* Bump jmh-core from 1.32 to 1.36 #176, #208, #229, #285. Thanks to Dependabot. +* Bump jmh-generator-annprocess from 1.32 to 1.36 #175, #206, #226, #283. Thanks to Dependabot. +* Bump mockito-core from 3.11.2 to 4.11.0 #187, #197, #204, #212, #230, #237, #251, #259, #284, #292, #297. Thanks to Dependabot, Gary Gregory. +* Bump maven-pmd-plugin from 3.14.0 to 3.19.0 #184, #219, #238, #254, #258. Thanks to Dependabot. +* Bump pmd from 6.36.0 to 6.52.0 #173, #189, #193, #199, #227, #233, #214, #236, #240, #247, #255, #273. Thanks to Dependabot, Gary Gregory. +* Bump opencsv from 5.5.1 to 5.7.1 #182, #221, #260, #281. Thanks to Gary Gregory. +* Bump spotbugs-maven-plugin from 4.3.0 to 4.7.3.0 #192, #198, #203, #211, #225, #234, #242, #245, #261, #275, #282. Thanks to Dependabot. +* Bump com.github.spotbugs:spotbugs from 4.5.3 to 4.7.2. Thanks to Gary Gregory. +* Bump h2 from 1.4.200 to 2.1.214 #200, #205, #213, #239. 
Thanks to Dependabot. +* Bump maven-javadoc-plugin from 3.3.0 to 3.4.1. Thanks to Gary Gregory. +* Bump biz.aQute.bnd:biz.aQute.bndlib from 5.3.0 to 6.3.1. Thanks to Gary Gregory. +* Bump jacoco-maven-plugin from 0.8.7 to 0.8.8. Thanks to Gary Gregory. +* Bump japicmp-maven-plugin from 0.15.3 to 0.16.0. Thanks to Gary Gregory. +* Bump maven-checkstyle-plugin from 3.1.2 to 3.2.0 #253. Thanks to Dependabot. + +Removed: +* Serialization in CSVFormat is not supported from one version to the next. + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ + +Apache Commons CSV 1.9.0 Release Notes +-------------------------------------- + +This document contains the release notes for the 1.9.0 version of Apache Commons CSV. +Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +Feature and bug fix release (Java 8 or above) + +Changes in this version include: + +New Features +------------ + +* CSV-275: Make CSVRecord#toList() public. Thanks to Michael Wyraz, Gary Gregory. +* Add CSVRecord#stream(). Thanks to Gary Gregory. +* Add CSVParser#stream(). Thanks to Gary Gregory. +* CSV-184: Make the method CSVRecord.putIn(Map) public. Thanks to Gaurav Agarwal, M. Steiger, Gary Gregory. +* Add test cases for CSVRecord with get(Enum) and toString. #54. Thanks to dota17. 
+* Add and use CSVFormat.Builder, deprecated CSVFormat#with methods, based on #73. Thanks to Gary Gregory, dota17. +* CSV-206: Add support for String delimiters #76. Thanks to Gary Gregory, dota17. + +Fixed Bugs +---------- + +* Replace FindBugs with SpotBugs #56. Thanks to Amey Jadiye. +* Javadoc typo in CSVFormat let's -> lets #57. Thanks to Chen. +* CSV-259: CSVFormat.printWithEscapes throws StringIndexOutOfBoundsException when value is Reader #61. Thanks to Chen. +* Improve CSVFormat test coverage #63. Thanks to Chen. +* Fix CSVFileParserTest.java to allow for a null return value from record.getComment() #62. Thanks to Chen. +* Improve test coverage in CSVFormatTest #65. Thanks to Chen. +* Removed invalid Javadoc markup for CSVFormat EXCEL #64. Thanks to Chen. +* Improve CSVRecord and CSVPrinter code coverage #66. Thanks to Chen. +* Improve lexer and token coverage #67. Thanks to Chen. +* CSV-211: CSVFormat.format trims last delimiter if the delimiter is a white space #71. Thanks to Alpesh Kulkarni, Chen. +* Replace org.apache.commons.csv.Assertions.notNull() with Objects.requireNonNull(). Thanks to Gary Gregory. +* CSV-149: Line number is not proper at EOF. Thanks to Kranthi, Gary Gregory, Brent Worden, dota17. +* CSV-195: Parser iterates over the last CSV Record twice. Thanks to Rodolfo Duldulao, Rodolfo Duldulao, Michael Vitz, dota17. +* CSV-267: Minor improvements #126, #127, #130. Thanks to Arturo Bernal. +* CSV-123: Add possibility to use ResultSet header meta data as CSV header #11. Thanks to Emmanuel Bourg, Benedikt Ritter, shivakrishnaah, Gary Gregory. +* CSV-148: Add test cases for withIgnoreSurroundingSpaces() and withTrim() #70. Thanks to dota17. +* Update CSVParser.parse(File, Charset, CSVFormat) from IO to NIO. Thanks to Gary Gregory. +* CSV-271: Missing separator with print(object) followed by printRecord(Object[]) #157. Thanks to Amar Prakash Pandey. +* CSV-158: Fix EOL checking for read array in ExtendedBufferedReader #5. 
Thanks to Alexander Bondarev, Benedikt Ritter, Gary Gregory, Chen. +* CSV-263: Print from Reader with embedded quotes generates incorrect output #78. Thanks to Jason A. Guild, Gary Gregory. +* Replace JUnit assert by simpler but equivalent calls. #159. Thanks to Arturo Bernal. +* Update gitignore to ignore idea and vscode #160. Thanks to Seth Falco. +* CSV-281: Update CSVBenchmark #165. Thanks to belugabehr. +* CSV-283: Remove Whitespace Check Determines Delimiter Twice #167. Thanks to belugabehr. +* CSV-283: Document and Automate CSV Benchmark Harness #166. Thanks to belugabehr. +* CSV-279: Optimize Lexer Delimiter Check for One Character Delimiter #163. Thanks to belugabehr. +* SpotBugs Error: Medium: org.apache.commons.csv.CSVParser.getHeaderNames() may expose internal representation by returning CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 599] EI_EXPOSE_REP. Thanks to Gary Gregory. +* SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.format [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 433] EI_EXPOSE_REP2. Thanks to Gary Gregory. +* SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerMap [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 437] EI_EXPOSE_REP2. Thanks to Gary Gregory. +* SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 438] EI_EXPOSE_REP2. Thanks to Gary Gregory. 
+* SpotBugs Error: Medium: new org.apache.commons.csv.CSVPrinter(Appendable, CSVFormat) may expose internal representation by storing an externally mutable object into CSVPrinter.format [org.apache.commons.csv.CSVPrinter] At CSVPrinter.java:[line 100] EI_EXPOSE_REP2. Thanks to Gary Gregory. +* CSV-284: Formalize PerformanceTest #168. Thanks to belugabehr. +* CSV-278: Reuse Buffers in Lexer for Delimiter Detection #162. Thanks to belugabehr. +* CSV-286: Cleanup and Document Performance Test Harness #170. Thanks to belugabehr. +* CSV-265: Update buffer position when reading line comment #120. Thanks to belugabehr. + +Changes +------- + +* Update org.junit.jupiter:junit-jupiter from 5.6.0 to 5.7.0, #84, #109. Thanks to Gary Gregory. +* Update tests from Apache Commons Lang 3.9 to 3.12.0. Thanks to Gary Gregory. +* Update tests from commons-io:commons-io 2.6 to 2.11.0, #108. Thanks to Gary Gregory. +* Bump actions/checkout from v1 to v2.3.4, #79, #92, #121. Thanks to Dependabot. +* Bump commons-parent from 50 to 51 #80. Thanks to Dependabot. +* Bump tests from opencsv from 3.1 to 5.5.1 #81, #137, #158. Thanks to Dependabot, Gary Gregory. +* Update tests from super-csv from 2.2.1 to 2.4.0 #86. Thanks to Gary Gregory. +* Bump build actions/setup-java from v1.4.0 to v2, #101, #113. Thanks to Dependabot, Gary Gregory. +* Bump maven-pmd-plugin from 3.13.0 to 3.14.0 #122. Thanks to Dependabot. +* Bump tests from org.mockito:mockito-core 3.2.4 -> 3.11.2; #88, #107, #110, #123, #128, #129, #156. Thanks to Gary Gregory, Dependabot. +* Bump actions/cache from v2 to v2.1.6 #132, #153. Thanks to Dependabot. +* Bump maven-checkstyle-plugin from 3.0.0 to 3.1.2 #131. Thanks to Gary Gregory, Dependabot. +* Bump checkstyle from 8.29 to 8.44. Thanks to Gary Gregory. +* Bump junit-jupiter from 5.7.0 to 5.8.0-M1 #133, #149. Thanks to Dependabot, Gary Gregory. +* Bump commons.jacoco.version from 0.8.5 to 0.8.7 (Java 16). Thanks to Dependabot. 
+* Bump commons.spotbugs.version from 4.0.4 to 4.3.0 (Java 16). Thanks to Dependabot. +* Bump maven-javadoc-plugin from 3.2.0 to 3.3.0. Thanks to Gary Gregory. +* Bump jmh-generator-annprocess from 1.5.2 to 1.32 #151. Thanks to Dependabot. +* Bump PMD core from 6.29.0 to 6.36.0. Thanks to Dependabot. +* Bump biz.aQute.bnd:biz.aQute.bndlib from 5.1.2 to 5.3.0. Thanks to Dependabot. + + +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ + +Apache Commons CSV 1.8 Release Notes +------------------------------------ This document contains the release notes for the 1.8 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -122,39 +478,39 @@ This release fixes serialization compatibility of CSVRecord with versions 1.0 to Changes in this version include: -NEW FEATURES -============== +New Features +------------ -o CSV-255: Add CSVRecord.isSet(int) method #52. Thanks to 0x100. +* CSV-255: Add CSVRecord.isSet(int) method #52. Thanks to 0x100. -FIXED BUGS -============ +Fixed Bugs +---------- -o CSV-135: Char escape doesn't work properly with quoting. Thanks to Mateusz Zakarczemny. -o CSV-244: Test case failures following CSVFormat#equals() update. -o CSV-243: CSVFormat withTrim() and withIgnoreSurroundingSpaces() need better docs. -o CSV-242: CSVFormat equals() and hashCode() don't use all fields. -o CSV-241: CSVFormat#validate() does not account for allowDuplicateHeaderNames #43. Thanks to LuckyIlam, Gary Gregory. 
-o CSV-245: Post 1.7 release fixes. Thanks to Alex Herbert. -o CSV-252: Upgrade test framework to JUnit 5 Jupiter #49, #50. Thanks to Alex Herbert. -o CSV-247: A single empty header is allowed when not allowing empty column headers. #47. Thanks to Alex Herbert, Gary Gregory. -o CSV-248: CSVRecord is not Serializable. Thanks to Alex Herbert. -o Use test scope for supercsv #48. Thanks to Alex Herbert. +* CSV-135: Char escape doesn't work properly with quoting. Thanks to Mateusz Zakarczemny. +* CSV-244: Test case failures following CSVFormat#equals() update. +* CSV-243: CSVFormat withTrim() and withIgnoreSurroundingSpaces() need better docs. +* CSV-242: CSVFormat equals() and hashCode() don't use all fields. +* CSV-241: CSVFormat#validate() does not account for allowDuplicateHeaderNames #43. Thanks to LuckyIlam, Gary Gregory. +* CSV-245: Post 1.7 release fixes. Thanks to Alex Herbert. +* CSV-252: Upgrade test framework to JUnit 5 Jupiter #49, #50. Thanks to Alex Herbert. +* CSV-247: A single empty header is allowed when not allowing empty column headers. #47. Thanks to Alex Herbert, Gary Gregory. +* CSV-248: CSVRecord is not Serializable. Thanks to Alex Herbert. +* Use test scope for supercsv #48. Thanks to Alex Herbert. -CHANGES -========= +Changes +------- -o Update tests from H2 1.4.199 to 1.4.200. Thanks to Gary Gregory. -o Update tests from Hamcrest 2.1 to 2.2. Thanks to Gary Gregory. -o Update tests from Mockito 3.1.0 to 3.2.4. Thanks to Gary Gregory. -o Fix typos in site and test #53. Thanks to Chen. -o Fix typo performance test #55. Thanks to Chen. +* Update tests from H2 1.4.199 to 1.4.200. Thanks to Gary Gregory. +* Update tests from Hamcrest 2.1 to 2.2. Thanks to Gary Gregory. +* Update tests from Mockito 3.1.0 to 3.2.4. Thanks to Gary Gregory. +* Fix typos in site and test #53. Thanks to Chen. +* Fix typo performance test #55. Thanks to Chen. 
-Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -165,12 +521,8 @@ Have fun! ------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.7 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.7 Release Notes +------------------------------------ This document contains the release notes for the 1.7 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -178,35 +530,35 @@ Commons CSV reads and writes files in variations of the Comma Separated Value (C The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. -Feature and bug fix release (Java 8) +Feature and bug fix release (Java 8 or above) Changes in this version include: -NEW FEATURES -============== +New Features +------------ -o CSV-233: Add predefined CSVFormats for printing MongoDB CSV and TSV. Thanks to Gary Gregory. -o CSV-234: Add support for java.sql.Clob. Thanks to Roberto Benedetti, Gary Gregory. -o CSV-239: Cannot get headers in column order from CSVRecord. Thanks to Gary Gregory, Dave Moten. +* CSV-233: Add predefined CSVFormats for printing MongoDB CSV and TSV. Thanks to Gary Gregory. +* CSV-234: Add support for java.sql.Clob. Thanks to Roberto Benedetti, Gary Gregory. +* CSV-239: Cannot get headers in column order from CSVRecord. Thanks to Gary Gregory, Dave Moten. 
-FIXED BUGS -============ +Fixed Bugs +---------- -o CSV-208: Fix escape character for POSTGRESQL_TEXT and POSTGRESQL_CSV formats. Thanks to Jurrie Overgoor. -o CSV-232: Site link "Source Repository" does not work. Thanks to Jurrie Overgoor, Gary Gregory. -o CSV-238: Escape quotes in CLOBs #39. Thanks to Stephen Olander-Waters. +* CSV-208: Fix escape character for POSTGRESQL_TEXT and POSTGRESQL_CSV formats. Thanks to Jurrie Overgoor. +* CSV-232: Site link "Source Repository" does not work. Thanks to Jurrie Overgoor, Gary Gregory. +* CSV-238: Escape quotes in CLOBs #39. Thanks to Stephen Olander-Waters. -CHANGES -========= +Changes +------- -o CSV-237: Update to Java 8. Thanks to Gary Gregory. -o Update tests from H2 1.4.198 to 1.4.199. Thanks to Gary Gregory. +* CSV-237: Update to Java 8. Thanks to Gary Gregory. +* Update tests from H2 1.4.198 to 1.4.199. Thanks to Gary Gregory. -Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -217,12 +569,8 @@ Have fun! ------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.6 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.6 Release Notes +------------------------------------ This document contains the release notes for the 1.6 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the @@ -237,33 +585,33 @@ Feature and bug fix release. Changes in this version include: -NEW FEATURES -============== +New Features +------------ -o CSV-217: Add autoFlush option for CsvPrinter. PR #24. 
+* CSV-217: Add autoFlush option for CsvPrinter. PR #24. Thanks to Korolyov Alexei. -o CSV-220: Add API org.apache.commons.csv.CSVFormat.withSystemRecordSeparator(). +* CSV-220: Add API org.apache.commons.csv.CSVFormat.withSystemRecordSeparator(). Thanks to Gary Gregory. -FIXED BUGS -============ +Fixed Bugs +---------- -o CSV-219: The behavior of quote char using is not similar as Excel does when +* CSV-219: The behavior of quote char using is not similar as Excel does when the first string contains CJK char(s). Thanks to Zhang Hongda. -o CSV-172: Don't quote cells just because they have UTF-8 encoded characters. +* CSV-172: Don't quote cells just because they have UTF-8 encoded characters. Thanks to Andrew Pennebaker. -o CSV-223: Inconsistency between Javadoc of CSVFormat DEFAULT EXCEL. +* CSV-223: Inconsistency between Javadoc of CSVFormat DEFAULT EXCEL. Thanks to Samuel Martin. -o CSV-209: Create CSVFormat.ORACLE preset. Thanks to Gary Gregory. -o CSV-224: Some multi-iterator parsing peek sequences incorrectly consume +* CSV-209: Create CSVFormat.ORACLE preset. Thanks to Gary Gregory. +* CSV-224: Some multi-iterator parsing peek sequences incorrectly consume elements. Thanks to David Warshaw. -o CSV-225: Parse method should avoid creating a redundant BufferedReader. +* CSV-225: Parse method should avoid creating a redundant BufferedReader. Thanks to Anson Schwabecher. -CHANGES -========= +Changes +------- -o CSV-231: Add more documentation to CSVPrinter. +* CSV-231: Add more documentation to CSVPrinter. Have fun! @@ -271,12 +619,8 @@ Have fun! ------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.5 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.5 Release Notes +------------------------------------ This document contains the release notes for the 1.5 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. 
@@ -290,36 +634,36 @@ Feature and bug fix release Changes in this version include: -NEW FEATURES -============== +New Features +------------== -o CSV-189: CSVParser: Add factory method accepting InputStream. Thanks to Peter Holzwarth, Gary Gregory. -o CSV-190: Add convenience API CSVFormat.print(File, Charset). Thanks to Gary Gregory. -o CSV-191: Add convenience API CSVFormat.print(Path, Charset). Thanks to Gary Gregory. -o CSV-192: Add convenience API CSVParser.parse(Path, Charset, CSVFormat). Thanks to Gary Gregory. -o CSV-205: Add convenience API CSVFormat#printer() to print to System.out. Thanks to Gary Gregory. -o CSV-207: Provide a CSV Format for printing PostgreSQL CSV and Text formats. Thanks to Gary Gregory. -o CSV-214: Adding a placeholder in the Lexer and CSV parser to store the end-of-line string. Thanks to Nitin Mahendru, Gary Gregory. +* CSV-189: CSVParser: Add factory method accepting InputStream. Thanks to Peter Holzwarth, Gary Gregory. +* CSV-190: Add convenience API CSVFormat.print(File, Charset). Thanks to Gary Gregory. +* CSV-191: Add convenience API CSVFormat.print(Path, Charset). Thanks to Gary Gregory. +* CSV-192: Add convenience API CSVParser.parse(Path, Charset, CSVFormat). Thanks to Gary Gregory. +* CSV-205: Add convenience API CSVFormat#printer() to print to System.out. Thanks to Gary Gregory. +* CSV-207: Provide a CSV Format for printing PostgreSQL CSV and Text formats. Thanks to Gary Gregory. +* CSV-214: Adding a placeholder in the Lexer and CSV parser to store the end-of-line string. Thanks to Nitin Mahendru, Gary Gregory. -FIXED BUGS -============ +Fixed Bugs +----------== -o CSV-203: withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17. Thanks to Richard Wheeldon, Kai Paroth. -o CSV-194: Fix outdated comments about FileReader in CSVParser #13. Thanks to Marc Prud'hommeaux. -o CSV-193: Fix incorrect method name 'withFirstRowAsHeader' in user guide. Thanks to Matthias Wiehl. 
-o CSV-171: Negative numeric values in the first column are always quoted in minimal mode. Thanks to Gary Gregory, Michael Graessle, Adrian Bridgett. +* CSV-203: withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17. Thanks to Richard Wheeldon, Kai Paroth. +* CSV-194: Fix outdated comments about FileReader in CSVParser #13. Thanks to Marc Prud'hommeaux. +* CSV-193: Fix incorrect method name 'withFirstRowAsHeader' in user guide. Thanks to Matthias Wiehl. +* CSV-171: Negative numeric values in the first column are always quoted in minimal mode. Thanks to Gary Gregory, Michael Graessle, Adrian Bridgett. -CHANGES -========= +Changes +-------== -o CSV-187: Update platform requirement from Java 6 to 7. Thanks to Gary Gregory. -o CSV-201: Do not use RuntimeException in CSVParser.iterator().new Iterator() {...}.getNextRecord(). Thanks to Benedikt Ritter, Gary Gregory. +* CSV-187: Update platform requirement from Java 6 to 7. Thanks to Gary Gregory. +* CSV-201: Do not use RuntimeException in CSVParser.iterator().new Iterator() {...}.getNextRecord(). Thanks to Benedikt Ritter, Gary Gregory. -Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -328,12 +672,8 @@ Have fun! ------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.4 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.4 Release Notes +------------------------------------ This document contains the release notes for the 1.4 version of Apache Commons CSV. 
Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -347,23 +687,23 @@ Feature and bug fix release Changes in this version include: -NEW FEATURES -============== +New Features +------------ -o CSV-182: Allow some printing operations directly from CSVFormat. Thanks to Gary Gregory. +* CSV-182: Allow some printing operations directly from CSVFormat. Thanks to Gary Gregory. -CHANGES -========= +Changes +------- -o CSV-181: Make CSVPrinter.print(Object) GC-free. Thanks to Gary Gregory. -o CSV-183: Drop ferc.gov tests. +* CSV-181: Make CSVPrinter.print(Object) GC-free. Thanks to Gary Gregory. +* CSV-183: Drop ferc.gov tests. -Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -372,12 +712,8 @@ Have fun! ------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.3 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.3 Release Notes +------------------------------------ This document contains the release notes for the 1.3 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -391,29 +727,29 @@ Feature and bug fix release. Changes in this version include: -NEW FEATURES -============== +New Features +------------ -o CSV-179: Add shortcut method for using first record as header to CSVFormat. -o CSV-180: Add withHeader(Class? extends Enum>) to CSVFormat. -o CSV-159: Add IgnoreCase option for accessing header names. Thanks to Yamil Medina.
-o CSV-169: The null string should be case-sensitive when reading records. Thanks to Gary Gregory. -o CSV-175: Support for ignoring trailing delimiter. Thanks to Gary Gregory, Chris Jones. -o CSV-177: Support trimming leading and trailing blanks. Thanks to Gary Gregory. -o CSV-178: Create default formats for Informix UNLOAD and UNLOAD CSV. Thanks to Gary Gregory. +* CSV-179: Add shortcut method for using first record as header to CSVFormat. +* CSV-180: Add withHeader(Class<? extends Enum>) to CSVFormat. +* CSV-159: Add IgnoreCase option for accessing header names. Thanks to Yamil Medina. +* CSV-169: The null string should be case-sensitive when reading records. Thanks to Gary Gregory. +* CSV-175: Support for ignoring trailing delimiter. Thanks to Gary Gregory, Chris Jones. +* CSV-177: Support trimming leading and trailing blanks. Thanks to Gary Gregory. +* CSV-178: Create default formats for Informix UNLOAD and UNLOAD CSV. Thanks to Gary Gregory. -FIXED BUGS -============ +Fixed Bugs +---------- -o CSV-168: CSVFormat.nullString should not be escaped. Thanks to Gary Gregory, cornel creanga. -o CSV-170: CSVFormat.MYSQL nullString should be "\N". Thanks to Gary Gregory, cornel creanga. -o CSV-161: Fix Javadoc to say CSVFormat with() methods return a new CSVFormat. Thanks to Gary Gregory, Kristof Meixner, Emmanuel Bourg. +* CSV-168: CSVFormat.nullString should not be escaped. Thanks to Gary Gregory, cornel creanga. +* CSV-170: CSVFormat.MYSQL nullString should be "\N". Thanks to Gary Gregory, cornel creanga. +* CSV-161: Fix Javadoc to say CSVFormat with() methods return a new CSVFormat. Thanks to Gary Gregory, Kristof Meixner, Emmanuel Bourg. -CHANGES -============ +Changes +------- -o CSV-167: Comment line hides next record; update Javadoc to make behavior clear. Thanks to Rene. -o CSV-153: CSVPrinter doesn't skip creation of header record if skipHeaderRecord is set to true. Thanks to Wren.
+* CSV-167: Comment line hides next record; update Javadoc to make behavior clear. Thanks to Rene. +* CSV-153: CSVPrinter doesn't skip creation of header record if skipHeaderRecord is set to true. Thanks to Wren. Have fun! @@ -421,12 +757,8 @@ Have fun! ------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.2 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.2 Release Notes +------------------------------------ This document contains the release notes for the 1.2 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -440,22 +772,22 @@ Feature and bug fix release Changes in this version include: -NEW FEATURES -============== +New Features +------------ -o CSV-157: Add enum CSVFormat.Predefined that contains the default CSVFormat values. +* CSV-157: Add enum CSVFormat.Predefined that contains the default CSVFormat values. -FIXED BUGS -============ +Fixed Bugs +---------- -o CSV-145: CSVFormat.with* methods clear the header comments. Thanks to Frank Ulbricht. -o CSV-156: Incorrect Javadoc on QuoteMode.NONE. Thanks to Jason Steenstra-Pickens. +* CSV-145: CSVFormat.with* methods clear the header comments. Thanks to Frank Ulbricht. +* CSV-156: Incorrect Javadoc on QuoteMode.NONE. Thanks to Jason Steenstra-Pickens. -Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -464,12 +796,8 @@ Have fun!
------------------------------------------------------------------------------ - Apache Commons CSV - Version 1.1 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.1 Release Notes +------------------------------------ This document contains the release notes for the 1.1 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -484,24 +812,24 @@ This is our second release. Changes in this version include: New features: -o [CSV-129] Add CSVFormat#with 0-arg methods matching boolean arg methods. -o [CSV-131] Save positions of records to enable random access. Thanks to Holger Stratmann. -o [CSV-139] CSVPrinter.printRecord(ResultSet) with metadata. +* [CSV-129] Add CSVFormat#with 0-arg methods matching boolean arg methods. +* [CSV-131] Save positions of records to enable random access. Thanks to Holger Stratmann. +* [CSV-139] CSVPrinter.printRecord(ResultSet) with metadata. Fixed Bugs: -o [CSV-140] QuoteMode.NON_NUMERIC doesn't work with CSVPrinter.printRecords(ResultSet). Thanks to Damjan Jovanovic. -o [CSV-130] CSVFormat#withHeader doesn't work well with #printComment, add withHeaderComments(String...). Thanks to Sergei Lebedev. -o [CSV-128] CSVFormat.EXCEL should ignore empty header names. -o [CSV-132] Incorrect Javadoc referencing org.apache.commons.csv.CSVFormat withQuote(). Thanks to Sascha Szott. +* [CSV-140] QuoteMode.NON_NUMERIC doesn't work with CSVPrinter.printRecords(ResultSet). Thanks to Damjan Jovanovic. +* [CSV-130] CSVFormat#withHeader doesn't work well with #printComment, add withHeaderComments(String...). Thanks to Sergei Lebedev. +* [CSV-128] CSVFormat.EXCEL should ignore empty header names. +* [CSV-132] Incorrect Javadoc referencing org.apache.commons.csv.CSVFormat withQuote(). Thanks to Sascha Szott. Changes: -o [CSV-124] Improve toString() implementation of CSVRecord. Thanks to Kalyan. -o [CSV-134] Unified parameter validation. Thanks to wu wen. 
+* [CSV-124] Improve toString() implementation of CSVRecord. Thanks to Kalyan. +* [CSV-134] Unified parameter validation. Thanks to wu wen. -Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -510,12 +838,8 @@ Have fun! ------------------------------------------------------------------------------- - Apache Commons CSV - Version 1.0 - Release Notes - - -INTRODUCTION: +Apache Commons CSV 1.0 Release Notes +------------------------------------ This document contains the release notes for the 1.0 version of Apache Commons CSV. Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. @@ -529,74 +853,74 @@ First release Changes in this version include: -NEW FEATURES -============ +New Features +------------ -o CSV-121: IllegalArgumentException thrown when the header contains duplicate names when the column names are empty. +* CSV-121: IllegalArgumentException thrown when the header contains duplicate names when the column names are empty. Thanks to Sebastian Hardt. -o CSV-120: CSVFormat#withHeader doesn't work with CSVPrinter Thanks to Sergei Lebedev. -o CSV-119: CSVFormat is missing a print(...) method Thanks to Sergei Lebedev. -o CSV-105: Add Map conversion API to CSVRecord -o CSV-52: Keep track of record number -o CSV-92: Need a way to extract parsed headers, e.g. for use in formatting +* CSV-120: CSVFormat#withHeader doesn't work with CSVPrinter Thanks to Sergei Lebedev. +* CSV-119: CSVFormat is missing a print(...) method Thanks to Sergei Lebedev. 
+* CSV-105: Add Map conversion API to CSVRecord +* CSV-52: Keep track of record number +* CSV-92: Need a way to extract parsed headers, e.g. for use in formatting output -o CSV-65: Header support -o CSV-48: Predefined format for MYSQL +* CSV-65: Header support +* CSV-48: Predefined format for MYSQL -FIXED BUGS -========== +Fixed Bugs +---------- -o CSV-125: No longer works with Java 6 -o CSV-122: NullPointerException when empty header string and and null string of "". +* CSV-125: No longer works with Java 6 +* CSV-122: NullPointerException when empty header string and null string of "". Thanks to Mike Lewis. -o CSV-118: CSVRecord.toMap() throws NPE on formats with no +* CSV-118: CSVRecord.toMap() throws NPE on formats with no headers. Thanks to Enrique Lara. -o CSV-113: Check whether ISE/IAE are being used appropriately -o CSV-114: CSVFormat constructor should reject a header array with duplicate +* CSV-113: Check whether ISE/IAE are being used appropriately +* CSV-114: CSVFormat constructor should reject a header array with duplicate entries -o CSV-112: HeaderMap is inconsistent when it is parsed from an input with +* CSV-112: HeaderMap is inconsistent when it is parsed from an input with duplicate columns names -o CSV-111: CSVRecord.toMap() fails if row length shorter than header length -o CSV-106: CSVFormat.format allways append null -o CSV-100: CSVParser: getHeaderMap throws NPE -o CSV-53: CSVRecord does not verify that the length of the header mapping +* CSV-111: CSVRecord.toMap() fails if row length shorter than header length +* CSV-106: CSVFormat.format always appends null +* CSV-100: CSVParser: getHeaderMap throws NPE +* CSV-53: CSVRecord does not verify that the length of the header mapping matches the number of values -o CSV-54: Confusing semantic of the ignore leading/trailing spaces parameters -o CSV-34: CSVFormat describes itself as immutable, but it is not - in +* CSV-54: Confusing semantic of the ignore leading/trailing spaces parameters +* CSV-34:
CSVFormat describes itself as immutable, but it is not - in particular it is not thread-safe -o CSV-36: Endless loops in CSV parser -o CSV-13: NullPointerException in CSVPrinter.print()/println() -o CSV-23: Excel strategy uses wrong separator - -CHANGES -======= - -o CSV-117: Validate format parameters in constructor -o CSV-42: Lots of possible changes Thanks to Bob Smith. -o CSV-78: Use Character instead of char for char fields except delimiter -o CSV-99: Revert Builder implementation in CSVFormat -o CSV-93: Allow the handling of NULL values -o CSV-68: Use the Builder pattern for CSVFormat -o CSV-84: Clarify comment handling -o CSV-25: CSVParser.nextValue() seems pointless -o CSV-97: Allow the String value for null to be customized for the CSV +* CSV-36: Endless loops in CSV parser +* CSV-13: NullPointerException in CSVPrinter.print()/println() +* CSV-23: Excel strategy uses wrong separator + +Changes +------- + +* CSV-117: Validate format parameters in constructor +* CSV-42: Lots of possible changes Thanks to Bob Smith. +* CSV-78: Use Character instead of char for char fields except delimiter +* CSV-99: Revert Builder implementation in CSVFormat +* CSV-93: Allow the handling of NULL values +* CSV-68: Use the Builder pattern for CSVFormat +* CSV-84: Clarify comment handling +* CSV-25: CSVParser.nextValue() seems pointless +* CSV-97: Allow the String value for null to be customized for the CSV printer -o CSV-88: Not possible to create a CSVFormat from scratch -o CSV-94: Lexer should only use char fields -o CSV-71: Add convenience methods to CSVLexer -o CSV-59: Is CharBuffer really needed, now that StringBuilder is available? -o CSV-55: Replace while(true)-loop in CSVParser.getRecord with do-while-loop -o CSV-45: CSVPrinter overhaul -o CSV-49: CSVStrategy has modifiable public static variables Thanks to Bob Smith. 
-o CSV-46: Reduce visibility of methods in internal classes -o CSV-26: ExtendedBufferedReader does too much -o CSV-27: Decide whether to keep the csv.writer subpackage +* CSV-88: Not possible to create a CSVFormat from scratch +* CSV-94: Lexer should only use char fields +* CSV-71: Add convenience methods to CSVLexer +* CSV-59: Is CharBuffer really needed, now that StringBuilder is available? +* CSV-55: Replace while(true)-loop in CSVParser.getRecord with do-while-loop +* CSV-45: CSVPrinter overhaul +* CSV-49: CSVStrategy has modifiable public static variables Thanks to Bob Smith. +* CSV-46: Reduce visibility of methods in internal classes +* CSV-26: ExtendedBufferedReader does too much +* CSV-27: Decide whether to keep the csv.writer subpackage -Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ diff --git a/SECURITY.md b/SECURITY.md index 51943ba7b4..744d4cddbb 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/benchmark-prereq.sh b/benchmark-prereq.sh index 1d03f6773a..bd1db91821 100755 --- a/benchmark-prereq.sh +++ b/benchmark-prereq.sh @@ -8,7 +8,7 @@ # "License"); you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/pom.xml b/pom.xml index 38d5ca56dd..8cb13ed7c2 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,4 @@ - + - + 4.0.0 org.apache.commons commons-parent - 54 + 102 commons-csv - 1.10.0-SNAPSHOT + 1.15.0-SNAPSHOT Apache Commons CSV https://commons.apache.org/proper/commons-csv/ 2005 The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. - org.junit.jupiter junit-jupiter test - - org.hamcrest - hamcrest - 2.2 - test - org.mockito mockito-core - 4.8.0 + ${commons.mockito.version} test commons-io commons-io - 2.11.0 - test + ${commons.io.version} + + + commons-codec + commons-codec + ${commons.codec.version} org.apache.commons commons-lang3 - 3.12.0 + 3.20.0 test com.h2database h2 - 2.1.214 + + 2.2.224 + test + + + org.openjdk.jmh + jmh-core + ${commons.jmh.version} test - - - - bayard - Henri Yandell - bayard@apache.org - The Apache Software Foundation - - - Martin van den Bemt - mvdb - mvdb@apache.org - The Apache Software Foundation - - - Yonik Seeley - yonik - yonik@apache.org - The Apache Software Foundation - - - Emmanuel Bourg - ebourg - ebourg@apache.org - Apache - - - ggregory - Gary Gregory - ggregory at apache.org - https://www.garygregory.com - The Apache Software Foundation - https://www.apache.org/ - - PMC Member - - America/New_York - - https://people.apache.org/~ggregory/img/garydgregory80.png - - - - Benedikt Ritter - britter - britter@apache.org - The Apache Software Foundation - - - Rob Tompkins - chtompki - chtompki@apache.org - The Apache Software Foundation - - - - - Bob Smith - - - - scm:git:http://gitbox.apache.org/repos/asf/commons-csv.git + scm:git:https://gitbox.apache.org/repos/asf/commons-csv.git 
scm:git:https://gitbox.apache.org/repos/asf/commons-csv.git https://gitbox.apache.org/repos/asf?p=commons-csv.git - - jira https://issues.apache.org/jira/browse/CSV - + + github + https://github.com/apache/commons-csv/actions + apache.website @@ -144,50 +89,55 @@ scm:svn:https://svn.apache.org/repos/infra/websites/production/commons/content/proper/commons-csv/ - - 1.10.0 - (Java 8) + 1.15.0 + (Java 8 or above) RC1 - 1.9.0 + 1.14.1 + 1.15.1 csv org.apache.commons.csv CSV 12313222 1.8 1.8 - http://docs.oracle.com/javase/8/docs/api/ UTF-8 UTF-8 UTF-8 - - 3.2.0 - 9.3 - ${basedir}/src/site/resources/checkstyle/checkstyle-header.txt - ${basedir}/src/site/resources/checkstyle/checkstyle.xml - ${basedir}/src/site/resources/checkstyle/checkstyle-suppressions.xml - LICENSE.txt, NOTICE.txt, **/maven-archiver/pom.properties - - 5.9.1 - 3.19.0 - 6.49.0 - 0.8.8 - 4.7.2.0 - 4.7.2 - 0.16.0 - 3.4.1 - 6.3.1 false - true - Gary Gregory - 86fdc7e2a11262cb + 2025-07-30T14:51:35Z + 1.22.0 + 2.22.0 + + + org.apache.commons.codec.binary;version="${commons.codec.version}", + org.apache.commons.io;version="${commons.io.version}", + org.apache.commons.io.build;version="${commons.io.version}", + org.apache.commons.io.function;version="${commons.io.version}", + org.apache.commons.io.input;version="${commons.io.version}", + org.apache.commons.io.output;version="${commons.io.version}", + * + + + + true + 1.00 + 0.99 + 0.99 + 0.97 + 0.99 + 0.97 + + ${basedir}/src/conf/checkstyle/checkstyle-header.txt + ${basedir}/src/conf/checkstyle/checkstyle.xml + ${basedir}/src/conf/checkstyle/checkstyle-suppressions.xml + LICENSE.txt, NOTICE.txt, **/maven-archiver/pom.properties - - clean verify apache-rat:check japicmp:cmp checkstyle:check spotbugs:check pmd:check pmd:cpd-check javadoc:javadoc + clean verify apache-rat:check japicmp:cmp spotbugs:check pmd:check pmd:cpd-check javadoc:javadoc checkstyle:check @@ -197,38 +147,18 @@ ${checkstyle.config.file} false + target/generated-test-sources/**/*.java 
${checkstyle.suppress.file} + true - - - com.puppycrawl.tools - checkstyle - ${commons.checkstyle.version} - - org.apache.maven.plugins maven-pmd-plugin ${commons.pmd.version} - - - net.sourceforge.pmd - pmd-core - ${commons.pmd-impl.version} - - - net.sourceforge.pmd - pmd-java - ${commons.pmd-impl.version} - - ${maven.compiler.target} false - - ${basedir}/src/site/resources/pmd/pmd-ruleset.xml - @@ -238,29 +168,35 @@ apache-rat-plugin - + - src/test/resources/org/apache/commons/csv/empty.txt - src/test/resources/org/apache/commons/csv/csv-167/sample1.csv - src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv - src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt + src/test/resources/org/apache/commons/csv/empty.txt + src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv + src/test/resources/org/apache/commons/csv/csv-167/sample1.csv + src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv + src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv + src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv + src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv + src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv + 
src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt - src/test/resources/org/apache/commons/csv/ferc.gov/contract.txt - src/test/resources/org/apache/commons/csv/ferc.gov/transaction.txt - src/test/resources/**/*.bin - src/test/resources/org/apache/commons/csv/CSV-259/sample.txt - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv - src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt - + src/test/resources/org/apache/commons/csv/ferc.gov/contract.txt + src/test/resources/org/apache/commons/csv/ferc.gov/transaction.txt + src/test/resources/**/*.bin + src/test/resources/org/apache/commons/csv/CSV-259/sample.txt + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv + src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt + src/test/resources/org/apache/commons/csv/CSV-290/psql.csv + src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv + @@ -319,7 +255,6 @@ com.github.spotbugs spotbugs-maven-plugin - ${commons.spotbugs.version} ${basedir}/src/site/resources/spotbugs/spotbugs-exclude-filter.xml @@ -356,7 +291,6 @@ com.github.spotbugs spotbugs-maven-plugin - ${commons.spotbugs.version} ${basedir}/src/site/resources/spotbugs/spotbugs-exclude-filter.xml @@ -368,7 +302,6 @@ org.codehaus.mojo taglist-maven-plugin - 3.0.0 @@ -423,17 +356,11 @@ benchmark - - org.openjdk.jmh - jmh-core - 1.35 - test - 
org.openjdk.jmh jmh-generator-annprocess - 1.35 + ${commons.jmh.version} test @@ -454,7 +381,7 @@ com.opencsv opencsv - 5.7.0 + 5.12.0 test @@ -464,14 +391,16 @@ 2.4.0 test - - + org.skife.kasparov csv 1.0 @@ -481,7 +410,7 @@ org.apache.commons commons-lang3 - 3.12.0 + 3.20.0 @@ -534,17 +463,63 @@ - - - java9 - - 9 - + + + + bayard + Henri Yandell + bayard@apache.org + The Apache Software Foundation + + + Martin van den Bemt + mvdb + mvdb@apache.org + The Apache Software Foundation + + + Yonik Seeley + yonik + yonik@apache.org + The Apache Software Foundation + + + Emmanuel Bourg + ebourg + ebourg@apache.org + Apache + + + ggregory + Gary Gregory + ggregory at apache.org + https://www.garygregory.com + The Apache Software Foundation + https://www.apache.org/ + + PMC Member + + America/New_York - - true + https://people.apache.org/~ggregory/img/garydgregory80.png - - - + + + Benedikt Ritter + britter + britter@apache.org + The Apache Software Foundation + + + Rob Tompkins + chtompki + chtompki@apache.org + The Apache Software Foundation + + + + + Bob Smith + + diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml index 016f1fee9f..3526ca9e91 100644 --- a/src/assembly/bin.xml +++ b/src/assembly/bin.xml @@ -1,54 +1,56 @@ - - - bin - - tar.gz - zip - - - - - LICENSE.txt - NOTICE.txt - RELEASE-NOTES.txt - - - - target - - - ${artifactId}-${version}.jar - - - - target/site/apidocs - apidocs - - **/* - - - - - target - - - ${artifactId}-${version}-sources.jar - - - - + + + bin + + tar.gz + zip + + + + + LICENSE.txt + NOTICE.txt + RELEASE-NOTES.txt + + + + target + + + ${artifactId}-${version}.jar + + + + target/site/apidocs + apidocs + + **/* + + + + + target + + + ${artifactId}-${version}-sources.jar + + + + diff --git a/src/assembly/src.xml b/src/assembly/src.xml index 1c7392f955..1330db01f6 100644 --- a/src/assembly/src.xml +++ b/src/assembly/src.xml @@ -1,43 +1,45 @@ - - - src - - tar.gz - zip - - ${artifactId}-${version}-src - - - - LICENSE* - NOTICE* - 
RELEASE-NOTES.txt - pom.xml - findbugs-exclude-filter.xml - checkstyle*.xml - - - - src - - **/*Benchmark.java - - - - - + + + src + + tar.gz + zip + + ${artifactId}-${version}-src + + + + LICENSE* + NOTICE* + RELEASE-NOTES.txt + pom.xml + findbugs-exclude-filter.xml + checkstyle*.xml + + + + src + + **/*Benchmark.java + + + + + diff --git a/src/changes/changes.xml b/src/changes/changes.xml index beaa380117..93952e9f18 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -1,306 +1,451 @@ - - - - - - - - Apache Commons CSV Release Notes - - - - - Minor changes #172. - No Automatic-Module-Name prevents usage in JPMS projects without repacking the JAR. - Fix for multi-char delimiter not working as expected #218. - CSVRecord.get(Enum) should use Enum.name() instead of Enum.toString(). - Allow org.apache.commons.csv.IOUtils.copy(Reader, Appendable, CharBuffer) to compile on Java 11 and run on Java 8. - CSVRecord.toList() does not give write access to the new List. - CSVParser.getRecords() now throws UncheckedIOException instead of IOException. - - Make CSVRecord#values() public. - Add DuplicateHeaderMode for flexibility with header strictness. #114. - Support for parallelism in CSVPrinter. - Add CSVPrinter.printRecord[s](Stream). - Add accessors for header/trailer comments #257. - Add github/codeql-action. - - Bump actions/cache from 2.1.6 to 3.0.8 #196, #233, #243. - Bump actions/checkout from 2.3.4 to 3.0.2 #188, #195, #220. - Bump actions/setup-java from 2 to 3. - Bump commons-parent from 52 to 54 #264. - Bump checkstyle from 8.44 to 9.2.1 #180, #190, #194, #202, #207. - Bump junit-jupiter from 5.8.0-M1 to 5.9.1 #179, #186, #201, #244, #263. - Bump jmh-core from 1.32 to 1.35 #176, #208, #229. - Bump jmh-generator-annprocess from 1.32 to 1.35 #175, #206, #226. - Bump mockito-core from 3.11.2 to 4.8.0 #187, #197, #204, #212, #230, #237, #251, #259. - Bump maven-pmd-plugin from 3.14.0 to 3.19.0 #184, #219, #238, #254, #258. 
- Bump pmd from 6.36.0 to 6.49.0 #173, #189, #193, #199, #227, #233, #214, #236, #240, #247, #255. - Bump opencsv from 5.5.1 to 5.7.0 #182, #221, #260. - Bump spotbugs-maven-plugin from 4.3.0 to 4.7.2.0 #192, #198, #203, #211, #225, #234, #242, #245, #261. - Bump com.github.spotbugs:spotbugs from 4.5.3 to 4.7.2. - Bump h2 from 1.4.200 to 2.1.214 #200, #205, #213, #239. - Bump maven-javadoc-plugin from 3.3.0 to 3.4.1. - Bump biz.aQute.bnd:biz.aQute.bndlib from 5.3.0 to 6.3.1. - Bump jacoco-maven-plugin from 0.8.7 to 0.8.8. - Bump japicmp-maven-plugin from 0.15.3 to 0.16.0. - Bump maven-checkstyle-plugin from 3.1.2 to 3.2.0 #253. - - - - Replace FindBugs with SpotBugs #56. - Javadoc typo in CSVFormat let's -> lets #57. - CSVFormat.printWithEscapes throws StringIndexOutOfBoundsException when value is Reader #61. - Improve CSVFormat test coverage #63. - Fix CSVFileParserTest.java to allow for a null return value from record.getComment() #62. - Improve test coverage in CSVFormatTest #65. - Removed invalid Javadoc markup for CSVFormat EXCEL #64. - Improve CSVRecord and CSVPrinter code coverage #66. - Improve lexer and token coverage #67. - CSVFormat.format trims last delimiter if the delimiter is a white space #71. - Replace org.apache.commons.csv.Assertions.notNull() with Objects.requireNonNull(). - Line number is not proper at EOF. - Parser iterates over the last CSV Record twice. - Minor improvements #126, #127, #130. - Add possibility to use ResultSet header meta data as CSV header #11. - Add test cases for withIgnoreSurroundingSpaces() and withTrim() #70. - Update CSVParser.parse(File, Charset, CSVFormat) from IO to NIO. - Missing separator with print(object) followed by printRecord(Object[]) #157. - Fix EOL checking for read array in ExtendedBufferedReader #5. - Print from Reader with embedded quotes generates incorrect output #78. - Replace JUnit assert by simpler but equivalent calls. #159. - Update gitignore to ignore idea and vscode #160. 
- Update CSVBenchmark #165. - Remove Whitespace Check Determines Delimiter Twice #167. - Document and Automate CSV Benchmark Harness #166. - Optimize Lexer Delimiter Check for One Character Delimiter #163. - SpotBugs Error: Medium: org.apache.commons.csv.CSVParser.getHeaderNames() may expose internal representation by returning CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 599] EI_EXPOSE_REP. - SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.format [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 433] EI_EXPOSE_REP2. - SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerMap [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 437] EI_EXPOSE_REP2. - SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 438] EI_EXPOSE_REP2. - SpotBugs Error: Medium: new org.apache.commons.csv.CSVPrinter(Appendable, CSVFormat) may expose internal representation by storing an externally mutable object into CSVPrinter.format [org.apache.commons.csv.CSVPrinter] At CSVPrinter.java:[line 100] EI_EXPOSE_REP2. - Formalize PerformanceTest #168. - Reuse Buffers in Lexer for Delimiter Detection #162. - Cleanup and Document Performance Test Harness #170. - Update buffer position when reading line comment #120. - - Make CSVRecord#toList() public. - Add CSVRecord#stream(). - Add CSVParser#stream(). - Make the method CSVRecord.putIn(Map) public. - Add test cases for CSVRecord with get(Enum) and toString. #54. 
- Add and use CSVFormat.Builder, deprecated CSVFormat#with methods, based on #73. - Add support for String delimiters #76. - - Update org.junit.jupiter:junit-jupiter from 5.6.0 to 5.7.0, #84 #109 - Update tests from Apache Commons Lang 3.9 to 3.12.0. - Update tests from commons-io:commons-io 2.6 to 2.11.0, #108. - Bump actions/checkout from v1 to v2.3.4, #79, #92, #121. - Bump commons-parent from 50 to 51 #80. - Bump tests from opencsv from 3.1 to 5.5.1 #81, #137, #158. - Update tests from super-csv from 2.2.1 to 2.4.0 #86. - Bump build actions/setup-java from v1.4.0 to v2, #101, #113. - Bump maven-pmd-plugin from 3.13.0 to 3.14.0 #122. - Bump tests from org.mockito:mockito-core 3.2.4 -> 3.11.2; #88, #107, #110, #123, #128, #129, #156. - Bump actions/cache from v2 to v2.1.6 #132, #153. - Bump maven-checkstyle-plugin from 3.0.0 to 3.1.2 #131. - Bump checkstyle from 8.29 to 8.44. - Bump junit-jupiter from 5.7.0 to 5.8.0-M1 #133, #149. - Bump commons.jacoco.version from 0.8.5 to 0.8.7 (Java 16). - Bump commons.spotbugs.version from 4.0.4 to 4.3.0 (Java 16). - Bump maven-javadoc-plugin from 3.2.0 to 3.3.0. - Bump jmh-generator-annprocess from 1.5.2 to 1.32 #151. - Bump PMD core from 6.29.0 to 6.36.0. - Bump biz.aQute.bnd:biz.aQute.bndlib from 5.1.2 to 5.3.0. - - - Add CSVRecord.isSet(int) method #52. - Char escape doesn't work properly with quoting. - Test case failures following CSVFormat#equals() update. - CSVFormat withTrim() and withIgnoreSurroundingSpaces() need better docs. - CSVFormat equals() and hashCode() don't use all fields. - CSVFormat#validate() does not account for allowDuplicateHeaderNames #43. - Post 1.7 release fixes. - Upgrade test framework to JUnit 5 Jupiter #49, #50. - A single empty header is allowed when not allowing empty column headers. #47. - CSVRecord is not Serializable. - Use test scope for supercsv #48. - Update tests from H2 1.4.199 to 1.4.200. - Update tests from Hamcrest 2.1 to 2.2. - Update tests from Mockito 3.1.0 to 3.2.4. 
- Fix typos in site and test #53. - Fix typo performance test #55. - - - Add predefined CSVFormats for printing MongoDB CSV and TSV. - Fix escape character for POSTGRESQL_TEXT and POSTGRESQL_CSV formats. - Site link "Source Repository" does not work. - Add support for java.sql.Clob. - Update to Java 8. - Escape quotes in CLOBs #39. - Cannot get headers in column order from CSVRecord. - Update tests from H2 1.4.198 to 1.4.199. - - - Add more documentation to CSVPrinter. - Add autoFlush option for CsvPrinter. PR #24. - The behavior of quote char using is not similar as Excel does when the first string contains CJK char(s). - Don't quote cells just because they have UTF-8 encoded characters. - Add API org.apache.commons.csv.CSVFormat.withSystemRecordSeparator(). - Inconsistency between Javadoc of CSVFormat DEFAULT EXCEL. - Create CSVFormat.ORACLE preset. - Some multi-iterator parsing peek sequences incorrectly consume elements. - Parse method should avoid creating a redundant BufferedReader. - Add predefined CSVFormats for printing MongoDB CSV and TSV. - - - withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17. - Fix outdated comments about FileReader in CSVParser #13 - Fix incorrect method name 'withFirstRowAsHeader' in user guide. - Negative numeric values in the first column are always quoted in minimal mode. - Update platform requirement from Java 6 to 7. - Do not use RuntimeException in CSVParser.iterator().new Iterator() {...}.getNextRecord() - CSVParser: Add factory method accepting InputStream. - Add convenience API CSVFormat.print(File, Charset) - Add convenience API CSVFormat.print(Path, Charset) - Add convenience API CSVParser.parse(Path, Charset, CSVFormat) - Add convenience API CSVFormat#printer() to print to System.out - Provide a CSV Format for printing PostgreSQL CSV and Text formats. - Adding a placeholder in the Lexer and CSV parser to store the end-of-line string. 
- - - Make CSVPrinter.print(Object) GC-free. - Allow some printing operations directly from CSVFormat. - Drop ferc.gov tests. - - - Add shortcut method for using first record as header to CSVFormat - Add withHeader(Class<? extends Enum>) to CSVFormat - Comment line hides next record; update Javadoc to make behavior clear - CSVPrinter doesn't skip creation of header record if skipHeaderRecord is set to true - Add IgnoreCase option for accessing header names - The null string should be case-sensitive when reading records - CSVFormat.nullString should not be escaped - CSVFormat.MYSQL nullString should be "\N" - Fix Javadoc to say CSVFormat with() methods return a new CSVFormat - Support for ignoring trailing delimiter. - Support trimming leading and trailing blanks. - Create default formats for Informix UNLOAD and UNLOAD CSV. - - - CSVFormat.with* methods clear the header comments - Incorrect Javadoc on QuoteMode.NONE - Add enum CSVFormat.Predefined that contains the default CSVFormat values. - - - QuoteMode.NON_NUMERIC doesn't work with CSVPrinter.printRecords(ResultSet) - CSVFormat#withHeader doesn't work well with #printComment, add withHeaderComments(String...) - CSVFormat.EXCEL should ignore empty header names - Incorrect Javadoc referencing org.apache.commons.csv.CSVFormat withQuote() - Improve toString() implementation of CSVRecord - Unified parameter validation - Add CSVFormat#with 0-arg methods matching boolean arg methods - Save positions of records to enable random access - CSVPrinter.printRecord(ResultSet) with metadata - - - No longer works with Java 6 - NullPointerException when empty header string and and null string of "" - Validate format parameters in constructor - IllegalArgumentException thrown when the header contains duplicate names when the column names are empty. - CSVFormat#withHeader doesn't work with CSVPrinter - CSVFormat is missing a print(...) method - CSVRecord.toMap() throws NPE on formats with no - headers. 
- Check whether ISE/IAE are being used appropriately - CSVFormat constructor should reject a header array with duplicate - entries - - HeaderMap is inconsistent when it is parsed from an input with - duplicate columns names - - CSVRecord.toMap() fails if row length shorter than header length - - CSVFormat.format allways append null - Add Map conversion API to CSVRecord - CSVParser: getHeaderMap throws NPE - Lots of possible changes - Use Character instead of char for char fields except delimiter - - Revert Builder implementation in CSVFormat - CSVRecord does not verify that the length of the header mapping - matches the number of values - - Allow the handling of NULL values - Use the Builder pattern for CSVFormat - Clarify comment handling - CSVParser.nextValue() seems pointless - Allow the String value for null to be customized for the CSV - printer - - Not possible to create a CSVFormat from scratch - Keep track of record number - Lexer should only use char fields - Need a way to extract parsed headers, e.g. for use in formatting - output - - Header support - Confusing semantic of the ignore leading/trailing spaces parameters - - Add convenience methods to CSVLexer - Is CharBuffer really needed, now that StringBuilder is available? - - Replace while(true)-loop in CSVParser.getRecord with do-while-loop - - CSVFormat describes itself as immutable, but it is not - in - particular it is not thread-safe - - Endless loops in CSV parser - NullPointerException in CSVPrinter.print()/println() - CSVPrinter overhaul - Excel strategy uses wrong separator - CSVStrategy has modifiable public static variables - - Predefined format for MYSQL - Reduce visibility of methods in internal classes - ExtendedBufferedReader does too much - Decide whether to keep the csv.writer subpackage - - - - + + + + + + + + Apache Commons CSV Release Notes + + + + + Remove Spotbugs dependency and use exclude-filter instead #564. + Remove broken website link #577. 
+ Fix Apache RAT plugin console warnings. + [Javadoc] Clarify behavior of deprecated CSVFormat#withFirstRecordAsHeader() #2413. + CSVFormat.equals()/hashCode() ignores maxRows (#600). + ExtendedBufferedReader byte tracking leads to an incorrect CSVRecord.getBytePosition() (#601). + CSVFormat.Builder.setQuote() does not refresh quotedNullString (#2447). + Lexer.isDelimiter() accepts a partial multi-character delimiter at EOF (#603). + CSVParser applies characterOffset to bytePosition (#604). + CSVPrinter Reader printing with quote and escape can emit CSV that its parser cannot read back. + CSVParser applies maxRows to record numbers instead of rows produced when setRecordNumber(...) is used. + CSVParser with trackBytes enabled throws on multi-character delimiters containing supplementary Unicode characters. + CSVFormat.Builder.setNullString(String) can build an invalid quoted null string after setQuote(null). + Escape Reader values with quote and escape (#606). + Clear escape delimiter buffer before peek in Lexer.isEscapeDelimiter() (#608, #611). + Escape quote char in printWithEscapes when QuoteMode is NONE (#609). + Quote value starting with comment marker in minimal quote mode (#610). + Escape leading comment marker in printWithEscapes (#614). + Skip byte counting at EOF in ExtendedBufferedReader.read (#615). + Keep quoted empty trailing field with trailingDelimiter (#616). + Evaluate isDelimiter once in nextToken whitespace skip (#618). + + Add an "Android Compatibility" section to the web site. + Add CSVParser.Builder.setByteOffset(long) (#604). + + Bump org.apache.commons:commons-parent from 85 to 102 #573, #595. + [test] Bump com.opencsv:opencsv from 5.11.2 to 5.12.0 #558. + Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.20.0. + Bump commons-codec:commons-codec from 1.19.0 to 1.22.0. + Bump commons-io:commons-io from 2.20.0 to 2.22.0 #594. + + + + CSVPrinter.printRecord(Stream) hangs if given a parallel stream.
+ CSVPrinter now uses an internal lock instead of synchronized methods. + org.apache.commons.csv.CSVPrinter.printRecords(ResultSet) now writes one record at a time using a lock. + + + Bump org.apache.commons:commons-parent from 81 to 85 #542. + Bump commons-io:commons-io from 2.18.0 to 2.20.0. + Bump com.opencsv:opencsv from 5.10 to 5.11.2 #545, #551, #553. + Bump org.apache.commons:commons-lang3 from 3.17.0 to 3.18.0 #556. + Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. + + + + Release history link changed from changes-report.html to changes.html #516. + Remove -nouses directive from maven-bundle-plugin. OSGi package imports now state 'uses' definitions for package imports, this doesn't affect JPMS (from org.apache.commons:commons-parent:80). + CSVParser.parse(URL, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(String, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(File, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(Path, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(InputStream, Charset, CSVFormat) with a null CSVFormat maps to CSVFormat.DEFAULT (like CSVParser.parse(Reader, CSVFormat)). + CSVParser.parse(*) methods with a null Charset maps to Charset.defaultCharset(). + Fix possible NullPointerException in Token.toString(). + + Define and use Maven property commons.jmh.version. + Add CSVFormat.Builder.setMaxRows(long). + Add CSVFormat.getMaxRows(). + CSVPrinter.printRecords(ResultSet) knows how to use CSVFormat's maxRows. + CSVPrinter.printRecords(Iterable) knows how to use CSVFormat's maxRows. + CSVPrinter.printRecords(Stream) knows how to use CSVFormat's maxRows. + CSVParser.stream() knows how to use CSVFormat's maxRows. 
+ CSVParser.getRecords() knows how to use CSVFormat's maxRows. + CSVParser.iterator() knows how to use CSVFormat's maxRows. + + Bump com.opencsv:opencsv from 5.9 to 5.10. + Bump commons-codec:commons-codec from 1.17.2 to 1.18.0 #522. + Bump org.apache.commons:commons-parent from 79 to 81. + + + + Required OSGi Import-Package version numbers in MANIFEST.MF #504. + CSVParser.nextRecord() should throw CSVException (an IOException subclass) instead of IOException and IllegalStateException, no method signature changes needed. + + Add CSVPrinter.getRecordCount(). + Add and use CSVParser.Builder and builder() and deprecate CSVParser constructors. + CSVFormat.Builder implements Supplier<CSVFormat>. + Deprecate CSVFormat.Builder.build() for get(). + Track byte position #502. + + Bump org.apache.commons:commons-parent from 76 to 78 #486, #495. + Bump org.codehaus.mojo:taglist-maven-plugin from 3.1.0 to 3.2.1 #493. + Bump commons-io:commons-io from 2.17.0 to 2.18.0 #505. + Bump commons-codec:commons-codec from 1.17.1 to 1.17.2. + Bump org.apache.commons:commons-parent from 78 to 79. + + + + Add CSVException that extends IOException thrown on invalid input instead of IOException. + + Fix PMD issues for port to PMD 7.1.0. + Fix some Javadoc links #442. + Extract duplicated code into a method #444. + Migrate CSVFormat#print(File, Charset) to NIO #445. + Fix documentation for CSVFormat private constructor #466. + CSVFormat does not support explicit " as escape char. + Escaping is not disableable. + Fix Javadoc warnings on Java 23. + Improve parser performance by up to 20%, YMMV. + + Bump commons-codec:commons-codec from 1.16.1 to 1.17.1 #422, #449. + Bump org.apache.commons:commons-parent from 69 to 76 #435, #452, #465, #468, #475, #482. + Bump org.codehaus.mojo:taglist-maven-plugin from 3.0.0 to 3.1.0 #441. + Bump org.apache.commons:commons-lang3 from 3.14.0 to 3.17.0 #450, #459, #470. + Bump org.hamcrest:hamcrest from 2.2 to 3.0 #455. 
+ Bump commons-io:commons-io from 2.16.1 to 2.17.0 #476. + + + + [Javadoc] Add example to CSVFormat#setHeaderComments() #344. + Add and use CSVFormat#setTrailingData(boolean) in CSVFormat.EXCEL for Excel compatibility #303. + Add and use CSVFormat#setLenientEof(boolean) in CSVFormat.EXCEL for Excel compatibility #303. + + Replace deprecated method in user guide, update external link #324, #325. + Document duplicate header behavior #309. + Add missing docs #328. + [StepSecurity] CI: Harden GitHub Actions #329, #330. + Better error message during faulty CSV record read #347. + Misleading error message when QuoteMode set to None #352. + OutOfMemory for very long rows despite using column value of type Reader. + Use try-with-resources to manage JDBC CLOB in CSVPrinter.printRecords(ResultSet). + JDBC Blob columns are now output as Base64 instead of Object#toString(), which usually is InputStream#toString(). + Support unusual Excel use cases: Add support for trailing data after the closing quote, and EOF without a final closing quote #303. + MongoDB CSV empty first column parsing fix #412. + + Bump commons-io:commons-io from 2.11.0 to 2.16.1 #408, #413. + Bump commons-parent from 57 to 69 #410. + Bump h2 from 2.1.214 to 2.2.224 #333, #349, #359. + Bump commons-lang3 from 3.12.0 to 3.14.0. + Update exception message in CSVRecord#getNextRecord() #348. + Bump tests using com.opencsv:opencsv from 5.8 to 5.9 #373. + + + + Minor changes #172. + No Automatic-Module-Name prevents usage in JPMS projects without repacking the JAR. + Fix for multi-char delimiter not working as expected #218. + CSVRecord.get(Enum) should use Enum.name() instead of Enum.toString(). + Allow org.apache.commons.csv.IOUtils.copy(Reader, Appendable, CharBuffer) to compile on Java 11 and run on Java 8. + CSVRecord.toList() does not give write access to the new List. + CSVParser.getRecords() now throws UncheckedIOException instead of IOException. + Add comments to iterator() and stream() #270.
+ Fix wrong assumptions in PostgreSQL formats #265. + Validate input to setDelimiter(String) for empty string #266. + Bump CSVFormat#serialVersionUID from 1 to 2. + CSVParser: Identify duplicates in null, empty and blank header names #279. + + Serialization in CSVFormat is not supported from one version to the next. + + Make CSVRecord#values() public. + Add DuplicateHeaderMode for flexibility with header strictness. #114. + Support for parallelism in CSVPrinter. + Add CSVPrinter.printRecord[s](Stream). + Add accessors for header/trailer comments #257. + Add github/codeql-action. + + Bump actions/cache from 2.1.6 to 3.0.10 #196, #233, #243, #267, #271. + Bump actions/checkout from 2.3.4 to 3.1.0 #188, #195, #220, #272. + Bump actions/setup-java from 2 to 3.5.1. + Bump actions/upload-artifact from 3.1.0 to 3.1.1 #280. + Bump commons-parent from 52 to 57 #264, #288, #298, #323. + Bump checkstyle from 8.44 to 9.2.1 #180, #190, #194, #202, #207. + Bump junit-jupiter from 5.8.0-M1 to 5.9.1 #179, #186, #201, #244, #263. + Bump jmh-core from 1.32 to 1.36 #176, #208, #229, #285. + Bump jmh-generator-annprocess from 1.32 to 1.36 #175, #206, #226, #283. + Bump mockito-core from 3.11.2 to 4.11.0 #187, #197, #204, #212, #230, #237, #251, #259, #284, #292, #297. + Bump maven-pmd-plugin from 3.14.0 to 3.19.0 #184, #219, #238, #254, #258. + Bump pmd from 6.36.0 to 6.52.0 #173, #189, #193, #199, #227, #233, #214, #236, #240, #247, #255, #273. + Bump opencsv from 5.5.1 to 5.7.1 #182, #221, #260, #281. + Bump spotbugs-maven-plugin from 4.3.0 to 4.7.3.0 #192, #198, #203, #211, #225, #234, #242, #245, #261, #275, #282. + Bump com.github.spotbugs:spotbugs from 4.5.3 to 4.7.2. + Bump h2 from 1.4.200 to 2.1.214 #200, #205, #213, #239. + Bump maven-javadoc-plugin from 3.3.0 to 3.4.1. + Bump biz.aQute.bnd:biz.aQute.bndlib from 5.3.0 to 6.3.1. + Bump jacoco-maven-plugin from 0.8.7 to 0.8.8. + Bump japicmp-maven-plugin from 0.15.3 to 0.16.0. 
+ Bump maven-checkstyle-plugin from 3.1.2 to 3.2.0 #253. + + + + Replace FindBugs with SpotBugs #56. + Javadoc typo in CSVFormat let's -> lets #57. + CSVFormat.printWithEscapes throws StringIndexOutOfBoundsException when value is Reader #61. + Improve CSVFormat test coverage #63. + Fix CSVFileParserTest.java to allow for a null return value from record.getComment() #62. + Improve test coverage in CSVFormatTest #65. + Removed invalid Javadoc markup for CSVFormat EXCEL #64. + Improve CSVRecord and CSVPrinter code coverage #66. + Improve lexer and token coverage #67. + CSVFormat.format trims last delimiter if the delimiter is a white space #71. + Replace org.apache.commons.csv.Assertions.notNull() with Objects.requireNonNull(). + Line number is not proper at EOF. + Parser iterates over the last CSV Record twice. + Minor improvements #126, #127, #130. + Add possibility to use ResultSet header meta data as CSV header #11. + Add test cases for withIgnoreSurroundingSpaces() and withTrim() #70. + Update CSVParser.parse(File, Charset, CSVFormat) from IO to NIO. + Missing separator with print(object) followed by printRecord(Object[]) #157. + Fix EOL checking for read array in ExtendedBufferedReader #5. + Print from Reader with embedded quotes generates incorrect output #78. + Replace JUnit assert by simpler but equivalent calls. #159. + Update gitignore to ignore idea and vscode #160. + Update CSVBenchmark #165. + Remove Whitespace Check Determines Delimiter Twice #167. + Document and Automate CSV Benchmark Harness #166. + Optimize Lexer Delimiter Check for One Character Delimiter #163. + SpotBugs Error: Medium: org.apache.commons.csv.CSVParser.getHeaderNames() may expose internal representation by returning CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 599] EI_EXPOSE_REP. 
+ SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.format [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 433] EI_EXPOSE_REP2. + SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerMap [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 437] EI_EXPOSE_REP2. + SpotBugs Error: Medium: new org.apache.commons.csv.CSVParser(Reader, CSVFormat, long, long) may expose internal representation by storing an externally mutable object into CSVParser.headerNames [org.apache.commons.csv.CSVParser] At CSVParser.java:[line 438] EI_EXPOSE_REP2. + SpotBugs Error: Medium: new org.apache.commons.csv.CSVPrinter(Appendable, CSVFormat) may expose internal representation by storing an externally mutable object into CSVPrinter.format [org.apache.commons.csv.CSVPrinter] At CSVPrinter.java:[line 100] EI_EXPOSE_REP2. + Formalize PerformanceTest #168. + Reuse Buffers in Lexer for Delimiter Detection #162. + Cleanup and Document Performance Test Harness #170. + Update buffer position when reading line comment #120. + + Make CSVRecord#toList() public. + Add CSVRecord#stream(). + Add CSVParser#stream(). + Make the method CSVRecord.putIn(Map) public. + Add test cases for CSVRecord with get(Enum) and toString. #54. + Add and use CSVFormat.Builder, deprecated CSVFormat#with methods, based on #73. + Add support for String delimiters #76. + + Update org.junit.jupiter:junit-jupiter from 5.6.0 to 5.7.0, #84 #109 + Update tests from Apache Commons Lang 3.9 to 3.12.0. + Update tests from commons-io:commons-io 2.6 to 2.11.0, #108. + Bump actions/checkout from v1 to v2.3.4, #79, #92, #121. + Bump commons-parent from 50 to 51 #80. + Bump tests from opencsv from 3.1 to 5.5.1 #81, #137, #158. 
+ Update tests from super-csv from 2.2.1 to 2.4.0 #86. + Bump build actions/setup-java from v1.4.0 to v2, #101, #113. + Bump maven-pmd-plugin from 3.13.0 to 3.14.0 #122. + Bump tests from org.mockito:mockito-core 3.2.4 -> 3.11.2; #88, #107, #110, #123, #128, #129, #156. + Bump actions/cache from v2 to v2.1.6 #132, #153. + Bump maven-checkstyle-plugin from 3.0.0 to 3.1.2 #131. + Bump checkstyle from 8.29 to 8.44. + Bump junit-jupiter from 5.7.0 to 5.8.0-M1 #133, #149. + Bump commons.jacoco.version from 0.8.5 to 0.8.7 (Java 16). + Bump commons.spotbugs.version from 4.0.4 to 4.3.0 (Java 16). + Bump maven-javadoc-plugin from 3.2.0 to 3.3.0. + Bump jmh-generator-annprocess from 1.5.2 to 1.32 #151. + Bump PMD core from 6.29.0 to 6.36.0. + Bump biz.aQute.bnd:biz.aQute.bndlib from 5.1.2 to 5.3.0. + + + Add CSVRecord.isSet(int) method #52. + Char escape doesn't work properly with quoting. + Test case failures following CSVFormat#equals() update. + CSVFormat withTrim() and withIgnoreSurroundingSpaces() need better docs. + CSVFormat equals() and hashCode() don't use all fields. + CSVFormat#validate() does not account for allowDuplicateHeaderNames #43. + Post 1.7 release fixes. + Upgrade test framework to JUnit 5 Jupiter #49, #50. + A single empty header is allowed when not allowing empty column headers. #47. + CSVRecord is not Serializable. + Use test scope for supercsv #48. + Update tests from H2 1.4.199 to 1.4.200. + Update tests from Hamcrest 2.1 to 2.2. + Update tests from Mockito 3.1.0 to 3.2.4. + Fix typos in site and test #53. + Fix typo performance test #55. + + + Add predefined CSVFormats for printing MongoDB CSV and TSV. + Fix escape character for POSTGRESQL_TEXT and POSTGRESQL_CSV formats. + Site link "Source Repository" does not work. + Add support for java.sql.Clob. + Update to Java 8. + Escape quotes in CLOBs #39. + Cannot get headers in column order from CSVRecord. + Update tests from H2 1.4.198 to 1.4.199. + + + Add more documentation to CSVPrinter. 
+ Add autoFlush option for CsvPrinter. PR #24. + The behavior of quote char using is not similar as Excel does when the first string contains CJK char(s). + Don't quote cells just because they have UTF-8 encoded characters. + Add API org.apache.commons.csv.CSVFormat.withSystemRecordSeparator(). + Inconsistency between Javadoc of CSVFormat DEFAULT EXCEL. + Create CSVFormat.ORACLE preset. + Some multi-iterator parsing peek sequences incorrectly consume elements. + Parse method should avoid creating a redundant BufferedReader. + Add predefined CSVFormats for printing MongoDB CSV and TSV. + + + withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17. + Fix outdated comments about FileReader in CSVParser #13 + Fix incorrect method name 'withFirstRowAsHeader' in user guide. + Negative numeric values in the first column are always quoted in minimal mode. + Update platform requirement from Java 6 to 7. + Do not use RuntimeException in CSVParser.iterator().new Iterator() {...}.getNextRecord() + CSVParser: Add factory method accepting InputStream. + Add convenience API CSVFormat.print(File, Charset) + Add convenience API CSVFormat.print(Path, Charset) + Add convenience API CSVParser.parse(Path, Charset, CSVFormat) + Add convenience API CSVFormat#printer() to print to System.out + Provide a CSV Format for printing PostgreSQL CSV and Text formats. + Adding a placeholder in the Lexer and CSV parser to store the end-of-line string. + + + Make CSVPrinter.print(Object) GC-free. + Allow some printing operations directly from CSVFormat. + Drop ferc.gov tests. + + + Add shortcut method for using first record as header to CSVFormat + Add withHeader(Class<? 
extends Enum>) to CSVFormat + Comment line hides next record; update Javadoc to make behavior clear + CSVPrinter doesn't skip creation of header record if skipHeaderRecord is set to true + Add IgnoreCase option for accessing header names + The null string should be case-sensitive when reading records + CSVFormat.nullString should not be escaped + CSVFormat.MYSQL nullString should be "\N" + Fix Javadoc to say CSVFormat with() methods return a new CSVFormat + Support for ignoring trailing delimiter. + Support trimming leading and trailing blanks. + Create default formats for Informix UNLOAD and UNLOAD CSV. + + + CSVFormat.with* methods clear the header comments + Incorrect Javadoc on QuoteMode.NONE + Add enum CSVFormat.Predefined that contains the default CSVFormat values. + + + QuoteMode.NON_NUMERIC doesn't work with CSVPrinter.printRecords(ResultSet) + CSVFormat#withHeader doesn't work well with #printComment, add withHeaderComments(String...) + CSVFormat.EXCEL should ignore empty header names + Incorrect Javadoc referencing org.apache.commons.csv.CSVFormat withQuote() + Improve toString() implementation of CSVRecord + Unified parameter validation + Add CSVFormat#with 0-arg methods matching boolean arg methods + Save positions of records to enable random access + CSVPrinter.printRecord(ResultSet) with metadata + + + No longer works with Java 6 + NullPointerException when empty header string and null string of "" + Validate format parameters in constructor + IllegalArgumentException thrown when the header contains duplicate names when the column names are empty. + CSVFormat#withHeader doesn't work with CSVPrinter + CSVFormat is missing a print(...) method + CSVRecord.toMap() throws NPE on formats with no + headers. 
+ Check whether ISE/IAE are being used appropriately + CSVFormat constructor should reject a header array with duplicate + entries + + HeaderMap is inconsistent when it is parsed from an input with + duplicate columns names + + CSVRecord.toMap() fails if row length shorter than header length + + CSVFormat.format always append null + Add Map conversion API to CSVRecord + CSVParser: getHeaderMap throws NPE + Lots of possible changes + Use Character instead of char for char fields except delimiter + + Revert Builder implementation in CSVFormat + CSVRecord does not verify that the length of the header mapping + matches the number of values + + Allow the handling of NULL values + Use the Builder pattern for CSVFormat + Clarify comment handling + CSVParser.nextValue() seems pointless + Allow the String value for null to be customized for the CSV + printer + + Not possible to create a CSVFormat from scratch + Keep track of record number + Lexer should only use char fields + Need a way to extract parsed headers, e.g. for use in formatting + output + + Header support + Confusing semantic of the ignore leading/trailing spaces parameters + + Add convenience methods to CSVLexer + Is CharBuffer really needed, now that StringBuilder is available?
+ + Replace while(true)-loop in CSVParser.getRecord with do-while-loop + + CSVFormat describes itself as immutable, but it is not - in + particular it is not thread-safe + + Endless loops in CSV parser + NullPointerException in CSVPrinter.print()/println() + CSVPrinter overhaul + Excel strategy uses wrong separator + CSVStrategy has modifiable public static variables + + Predefined format for MYSQL + Reduce visibility of methods in internal classes + ExtendedBufferedReader does too much + Decide whether to keep the csv.writer subpackage + + + + diff --git a/src/changes/release-notes.vm b/src/changes/release-notes.vm index 412324c0f6..5769829552 100644 --- a/src/changes/release-notes.vm +++ b/src/changes/release-notes.vm @@ -6,7 +6,7 @@ ## "License"); you may not use this file except in compliance ## with the License. You may obtain a copy of the License at ## -## http://www.apache.org/licenses/LICENSE-2.0 +## https://www.apache.org/licenses/LICENSE-2.0 ## ## Unless required by applicable law or agreed to in writing, ## software distributed under the License is distributed on an @@ -15,22 +15,21 @@ ## specific language governing permissions and limitations ## under the License. ## - ${project.name} - Version ${version} - Release Notes +${project.name} ${version} Release Notes +------------------------------------------------ +The ${developmentTeam} is pleased to announce the release of ${project.name} ${version}. -INTRODUCTION: -This document contains the release notes for the ${version} version of Apache Commons CSV. -Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. +This document contains the release notes for the ${version} version of ${project.name}. +Commons CSV reads and writes files in Comma Separated Value (CSV) format variations. Commons CSV requires at least Java 8. $introduction.replaceAll("(? 
+ + diff --git a/src/conf/checkstyle/checkstyle.xml b/src/conf/checkstyle/checkstyle.xml new file mode 100644 index 0000000000..4e8691243c --- /dev/null +++ b/src/conf/checkstyle/checkstyle.xml @@ -0,0 +1,91 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/java/org/apache/commons/csv/CSVException.java b/src/main/java/org/apache/commons/csv/CSVException.java new file mode 100644 index 0000000000..a986148f34 --- /dev/null +++ b/src/main/java/org/apache/commons/csv/CSVException.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import java.io.IOException; +import java.util.Formatter; +import java.util.IllegalFormatException; + +/** + * Signals a CSV exception. For example, this exception is thrown when parsing invalid input. + * + * @since 1.12.0 + */ +public class CSVException extends IOException { + + private static final long serialVersionUID = 1L; + + /** + * Constructs a new instance with a formatted message. + * + * @param format A {@link Formatter} format string. + * @param args See {@link String#format(String, Object...)}. 
+ * @throws IllegalFormatException See {@link String#format(String, Object...)}. + */ + public CSVException(final String format, final Object... args) { + super(String.format(format, args)); + } + +} diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index ba39b742a9..7145d23d3b 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -1,2830 +1,3246 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSLASH; -import static org.apache.commons.csv.Constants.COMMA; -import static org.apache.commons.csv.Constants.COMMENT; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; -import static org.apache.commons.csv.Constants.EMPTY; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.PIPE; -import static org.apache.commons.csv.Constants.SP; -import static org.apache.commons.csv.Constants.TAB; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Reader; -import java.io.Serializable; -import java.io.StringWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Path; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; - -/** - * Specifies the format of a CSV file and parses input. - * - *

Using predefined formats

- * - *

- * You can use one of the predefined formats: - *

- * - *
    - *
  • {@link #DEFAULT}
  • - *
  • {@link #EXCEL}
  • - *
  • {@link #INFORMIX_UNLOAD}
  • - *
  • {@link #INFORMIX_UNLOAD_CSV}
  • - *
  • {@link #MYSQL}
  • - *
  • {@link #RFC4180}
  • - *
  • {@link #ORACLE}
  • - *
  • {@link #POSTGRESQL_CSV}
  • - *
  • {@link #POSTGRESQL_TEXT}
  • - *
  • {@link #TDF}
  • - *
- * - *

- * For example: - *

- * - *
- * CSVParser parser = CSVFormat.EXCEL.parse(reader);
- * 
- * - *

- * The {@link CSVParser} provides static methods to parse other input types, for example: - *

- * - *
- * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
- * 
- * - *

Defining formats

- * - *

- * You can extend a format by calling the {@code set} methods. For example: - *

- * - *
- * CSVFormat.EXCEL.withNullString("N/A").withIgnoreSurroundingSpaces(true);
- * 
- * - *

Defining column names

- * - *

- * To define the column names you want to use to access records, write: - *

- * - *
- * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3");
- * 
- * - *

- * Calling {@link Builder#setHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and assumes that your CSV source does not - * contain a first record that also defines column names. - * - * If it does, then you are overriding this metadata with your names and you should skip the first record by calling - * {@link Builder#setSkipHeaderRecord(boolean)} with {@code true}. - *

- * - *

Parsing

- * - *

- * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write: - *

- * - *
- * Reader in = ...;
- * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in);
- * 
- * - *

- * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. - *

- * - *

Referencing columns safely

- * - *

- * If your source contains a header record, you can simplify your code and safely reference columns, by using {@link Builder#setHeader(String...)} with no - * arguments: - *

- * - *
- * CSVFormat.EXCEL.withHeader();
- * 
- * - *

- * This causes the parser to read the first record and use its values as column names. - * - * Then, call one of the {@link CSVRecord} get methods that take a String column name argument: - *

- * - *
- * String value = record.get("Col1");
- * 
- * - *

- * This makes your code impervious to changes in column order in the CSV file. - *

- * - *

Notes

- * - *

- * This class is immutable. - *

- */ -public final class CSVFormat implements Serializable { - - /** - * Builds CSVFormat instances. - * - * @since 1.9.0 - */ - public static class Builder { - - /** - * Creates a new default builder. - * - * @return a copy of the builder - */ - public static Builder create() { - return new Builder(CSVFormat.DEFAULT); - } - - /** - * Creates a new builder for the given format. - * - * @param csvFormat the source format. - * @return a copy of the builder - */ - public static Builder create(final CSVFormat csvFormat) { - return new Builder(csvFormat); - } - - private boolean allowMissingColumnNames; - - private boolean autoFlush; - - private Character commentMarker; - - private String delimiter; - - private DuplicateHeaderMode duplicateHeaderMode; - - private Character escapeCharacter; - - private String[] headerComments; - - private String[] headers; - - private boolean ignoreEmptyLines; - - private boolean ignoreHeaderCase; - - private boolean ignoreSurroundingSpaces; - - private String nullString; - - private Character quoteCharacter; - - private String quotedNullString; - - private QuoteMode quoteMode; - - private String recordSeparator; - - private boolean skipHeaderRecord; - - private boolean trailingDelimiter; - - private boolean trim; - - private Builder(final CSVFormat csvFormat) { - this.delimiter = csvFormat.delimiter; - this.quoteCharacter = csvFormat.quoteCharacter; - this.quoteMode = csvFormat.quoteMode; - this.commentMarker = csvFormat.commentMarker; - this.escapeCharacter = csvFormat.escapeCharacter; - this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces; - this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; - this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; - this.recordSeparator = csvFormat.recordSeparator; - this.nullString = csvFormat.nullString; - this.headerComments = csvFormat.headerComments; - this.headers = csvFormat.header; - this.skipHeaderRecord = csvFormat.skipHeaderRecord; - this.ignoreHeaderCase = 
csvFormat.ignoreHeaderCase; - this.trailingDelimiter = csvFormat.trailingDelimiter; - this.trim = csvFormat.trim; - this.autoFlush = csvFormat.autoFlush; - this.quotedNullString = csvFormat.quotedNullString; - this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; - } - - /** - * Builds a new CSVFormat instance. - * - * @return a new CSVFormat instance. - */ - public CSVFormat build() { - return new CSVFormat(this); - } - - /** - * Sets the duplicate header names behavior, true to allow, false to disallow. - * - * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. - * @return This instance. - * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}. - */ - @Deprecated - public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { - final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; - setDuplicateHeaderMode(mode); - return this; - } - - /** - * Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an - * {@link IllegalArgumentException} to be thrown. - * - * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to - * cause an {@link IllegalArgumentException} to be thrown. - * @return This instance. - */ - public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames) { - this.allowMissingColumnNames = allowMissingColumnNames; - return this; - } - - /** - * Sets whether to flush on close. - * - * @param autoFlush whether to flush on close. - * @return This instance. - */ - public Builder setAutoFlush(final boolean autoFlush) { - this.autoFlush = autoFlush; - return this; - } - - /** - * Sets the comment start marker, use {@code null} to disable. 
- * - * Note that the comment start character is only recognized at the start of a line. - * - * @param commentMarker the comment start marker, use {@code null} to disable. - * @return This instance. - * @throws IllegalArgumentException thrown if the specified character is a line break - */ - public Builder setCommentMarker(final char commentMarker) { - setCommentMarker(Character.valueOf(commentMarker)); - return this; - } - - /** - * Sets the comment start marker, use {@code null} to disable. - * - * Note that the comment start character is only recognized at the start of a line. - * - * @param commentMarker the comment start marker, use {@code null} to disable. - * @return This instance. - * @throws IllegalArgumentException thrown if the specified character is a line break - */ - public Builder setCommentMarker(final Character commentMarker) { - if (isLineBreak(commentMarker)) { - throw new IllegalArgumentException("The comment start marker character cannot be a line break"); - } - this.commentMarker = commentMarker; - return this; - } - - /** - * Sets the delimiter character. - * - * @param delimiter the delimiter character. - * @return This instance. - */ - public Builder setDelimiter(final char delimiter) { - return setDelimiter(String.valueOf(delimiter)); - } - - /** - * Sets the delimiter character. - * - * @param delimiter the delimiter character. - * @return This instance. - */ - public Builder setDelimiter(final String delimiter) { - if (containsLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - this.delimiter = delimiter; - return this; - } - - /** - * Sets the duplicate header names behavior. - * - * @param duplicateHeaderMode the duplicate header names behavior - * @return This instance. - */ - public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { - this.duplicateHeaderMode = duplicateHeaderMode; - return this; - } - - /** - * Sets the escape character. 
- * - * @param escapeCharacter the escape character. - * @return This instance. - * @throws IllegalArgumentException thrown if the specified character is a line break - */ - public Builder setEscape(final char escapeCharacter) { - setEscape(Character.valueOf(escapeCharacter)); - return this; - } - - /** - * Sets the escape character. - * - * @param escapeCharacter the escape character. - * @return This instance. - * @throws IllegalArgumentException thrown if the specified character is a line break - */ - public Builder setEscape(final Character escapeCharacter) { - if (isLineBreak(escapeCharacter)) { - throw new IllegalArgumentException("The escape character cannot be a line break"); - } - this.escapeCharacter = escapeCharacter; - return this; - } - - /** - * Sets the header defined by the given {@link Enum} class. - * - *

- * Example: - *

- * - *
-         * public enum HeaderEnum {
-         *     Name, Email, Phone
-         * }
-         *
-         * Builder builder = builder.setHeader(HeaderEnum.class);
-         * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return This instance. - */ - public Builder setHeader(final Class> headerEnum) { - String[] header = null; - if (headerEnum != null) { - final Enum[] enumValues = headerEnum.getEnumConstants(); - header = new String[enumValues.length]; - for (int i = 0; i < enumValues.length; i++) { - header[i] = enumValues[i].name(); - } - } - return setHeader(header); - } - - /** - * Sets the header from the result set metadata. The header can either be parsed automatically from the input file with: - * - *
-         * builder.setHeader();
-         * 
- * - * or specified manually with: - * - *
-         * builder.setHeader(resultSet);
-         * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return This instance. - * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. - */ - public Builder setHeader(final ResultSet resultSet) throws SQLException { - return setHeader(resultSet != null ? resultSet.getMetaData() : null); - } - - /** - * Sets the header from the result set metadata. The header can either be parsed automatically from the input file with: - * - *
-         * builder.setHeader();
-         * 
- * - * or specified manually with: - * - *
-         * builder.setHeader(resultSetMetaData);
-         * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return This instance. - * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. - */ - public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { - String[] labels = null; - if (resultSetMetaData != null) { - final int columnCount = resultSetMetaData.getColumnCount(); - labels = new String[columnCount]; - for (int i = 0; i < columnCount; i++) { - labels[i] = resultSetMetaData.getColumnLabel(i + 1); - } - } - return setHeader(labels); - } - - /** - * Sets the header to the given values. The header can either be parsed automatically from the input file with: - * - *
-         * builder.setHeader();
-         * 
- * - * or specified manually with: - * - *
-         * builder.setHeader("name", "email", "phone");
-         * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return This instance. - */ - public Builder setHeader(final String... header) { - this.headers = CSVFormat.clone(header); - return this; - } - - /** - * Sets the header comments set to the given values. The comments will be printed first, before the headers. This setting is ignored by the parser. - * - *
-         * builder.setHeaderComments("Generated by Apache Commons CSV.", Instant.now());
-         * 
- * - * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. - * @return This instance. - */ - public Builder setHeaderComments(final Object... headerComments) { - this.headerComments = CSVFormat.clone(toStringArray(headerComments)); - return this; - } - - /** - * Sets the header comments set to the given values. The comments will be printed first, before the headers. This setting is ignored by the parser. - * - *
-         * Builder.setHeaderComments("Generated by Apache Commons CSV.", Instant.now());
-         * 
- * - * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. - * @return This instance. - */ - public Builder setHeaderComments(final String... headerComments) { - this.headerComments = CSVFormat.clone(headerComments); - return this; - } - - /** - * Sets the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty lines to empty - * records. - * - * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate - * empty lines to empty records. - * @return This instance. - */ - public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) { - this.ignoreEmptyLines = ignoreEmptyLines; - return this; - } - - /** - * Sets the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. - * - * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. - * @return This instance. - */ - public Builder setIgnoreHeaderCase(final boolean ignoreHeaderCase) { - this.ignoreHeaderCase = ignoreHeaderCase; - return this; - } - - /** - * Sets the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. - * - * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. - * @return This instance. - */ - public Builder setIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { - this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; - return this; - } - - /** - * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}. - * - *
    - *
  • Reading: Converts strings equal to the given {@code nullString} to {@code null} when reading records.
  • - *
  • Writing: Writes {@code null} as the given {@code nullString} when writing records.
  • - *
- * - * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null}. - * @return This instance. - */ - public Builder setNullString(final String nullString) { - this.nullString = nullString; - this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - return this; - } - - /** - * Sets the quote character. - * - * @param quoteCharacter the quote character. - * @return This instance. - */ - public Builder setQuote(final char quoteCharacter) { - setQuote(Character.valueOf(quoteCharacter)); - return this; - } - - /** - * Sets the quote character, use {@code null} to disable. - * - * @param quoteCharacter the quote character, use {@code null} to disable. - * @return This instance. - */ - public Builder setQuote(final Character quoteCharacter) { - if (isLineBreak(quoteCharacter)) { - throw new IllegalArgumentException("The quoteChar cannot be a line break"); - } - this.quoteCharacter = quoteCharacter; - return this; - } - - /** - * Sets the quote policy to use for output. - * - * @param quoteMode the quote policy to use for output. - * @return This instance. - */ - public Builder setQuoteMode(final QuoteMode quoteMode) { - this.quoteMode = quoteMode; - return this; - } - - /** - * Sets the record separator to use for output. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' - * and "\r\n" - *

- * - * @param recordSeparator the record separator to use for output. - * @return This instance. - */ - public Builder setRecordSeparator(final char recordSeparator) { - this.recordSeparator = String.valueOf(recordSeparator); - return this; - } - - /** - * Sets the record separator to use for output. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' - * and "\r\n" - *

- * - * @param recordSeparator the record separator to use for output. - * @return This instance. - */ - public Builder setRecordSeparator(final String recordSeparator) { - this.recordSeparator = recordSeparator; - return this; - } - - /** - * Sets whether to skip the header record. - * - * @param skipHeaderRecord whether to skip the header record. - * @return This instance. - */ - public Builder setSkipHeaderRecord(final boolean skipHeaderRecord) { - this.skipHeaderRecord = skipHeaderRecord; - return this; - } - - /** - * Sets whether to add a trailing delimiter. - * - * @param trailingDelimiter whether to add a trailing delimiter. - * @return This instance. - */ - public Builder setTrailingDelimiter(final boolean trailingDelimiter) { - this.trailingDelimiter = trailingDelimiter; - return this; - } - - /** - * Sets whether to trim leading and trailing blanks. - * - * @param trim whether to trim leading and trailing blanks. - * @return This instance. - */ - public Builder setTrim(final boolean trim) { - this.trim = trim; - return this; - } - } - - /** - * Predefines formats. 
- * - * @since 1.2 - */ - public enum Predefined { - - /** - * @see CSVFormat#DEFAULT - */ - Default(CSVFormat.DEFAULT), - - /** - * @see CSVFormat#EXCEL - */ - Excel(CSVFormat.EXCEL), - - /** - * @see CSVFormat#INFORMIX_UNLOAD - * @since 1.3 - */ - InformixUnload(CSVFormat.INFORMIX_UNLOAD), - - /** - * @see CSVFormat#INFORMIX_UNLOAD_CSV - * @since 1.3 - */ - InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV), - - /** - * @see CSVFormat#MONGODB_CSV - * @since 1.7 - */ - MongoDBCsv(CSVFormat.MONGODB_CSV), - - /** - * @see CSVFormat#MONGODB_TSV - * @since 1.7 - */ - MongoDBTsv(CSVFormat.MONGODB_TSV), - - /** - * @see CSVFormat#MYSQL - */ - MySQL(CSVFormat.MYSQL), - - /** - * @see CSVFormat#ORACLE - */ - Oracle(CSVFormat.ORACLE), - - /** - * @see CSVFormat#POSTGRESQL_CSV - * @since 1.5 - */ - PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV), - - /** - * @see CSVFormat#POSTGRESQL_CSV - */ - PostgreSQLText(CSVFormat.POSTGRESQL_TEXT), - - /** - * @see CSVFormat#RFC4180 - */ - RFC4180(CSVFormat.RFC4180), - - /** - * @see CSVFormat#TDF - */ - TDF(CSVFormat.TDF); - - private final CSVFormat format; - - Predefined(final CSVFormat format) { - this.format = format; - } - - /** - * Gets the format. - * - * @return the format. - */ - public CSVFormat getFormat() { - return format; - } - } - - /** - * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing - * empty lines. - * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter(',')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator("\r\n")}
  • - *
  • {@code setIgnoreEmptyLines(true)}
  • - *
  • {@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}
  • - *
- * - * @see Predefined#Default - */ - public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false, - false, false, false, DuplicateHeaderMode.ALLOW_ALL); - - /** - * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary - * to customize this format to accommodate to your regional settings. - * - *

- * For example for parsing or generating a CSV file on a French system the following format will be used: - *

- * - *
-     * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
-     * 
- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter(',')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator("\r\n")}
  • - *
  • {@code setIgnoreEmptyLines(false)}
  • - *
  • {@code setAllowMissingColumnNames(true)}
  • - *
  • {@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}
  • - *
- *

- * Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and - * {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(false)}. - *

- * - * @see Predefined#Excel - */ - // @formatter:off - public static final CSVFormat EXCEL = DEFAULT.builder() - .setIgnoreEmptyLines(false) - .setAllowMissingColumnNames(true) - .build(); - // @formatter:on - - /** - * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. - * - *

- * This is a pipe-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. - * The default NULL string is {@code "\\N"}. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter('|')}
  • - *
  • {@code setEscape('\\')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator('\n')}
  • - *
- * - * @see Predefined#MySQL - * @see - * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm - * @since 1.3 - */ - // @formatter:off - public static final CSVFormat INFORMIX_UNLOAD = DEFAULT.builder() - .setDelimiter(PIPE) - .setEscape(BACKSLASH) - .setQuote(DOUBLE_QUOTE_CHAR) - .setRecordSeparator(LF) - .build(); - // @formatter:on - - /** - * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) - * - *

- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. - * The default NULL string is {@code "\\N"}. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter(',')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator('\n')}
  • - *
- * - * @see Predefined#MySQL - * @see - * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm - * @since 1.3 - */ - // @formatter:off - public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT.builder() - .setDelimiter(COMMA) - .setQuote(DOUBLE_QUOTE_CHAR) - .setRecordSeparator(LF) - .build(); - // @formatter:on - - /** - * Default MongoDB CSV format used by the {@code mongoexport} operation. - *

- * Parsing is not supported yet. - *

- * - *

- * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with {@code '"'}. A header line with field - * names is expected. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter(',')}
  • - *
  • {@code setEscape('"')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setQuoteMode(QuoteMode.MINIMAL)}
  • - *
  • {@code setSkipHeaderRecord(false)}
  • - *
- * - * @see Predefined#MongoDBCsv - * @see MongoDB mongoexport command documentation - * @since 1.7 - */ - // @formatter:off - public static final CSVFormat MONGODB_CSV = DEFAULT.builder() - .setDelimiter(COMMA) - .setEscape(DOUBLE_QUOTE_CHAR) - .setQuote(DOUBLE_QUOTE_CHAR) - .setQuoteMode(QuoteMode.MINIMAL) - .setSkipHeaderRecord(false) - .build(); - // @formatter:off - - /** - * Default MongoDB TSV format used by the {@code mongoexport} operation. - *

- * Parsing is not supported yet. - *

- * - *

- * This is a tab-delimited format. Values are double quoted only if needed and special - * characters are escaped with {@code '"'}. A header line with field names is expected. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter('\t')}
  • - *
  • {@code setEscape('"')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setQuoteMode(QuoteMode.MINIMAL)}
  • - *
  • {@code setSkipHeaderRecord(false)}
  • - *
- * - * @see Predefined#MongoDBCsv - * @see MongoDB mongoexport command - * documentation - * @since 1.7 - */ - // @formatter:off - public static final CSVFormat MONGODB_TSV = DEFAULT.builder() - .setDelimiter(TAB) - .setEscape(DOUBLE_QUOTE_CHAR) - .setQuote(DOUBLE_QUOTE_CHAR) - .setQuoteMode(QuoteMode.MINIMAL) - .setSkipHeaderRecord(false) - .build(); - // @formatter:off - - /** - * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. - * - *

- * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter('\t')}
  • - *
  • {@code setEscape('\\')}
  • - *
  • {@code setIgnoreEmptyLines(false)}
  • - *
  • {@code setQuote(null)}
  • - *
  • {@code setRecordSeparator('\n')}
  • - *
  • {@code setNullString("\\N")}
  • - *
  • {@code setQuoteMode(QuoteMode.ALL_NON_NULL)}
  • - *
- * - * @see Predefined#MySQL - * @see http://dev.mysql.com/doc/refman/5.1/en/load - * -data.html - */ - // @formatter:off - public static final CSVFormat MYSQL = DEFAULT.builder() - .setDelimiter(TAB) - .setEscape(BACKSLASH) - .setIgnoreEmptyLines(false) - .setQuote(null) - .setRecordSeparator(LF) - .setNullString("\\N") - .setQuoteMode(QuoteMode.ALL_NON_NULL) - .build(); - // @formatter:off - - /** - * Default Oracle format used by the SQL*Loader utility. - * - *

- * This is a comma-delimited format with the system line separator character as the record separator. Values are - * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is - * {@code "\\N"}. Values are trimmed. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter(',') // default is {@code FIELDS TERMINATED BY ','}}
  • - *
  • {@code setEscape('\\')}
  • - *
  • {@code setIgnoreEmptyLines(false)}
  • - *
  • {@code setQuote('"') // default is {@code OPTIONALLY ENCLOSED BY '"'}}
  • - *
  • {@code setNullString("\\N")}
  • - *
  • {@code setTrim(true)}
  • - *
  • {@code setRecordSeparator(System.lineSeparator())}
  • - *
  • {@code setQuoteMode(QuoteMode.MINIMAL)}
  • - *
- * - * @see Predefined#Oracle - * @see Oracle CSV Format Specification - * @since 1.6 - */ - // @formatter:off - public static final CSVFormat ORACLE = DEFAULT.builder() - .setDelimiter(COMMA) - .setEscape(BACKSLASH) - .setIgnoreEmptyLines(false) - .setQuote(DOUBLE_QUOTE_CHAR) - .setNullString("\\N") - .setTrim(true) - .setRecordSeparator(System.lineSeparator()) - .setQuoteMode(QuoteMode.MINIMAL) - .build(); - // @formatter:off - - /** - * Default PostgreSQL CSV format used by the {@code COPY} operation. - * - *

- * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '"'}. The default NULL string is {@code ""}. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter(',')}
  • - *
  • {@code setEscape('"')}
  • - *
  • {@code setIgnoreEmptyLines(false)}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator('\n')}
  • - *
  • {@code setNullString("")}
  • - *
  • {@code setQuoteMode(QuoteMode.ALL_NON_NULL)}
  • - *
- * - * @see Predefined#MySQL - * @see PostgreSQL COPY command - * documentation - * @since 1.5 - */ - // @formatter:off - public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder() - .setDelimiter(COMMA) - .setEscape(DOUBLE_QUOTE_CHAR) - .setIgnoreEmptyLines(false) - .setQuote(DOUBLE_QUOTE_CHAR) - .setRecordSeparator(LF) - .setNullString(EMPTY) - .setQuoteMode(QuoteMode.ALL_NON_NULL) - .build(); - // @formatter:on - - /** - * Default PostgreSQL text format used by the {@code COPY} operation. - * - *

- * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}. - *

- * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter('\t')}
  • - *
  • {@code setEscape('\\')}
  • - *
  • {@code setIgnoreEmptyLines(false)}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator('\n')}
  • - *
  • {@code setNullString("\\N")}
  • - *
  • {@code setQuoteMode(QuoteMode.ALL_NON_NULL)}
  • - *
- * - * @see Predefined#MySQL - * @see PostgreSQL COPY command - * documentation - * @since 1.5 - */ - // @formatter:off - public static final CSVFormat POSTGRESQL_TEXT = DEFAULT.builder() - .setDelimiter(TAB) - .setEscape(BACKSLASH) - .setIgnoreEmptyLines(false) - .setQuote(DOUBLE_QUOTE_CHAR) - .setRecordSeparator(LF) - .setNullString("\\N") - .setQuoteMode(QuoteMode.ALL_NON_NULL) - .build(); - // @formatter:on - - /** - * Comma separated format as defined by RFC 4180. - * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter(',')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator("\r\n")}
  • - *
  • {@code setIgnoreEmptyLines(false)}
  • - *
- * - * @see Predefined#RFC4180 - */ - public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).build(); - - private static final long serialVersionUID = 1L; - - /** - * Tab-delimited format. - * - *

- * The {@link Builder} settings are: - *

- *
    - *
  • {@code setDelimiter('\t')}
  • - *
  • {@code setQuote('"')}
  • - *
  • {@code setRecordSeparator("\r\n")}
  • - *
  • {@code setIgnoreSurroundingSpaces(true)}
  • - *
- * - * @see Predefined#TDF - */ - // @formatter:off - public static final CSVFormat TDF = DEFAULT.builder() - .setDelimiter(TAB) - .setIgnoreSurroundingSpaces(true) - .build(); - // @formatter:on - - /** - * Null-safe clone of an array. - * - * @param The array element type. - * @param values the source array - * @return the cloned array. - */ - @SafeVarargs - static T[] clone(final T... values) { - return values == null ? null : values.clone(); - } - - /** - * Returns true if the given string contains the search char. - * - * @param source the string to check. - * @param searchCh the character to search. - * - * @return true if {@code source} contains {@code searchCh}. - */ - private static boolean contains(final String source, final char searchCh) { - return Objects.requireNonNull(source, "source").indexOf(searchCh) >= 0; - } - - /** - * Returns true if the given string contains a line break character. - * - * @param source the string to check. - * - * @return true if {@code source} contains a line break character. - */ - private static boolean containsLineBreak(final String source) { - return contains(source, CR) || contains(source, LF); - } - - /** - * Returns true if the given character is a line break character. - * - * @param c the character to check. - * - * @return true if {@code c} is a line break character. - */ - private static boolean isLineBreak(final char c) { - return c == LF || c == CR; - } - - /** - * Returns true if the given character is a line break character. - * - * @param c the character to check, may be null. - * - * @return true if {@code c} is a line break character (and not null). - */ - private static boolean isLineBreak(final Character c) { - return c != null && isLineBreak(c.charValue()); - } - - /** - * Creates a new CSV format with the specified delimiter. - * - *

- * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized with null/false. - *

- * - * @param delimiter the char used for value separation, must not be a line break character - * @return a new CSV format. - * @throws IllegalArgumentException if the delimiter is a line break character - * - * @see #DEFAULT - * @see #RFC4180 - * @see #MYSQL - * @see #EXCEL - * @see #TDF - */ - public static CSVFormat newFormat(final char delimiter) { - return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false, - DuplicateHeaderMode.ALLOW_ALL); - } - - static String[] toStringArray(final Object[] values) { - if (values == null) { - return null; - } - final String[] strings = new String[values.length]; - for (int i = 0; i < values.length; i++) { - strings[i] = Objects.toString(values[i], null); - } - return strings; - } - - static CharSequence trim(final CharSequence charSequence) { - if (charSequence instanceof String) { - return ((String) charSequence).trim(); - } - final int count = charSequence.length(); - int len = count; - int pos = 0; - - while (pos < len && charSequence.charAt(pos) <= SP) { - pos++; - } - while (pos < len && charSequence.charAt(len - 1) <= SP) { - len--; - } - return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; - } - - /** - * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 
- * - * @param format name - * @return one of the predefined formats - * @since 1.2 - */ - public static CSVFormat valueOf(final String format) { - return CSVFormat.Predefined.valueOf(format).getFormat(); - } - - private final DuplicateHeaderMode duplicateHeaderMode; - - private final boolean allowMissingColumnNames; - - private final boolean autoFlush; - - private final Character commentMarker; // null if commenting is disabled - - private final String delimiter; - - private final Character escapeCharacter; // null if escaping is disabled - - private final String[] header; // array of header column names - - private final String[] headerComments; // array of header comment lines - - private final boolean ignoreEmptyLines; - - private final boolean ignoreHeaderCase; // should ignore header names case - - private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? - - private final String nullString; // the string to be used for null values - - private final Character quoteCharacter; // null if quoting is disabled - - private final String quotedNullString; - - private final QuoteMode quoteMode; - - private final String recordSeparator; // for outputs - - private final boolean skipHeaderRecord; - - private final boolean trailingDelimiter; - - private final boolean trim; - - private CSVFormat(final Builder builder) { - this.delimiter = builder.delimiter; - this.quoteCharacter = builder.quoteCharacter; - this.quoteMode = builder.quoteMode; - this.commentMarker = builder.commentMarker; - this.escapeCharacter = builder.escapeCharacter; - this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces; - this.allowMissingColumnNames = builder.allowMissingColumnNames; - this.ignoreEmptyLines = builder.ignoreEmptyLines; - this.recordSeparator = builder.recordSeparator; - this.nullString = builder.nullString; - this.headerComments = builder.headerComments; - this.header = builder.headers; - this.skipHeaderRecord = 
builder.skipHeaderRecord; - this.ignoreHeaderCase = builder.ignoreHeaderCase; - this.trailingDelimiter = builder.trailingDelimiter; - this.trim = builder.trim; - this.autoFlush = builder.autoFlush; - this.quotedNullString = builder.quotedNullString; - this.duplicateHeaderMode = builder.duplicateHeaderMode; - validate(); - } - - /** - * Creates a customized CSV format. - * - * @param delimiter the char used for value separation, must not be a line break character. - * @param quoteChar the Character used as value encapsulation marker, may be {@code null} to disable. - * @param quoteMode the quote mode. - * @param commentStart the Character used for comment identification, may be {@code null} to disable. - * @param escape the Character used to escape special characters in values, may be {@code null} to disable. - * @param ignoreSurroundingSpaces {@code true} when whitespaces enclosing values should be ignored. - * @param ignoreEmptyLines {@code true} when the parser should skip empty lines. - * @param recordSeparator the line separator to use for output. - * @param nullString the line separator to use for output. - * @param headerComments the comments to be printed by the Printer before the actual CSV data. - * @param header the header - * @param skipHeaderRecord TODO Doc me. - * @param allowMissingColumnNames TODO Doc me. - * @param ignoreHeaderCase TODO Doc me. - * @param trim TODO Doc me. - * @param trailingDelimiter TODO Doc me. - * @param autoFlush TODO Doc me. - * @param duplicateHeaderMode the behavior when handling duplicate headers - * @throws IllegalArgumentException if the delimiter is a line break character. 
- */ - private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape, - final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, - final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames, - final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush, - final DuplicateHeaderMode duplicateHeaderMode) { - this.delimiter = delimiter; - this.quoteCharacter = quoteChar; - this.quoteMode = quoteMode; - this.commentMarker = commentStart; - this.escapeCharacter = escape; - this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; - this.allowMissingColumnNames = allowMissingColumnNames; - this.ignoreEmptyLines = ignoreEmptyLines; - this.recordSeparator = recordSeparator; - this.nullString = nullString; - this.headerComments = toStringArray(headerComments); - this.header = clone(header); - this.skipHeaderRecord = skipHeaderRecord; - this.ignoreHeaderCase = ignoreHeaderCase; - this.trailingDelimiter = trailingDelimiter; - this.trim = trim; - this.autoFlush = autoFlush; - this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - this.duplicateHeaderMode = duplicateHeaderMode; - validate(); - } - - private void append(final char c, final Appendable appendable) throws IOException { - //try { - appendable.append(c); - //} catch (final IOException e) { - // throw new UncheckedIOException(e); - //} - } - - private void append(final CharSequence csq, final Appendable appendable) throws IOException { - //try { - appendable.append(csq); - //} catch (final IOException e) { - // throw new UncheckedIOException(e); - //} - } - - /** - * Creates a new Builder for this instance. - * - * @return a new Builder. 
- */ - public Builder builder() { - return Builder.create(this); - } - - /** - * Creates a copy of this instance. - * - * @return a copy of this instance. - */ - CSVFormat copy() { - return builder().build(); - } - - @Override - public boolean equals(final Object obj) { - if (this == obj) { - return true; - } - if (obj == null || getClass() != obj.getClass()) { - return false; - } - final CSVFormat other = (CSVFormat) obj; - return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames && - autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) && - Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) && - Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines && - ignoreHeaderCase == other.ignoreHeaderCase && ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && - Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode && - Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) && - skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim; - } - - /** - * Formats the specified values. - * - * @param values the values to format - * @return the formatted values - */ - public String format(final Object... values) { - final StringWriter out = new StringWriter(); - try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { - csvPrinter.printRecord(values); - final String res = out.toString(); - final int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length(); - return res.substring(0, len); - } catch (final IOException e) { - // should not happen because a StringWriter does not do IO. 
- throw new IllegalStateException(e); - } - } - - /** - * Returns true if and only if duplicate names are allowed in the headers. - * - * @return whether duplicate header names are allowed - * @since 1.7 - * @deprecated Use {@link #getDuplicateHeaderMode()}. - */ - @Deprecated - public boolean getAllowDuplicateHeaderNames() { - return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL; - } - - /** - * Specifies whether missing column names are allowed when parsing the header line. - * - * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}. - */ - public boolean getAllowMissingColumnNames() { - return allowMissingColumnNames; - } - - /** - * Returns whether to flush on close. - * - * @return whether to flush on close. - * @since 1.6 - */ - public boolean getAutoFlush() { - return autoFlush; - } - - /** - * Returns the character marking the start of a line comment. - * - * @return the comment start marker, may be {@code null} - */ - public Character getCommentMarker() { - return commentMarker; - } - - /** - * Returns the first character delimiting the values (typically ';', ',' or '\t'). - * - * @return the first delimiter character. - * @deprecated Use {@link #getDelimiterString()}. - */ - @Deprecated - public char getDelimiter() { - return delimiter.charAt(0); - } - - /** - * Returns the character delimiting the values (typically ";", "," or "\t"). - * - * @return the delimiter. - */ - public String getDelimiterString() { - return delimiter; - } - - /** - * Gets how duplicate headers are handled. - * - * @return if duplicate header values are allowed, allowed conditionally, or disallowed. - * @since 1.9.0 - */ - public DuplicateHeaderMode getDuplicateHeaderMode() { - return duplicateHeaderMode; - } - - /** - * Returns the escape character. 
- * - * @return the escape character, may be {@code null} - */ - public Character getEscapeCharacter() { - return escapeCharacter; - } - - /** - * Returns a copy of the header array. - * - * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file - */ - public String[] getHeader() { - return header != null ? header.clone() : null; - } - - /** - * Returns a copy of the header comment array. - * - * @return a copy of the header comment array; {@code null} if disabled. - */ - public String[] getHeaderComments() { - return headerComments != null ? headerComments.clone() : null; - } - - /** - * Specifies whether empty lines between records are ignored when parsing input. - * - * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records. - */ - public boolean getIgnoreEmptyLines() { - return ignoreEmptyLines; - } - - /** - * Specifies whether header names will be accessed ignoring case. - * - * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive. - * @since 1.3 - */ - public boolean getIgnoreHeaderCase() { - return ignoreHeaderCase; - } - - /** - * Specifies whether spaces around values are ignored when parsing input. - * - * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. - */ - public boolean getIgnoreSurroundingSpaces() { - return ignoreSurroundingSpaces; - } - - /** - * Gets the String to convert to and from {@code null}. - *
    - *
  • Reading: Converts strings equal to the given {@code nullString} to {@code null} when reading records.
  • - *
  • Writing: Writes {@code null} as the given {@code nullString} when writing records.
  • - *
- * - * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} - */ - public String getNullString() { - return nullString; - } - - /** - * Returns the character used to encapsulate values containing special characters. - * - * @return the quoteChar character, may be {@code null} - */ - public Character getQuoteCharacter() { - return quoteCharacter; - } - - /** - * Returns the quote policy output fields. - * - * @return the quote policy - */ - public QuoteMode getQuoteMode() { - return quoteMode; - } - - /** - * Returns the record separator delimiting output records. - * - * @return the record separator - */ - public String getRecordSeparator() { - return recordSeparator; - } - - /** - * Returns whether to skip the header record. - * - * @return whether to skip the header record. - */ - public boolean getSkipHeaderRecord() { - return skipHeaderRecord; - } - - /** - * Returns whether to add a trailing delimiter. - * - * @return whether to add a trailing delimiter. - * @since 1.3 - */ - public boolean getTrailingDelimiter() { - return trailingDelimiter; - } - - /** - * Returns whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by - * {CSVParser#addRecordValue(boolean)} - * - * @return whether to trim leading and trailing blanks. - */ - public boolean getTrim() { - return trim; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + Arrays.hashCode(header); - result = prime * result + Arrays.hashCode(headerComments); - return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter, - ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator, - skipHeaderRecord, trailingDelimiter, trim); - } - - /** - * Specifies whether comments are supported by this format. 
- * - * Note that the comment introducer character is only recognized at the start of a line. - * - * @return {@code true} is comments are supported, {@code false} otherwise - */ - public boolean isCommentMarkerSet() { - return commentMarker != null; - } - - /** - * Matches whether the next characters constitute a delimiter - * - * @param ch - * the current char - * @param charSeq - * the match char sequence - * @param startIndex - * where start to match - * @param delimiter - * the delimiter - * @param delimiterLength - * the delimiter length - * @return true if the match is successful - */ - private boolean isDelimiter(final char ch, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) { - if (ch != delimiter[0]) { - return false; - } - final int len = charSeq.length(); - if (startIndex + delimiterLength > len) { - return false; - } - for (int i = 1; i < delimiterLength; i++) { - if (charSeq.charAt(startIndex + i) != delimiter[i]) { - return false; - } - } - return true; - } - - /** - * Returns whether escape are being processed. - * - * @return {@code true} if escapes are processed - */ - public boolean isEscapeCharacterSet() { - return escapeCharacter != null; - } - - /** - * Returns whether a nullString has been defined. - * - * @return {@code true} if a nullString is defined - */ - public boolean isNullStringSet() { - return nullString != null; - } - - /** - * Returns whether a quoteChar has been defined. - * - * @return {@code true} if a quoteChar is defined - */ - public boolean isQuoteCharacterSet() { - return quoteCharacter != null; - } - - /** - * Parses the specified content. - * - *

- * See also the various static parse methods on {@link CSVParser}. - *

- * - * @param reader the input stream - * @return a parser over a stream of {@link CSVRecord}s. - * @throws IOException If an I/O error occurs - */ - public CSVParser parse(final Reader reader) throws IOException { - return new CSVParser(reader, this); - } - - /** - * Prints to the specified output. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @param out the output. - * @return a printer to an output. - * @throws IOException thrown if the optional header cannot be printed. - */ - public CSVPrinter print(final Appendable out) throws IOException { - return new CSVPrinter(out, this); - } - - /** - * Prints to the specified output. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @param out the output. - * @param charset A charset. - * @return a printer to an output. - * @throws IOException thrown if the optional header cannot be printed. - * @since 1.5 - */ - @SuppressWarnings("resource") - public CSVPrinter print(final File out, final Charset charset) throws IOException { - // The writer will be closed when close() is called. - return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this); - } - - /** - * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated as needed. Useful when one wants to - * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is true. - * - * @param value value to output. - * @param out where to print the value. - * @param newRecord if this a new record. - * @throws IOException If an I/O error occurs. - * @since 1.4 - */ - public synchronized void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { - // null values are considered empty - // Only call CharSequence.toString() if you have to, helps GC-free use cases. - CharSequence charSequence; - if (value == null) { - // https://issues.apache.org/jira/browse/CSV-203 - if (null == nullString) { - charSequence = EMPTY; - } else if (QuoteMode.ALL == quoteMode) { - charSequence = quotedNullString; - } else { - charSequence = nullString; - } - } else if (value instanceof CharSequence) { - charSequence = (CharSequence) value; - } else if (value instanceof Reader) { - print((Reader) value, out, newRecord); - return; - } else { - charSequence = value.toString(); - } - charSequence = getTrim() ? 
trim(charSequence) : charSequence; - print(value, charSequence, out, newRecord); - } - - private synchronized void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException { - final int offset = 0; - final int len = value.length(); - if (!newRecord) { - out.append(getDelimiterString()); - } - if (object == null) { - out.append(value); - } else if (isQuoteCharacterSet()) { - // the original object is needed so can check for Number - printWithQuotes(object, value, out, newRecord); - } else if (isEscapeCharacterSet()) { - printWithEscapes(value, out); - } else { - out.append(value, offset, len); - } - } - - /** - * Prints to the specified output, returns a {@code CSVPrinter} which the caller MUST close. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @param out the output. - * @param charset A charset. - * @return a printer to an output. - * @throws IOException thrown if the optional header cannot be printed. - * @since 1.5 - */ - @SuppressWarnings("resource") - public CSVPrinter print(final Path out, final Charset charset) throws IOException { - return print(Files.newBufferedWriter(out, charset)); - } - - private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { - // Reader is never null - if (!newRecord) { - append(getDelimiterString(), out); - } - if (isQuoteCharacterSet()) { - printWithQuotes(reader, out); - } else if (isEscapeCharacterSet()) { - printWithEscapes(reader, out); - } else if (out instanceof Writer) { - IOUtils.copyLarge(reader, (Writer) out); - } else { - IOUtils.copy(reader, out); - } - - } - - /** - * Prints to the {@link System#out}. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @return a printer to {@link System#out}. - * @throws IOException thrown if the optional header cannot be printed. - * @since 1.5 - */ - public CSVPrinter printer() throws IOException { - return new CSVPrinter(System.out, this); - } - - /** - * Outputs the trailing delimiter (if set) followed by the record separator (if set). - * - * @param appendable where to write - * @throws IOException If an I/O error occurs. - * @since 1.4 - */ - public synchronized void println(final Appendable appendable) throws IOException { - if (getTrailingDelimiter()) { - append(getDelimiterString(), appendable); - } - if (recordSeparator != null) { - append(recordSeparator, appendable); - } - } - - /** - * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the record separator. - * - *

- * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing - * the record, so there is no need to call {@link #println(Appendable)}. - *

- * - * @param appendable where to write. - * @param values values to output. - * @throws IOException If an I/O error occurs. - * @since 1.4 - */ - public synchronized void printRecord(final Appendable appendable, final Object... values) throws IOException { - for (int i = 0; i < values.length; i++) { - print(values[i], appendable, i == 0); - } - println(appendable); - } - - /* - * Note: Must only be called if escaping is enabled, otherwise will generate NPE. - */ - private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { - int start = 0; - int pos = 0; - final int end = charSeq.length(); - - final char[] delim = getDelimiterString().toCharArray(); - final int delimLength = delim.length; - final char escape = getEscapeCharacter().charValue(); - - while (pos < end) { - char c = charSeq.charAt(pos); - final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delim, delimLength); - if (c == CR || c == LF || c == escape || isDelimiterStart) { - // write out segment up until this char - if (pos > start) { - appendable.append(charSeq, start, pos); - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - appendable.append(escape); - appendable.append(c); - - if (isDelimiterStart) { - for (int i = 1; i < delimLength; i++) { - pos++; - c = charSeq.charAt(pos); - appendable.append(escape); - appendable.append(c); - } - } - - start = pos + 1; // start on the current char after this one - } - pos++; - } - - // write last segment - if (pos > start) { - appendable.append(charSeq, start, pos); - } - } - - private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { - int start = 0; - int pos = 0; - - @SuppressWarnings("resource") // Temp reader on input reader. 
- final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader); - final char[] delim = getDelimiterString().toCharArray(); - final int delimLength = delim.length; - final char escape = getEscapeCharacter().charValue(); - final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); - - int c; - while (-1 != (c = bufferedReader.read())) { - builder.append((char) c); - final boolean isDelimiterStart = isDelimiter((char) c, builder.toString() + new String(bufferedReader.lookAhead(delimLength - 1)), pos, delim, - delimLength); - if (c == CR || c == LF || c == escape || isDelimiterStart) { - // write out segment up until this char - if (pos > start) { - append(builder.substring(start, pos), appendable); - builder.setLength(0); - pos = -1; - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - append(escape, appendable); - append((char) c, appendable); - - if (isDelimiterStart) { - for (int i = 1; i < delimLength; i++) { - c = bufferedReader.read(); - append(escape, appendable); - append((char) c, appendable); - } - } - - start = pos + 1; // start on the current char after this one - } - pos++; - } - - // write last segment - if (pos > start) { - append(builder.substring(start, pos), appendable); - } - } - - /* - * Note: must only be called if quoting is enabled, otherwise will generate NPE - */ - // the original object is needed so can check for Number - private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws IOException { - boolean quote = false; - int start = 0; - int pos = 0; - final int len = charSeq.length(); - - final char[] delim = getDelimiterString().toCharArray(); - final int delimLength = delim.length; - final char quoteChar = getQuoteCharacter().charValue(); - // If escape char not specified, default to the quote char - // This avoids having to keep checking whether there is an escape character - // at the cost of checking 
against quote twice - final char escapeChar = isEscapeCharacterSet() ? getEscapeCharacter().charValue() : quoteChar; - - QuoteMode quoteModePolicy = getQuoteMode(); - if (quoteModePolicy == null) { - quoteModePolicy = QuoteMode.MINIMAL; - } - switch (quoteModePolicy) { - case ALL: - case ALL_NON_NULL: - quote = true; - break; - case NON_NUMERIC: - quote = !(object instanceof Number); - break; - case NONE: - // Use the existing escaping code - printWithEscapes(charSeq, out); - return; - case MINIMAL: - if (len <= 0) { - // always quote an empty token that is the first - // on the line, as it may be the only thing on the - // line. If it were not quoted in that case, - // an empty line has no tokens. - if (newRecord) { - quote = true; - } - } else { - char c = charSeq.charAt(pos); - - if (c <= COMMENT) { - // Some other chars at the start of a value caused the parser to fail, so for now - // encapsulate if we start in anything less than '#'. We are being conservative - // by including the default comment char too. 
- quote = true; - } else { - while (pos < len) { - c = charSeq.charAt(pos); - if (c == LF || c == CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) { - quote = true; - break; - } - pos++; - } - - if (!quote) { - pos = len - 1; - c = charSeq.charAt(pos); - // Some other chars at the end caused the parser to fail, so for now - // encapsulate if we end in anything less than ' ' - if (c <= SP) { - quote = true; - } - } - } - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(charSeq, start, len); - return; - } - break; - default: - throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(charSeq, start, len); - return; - } - - // we hit something that needed encapsulation - out.append(quoteChar); - - // Pick up where we left off: pos should be positioned on the first character that caused - // the need for encapsulation. - while (pos < len) { - final char c = charSeq.charAt(pos); - if (c == quoteChar || c == escapeChar) { - // write out the chunk up until this point - out.append(charSeq, start, pos); - out.append(escapeChar); // now output the escape - start = pos; // and restart with the matched char - } - pos++; - } - - // write the last segment - out.append(charSeq, start, pos); - out.append(quoteChar); - } - - /** - * Always use quotes unless QuoteMode is NONE, so we not have to look ahead. 
- * - * @param reader What to print - * @param appendable Where to print it - * @throws IOException If an I/O error occurs - */ - private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException { - - if (getQuoteMode() == QuoteMode.NONE) { - printWithEscapes(reader, appendable); - return; - } - - int pos = 0; - - final char quote = getQuoteCharacter().charValue(); - final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); - - append(quote, appendable); - - int c; - while (-1 != (c = reader.read())) { - builder.append((char) c); - if (c == quote) { - // write out segment up until this char - if (pos > 0) { - append(builder.substring(0, pos), appendable); - append(quote, appendable); - builder.setLength(0); - pos = -1; - } - - append((char) c, appendable); - } - pos++; - } - - // write last segment - if (pos > 0) { - append(builder.substring(0, pos), appendable); - } - - append(quote, appendable); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("Delimiter=<").append(delimiter).append('>'); - if (isEscapeCharacterSet()) { - sb.append(' '); - sb.append("Escape=<").append(escapeCharacter).append('>'); - } - if (isQuoteCharacterSet()) { - sb.append(' '); - sb.append("QuoteChar=<").append(quoteCharacter).append('>'); - } - if (quoteMode != null) { - sb.append(' '); - sb.append("QuoteMode=<").append(quoteMode).append('>'); - } - if (isCommentMarkerSet()) { - sb.append(' '); - sb.append("CommentStart=<").append(commentMarker).append('>'); - } - if (isNullStringSet()) { - sb.append(' '); - sb.append("NullString=<").append(nullString).append('>'); - } - if (recordSeparator != null) { - sb.append(' '); - sb.append("RecordSeparator=<").append(recordSeparator).append('>'); - } - if (getIgnoreEmptyLines()) { - sb.append(" EmptyLines:ignored"); - } - if (getIgnoreSurroundingSpaces()) { - sb.append(" SurroundingSpaces:ignored"); - } - if (getIgnoreHeaderCase()) { - 
sb.append(" IgnoreHeaderCase:ignored"); - } - sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); - if (headerComments != null) { - sb.append(' '); - sb.append("HeaderComments:").append(Arrays.toString(headerComments)); - } - if (header != null) { - sb.append(' '); - sb.append("Header:").append(Arrays.toString(header)); - } - return sb.toString(); - } - - /** - * Verifies the validity and consistency of the attributes, and throws an IllegalArgumentException if necessary. - * - * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. - */ - private void validate() throws IllegalArgumentException { - if (containsLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - - if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { - throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); - } - - if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { - throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); - } - - if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { - throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); - } - - if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { - throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); - } - - if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { - throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" + commentMarker + "')"); - } - - if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { - throw new IllegalArgumentException("No quotes mode set but no escape 
character is set"); - } - - // validate header - if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { - final Set dupCheck = new HashSet<>(); - for (final String hdr : header) { - if (!dupCheck.add(hdr)) { - throw new IllegalArgumentException("The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header)); - } - } - } - } - - /** - * Returns a new {@code CSVFormat} that allows duplicate header names. - * - * @return a new {@code CSVFormat} that allows duplicate header names - * @since 1.7 - * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean) Builder#setAllowDuplicateHeaderNames(true)} - */ - @Deprecated - public CSVFormat withAllowDuplicateHeaderNames() { - return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build(); - } - - /** - * Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value. - * - * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. - * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. - * @since 1.7 - * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean)} - */ - @Deprecated - public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { - final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; - return builder().setDuplicateHeaderMode(mode).build(); - } - - /** - * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 
- * @see Builder#setAllowMissingColumnNames(boolean) - * @since 1.1 - * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} - */ - @Deprecated - public CSVFormat withAllowMissingColumnNames() { - return builder().setAllowMissingColumnNames(true).build(); - } - - /** - * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. - * - * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause - * an {@link IllegalArgumentException} to be thrown. - * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. - * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean)} - */ - @Deprecated - public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { - return builder().setAllowMissingColumnNames(allowMissingColumnNames).build(); - } - - /** - * Returns a new {@code CSVFormat} with whether to flush on close. - * - * @param autoFlush whether to flush on close. - * - * @return A new CSVFormat that is equal to this but with the specified autoFlush setting. - * @since 1.6 - * @deprecated Use {@link Builder#setAutoFlush(boolean)} - */ - @Deprecated - public CSVFormat withAutoFlush(final boolean autoFlush) { - return builder().setAutoFlush(autoFlush).build(); - } - - /** - * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. - * - * Note that the comment start character is only recognized at the start of a line. 
- * - * @param commentMarker the comment start marker - * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker - * @throws IllegalArgumentException thrown if the specified character is a line break - * @deprecated Use {@link Builder#setCommentMarker(char)} - */ - @Deprecated - public CSVFormat withCommentMarker(final char commentMarker) { - return builder().setCommentMarker(commentMarker).build(); - } - - /** - * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. - * - * Note that the comment start character is only recognized at the start of a line. - * - * @param commentMarker the comment start marker, use {@code null} to disable - * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker - * @throws IllegalArgumentException thrown if the specified character is a line break - * @deprecated Use {@link Builder#setCommentMarker(Character)} - */ - @Deprecated - public CSVFormat withCommentMarker(final Character commentMarker) { - return builder().setCommentMarker(commentMarker).build(); - } - - /** - * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character. - * - * @param delimiter the delimiter character - * @return A new CSVFormat that is equal to this with the specified character as delimiter - * @throws IllegalArgumentException thrown if the specified character is a line break - * @deprecated Use {@link Builder#setDelimiter(char)} - */ - @Deprecated - public CSVFormat withDelimiter(final char delimiter) { - return builder().setDelimiter(delimiter).build(); - } - - /** - * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 
- * - * @param escape the escape character - * @return A new CSVFormat that is equal to this but with the specified character as the escape character - * @throws IllegalArgumentException thrown if the specified character is a line break - * @deprecated Use {@link Builder#setEscape(char)} - */ - @Deprecated - public CSVFormat withEscape(final char escape) { - return builder().setEscape(escape).build(); - } - - /** - * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. - * - * @param escape the escape character, use {@code null} to disable - * @return A new CSVFormat that is equal to this but with the specified character as the escape character - * @throws IllegalArgumentException thrown if the specified character is a line break - * @deprecated Use {@link Builder#setEscape(Character)} - */ - @Deprecated - public CSVFormat withEscape(final Character escape) { - return builder().setEscape(escape).build(); - } - - /** - * Returns a new {@code CSVFormat} using the first record as header. - * - *

- * Calling this method is equivalent to calling: - *

- * - *
-     * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
-     * 
- * - * @return A new CSVFormat that is equal to this but using the first record as header. - * @see Builder#setSkipHeaderRecord(boolean) - * @see Builder#setHeader(String...) - * @since 1.3 - * @deprecated Use {@link Builder#setHeader(String...) Builder#setHeader()}.{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord(true)}. - */ - @Deprecated - public CSVFormat withFirstRecordAsHeader() { - // @formatter:off - return builder() - .setHeader() - .setSkipHeaderRecord(true) - .build(); - // @formatter:on - } - - /** - * Returns a new {@code CSVFormat} with the header of the format defined by the enum class. - * - *

- * Example: - *

- * - *
-     * public enum Header {
-     *     Name, Email, Phone
-     * }
-     *
-     * CSVFormat format = aformat.withHeader(Header.class);
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return A new CSVFormat that is equal to this but with the specified header - * @see Builder#setHeader(String...) - * @see Builder#setSkipHeaderRecord(boolean) - * @since 1.3 - * @deprecated Use {@link Builder#setHeader(Class)} - */ - @Deprecated - public CSVFormat withHeader(final Class> headerEnum) { - return builder().setHeader(headerEnum).build(); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the - * input file with: - * - *
-     * CSVFormat format = aformat.withHeader();
-     * 
- * - * or specified manually with: - * - *
-     * CSVFormat format = aformat.withHeader(resultSet);
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return A new CSVFormat that is equal to this but with the specified header - * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. - * @since 1.1 - * @deprecated Use {@link Builder#setHeader(ResultSet)} - */ - @Deprecated - public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { - return builder().setHeader(resultSet).build(); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the - * input file with: - * - *
-     * CSVFormat format = aformat.withHeader();
-     * 
- * - * or specified manually with: - * - *
-     * CSVFormat format = aformat.withHeader(metaData);
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return A new CSVFormat that is equal to this but with the specified header - * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. - * @since 1.1 - * @deprecated Use {@link Builder#setHeader(ResultSetMetaData)} - */ - @Deprecated - public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { - return builder().setHeader(resultSetMetaData).build(); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file - * with: - * - *
-     * CSVFormat format = aformat.withHeader();
-     * 
- * - * or specified manually with: - * - *
-     * CSVFormat format = aformat.withHeader("name", "email", "phone");
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * @return A new CSVFormat that is equal to this but with the specified header - * @see Builder#setSkipHeaderRecord(boolean) - * @deprecated Use {@link Builder#setHeader(String...)} - */ - @Deprecated - public CSVFormat withHeader(final String... header) { - return builder().setHeader(header).build(); - } - - /** - * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers. - * This setting is ignored by the parser. - * - *
-     * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV.", Instant.now());
-     * 
- * - * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. - * @return A new CSVFormat that is equal to this but with the specified header - * @see Builder#setSkipHeaderRecord(boolean) - * @since 1.1 - * @deprecated Use {@link Builder#setHeaderComments(Object...)} - */ - @Deprecated - public CSVFormat withHeaderComments(final Object... headerComments) { - return builder().setHeaderComments(headerComments).build(); - } - - /** - * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. - * @see Builder#setIgnoreEmptyLines(boolean) - * @since 1.1 - * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)} - */ - @Deprecated - public CSVFormat withIgnoreEmptyLines() { - return builder().setIgnoreEmptyLines(true).build(); - } - - /** - * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. - * - * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty - * lines to empty records. - * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. - * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean)} - */ - @Deprecated - public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { - return builder().setIgnoreEmptyLines(ignoreEmptyLines).build(); - } - - /** - * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. - * - * @return A new CSVFormat that will ignore case header name. 
- * @see Builder#setIgnoreHeaderCase(boolean) - * @since 1.3 - * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)} - */ - @Deprecated - public CSVFormat withIgnoreHeaderCase() { - return builder().setIgnoreHeaderCase(true).build(); - } - - /** - * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case. - * - * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. - * @return A new CSVFormat that will ignore case header name if specified as {@code true} - * @since 1.3 - * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean)} - */ - @Deprecated - public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { - return builder().setIgnoreHeaderCase(ignoreHeaderCase).build(); - } - - /** - * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. - * @see Builder#setIgnoreSurroundingSpaces(boolean) - * @since 1.1 - * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean) Builder#setIgnoreSurroundingSpaces(true)} - */ - @Deprecated - public CSVFormat withIgnoreSurroundingSpaces() { - return builder().setIgnoreSurroundingSpaces(true).build(); - } - - /** - * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. - * - * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. - * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 
- * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean)} - */ - @Deprecated - public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { - return builder().setIgnoreSurroundingSpaces(ignoreSurroundingSpaces).build(); - } - - /** - * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output. - *
    - *
  • Reading: Converts strings equal to the given {@code nullString} to {@code null} when reading records.
  • - *
  • Writing: Writes {@code null} as the given {@code nullString} when writing records.
  • - *
- * - * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null} - * @return A new CSVFormat that is equal to this but with the specified null conversion string. - * @deprecated Use {@link Builder#setNullString(String)} - */ - @Deprecated - public CSVFormat withNullString(final String nullString) { - return builder().setNullString(nullString).build(); - } - - /** - * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. - * - * @param quoteChar the quote character - * @return A new CSVFormat that is equal to this but with the specified character as quoteChar - * @throws IllegalArgumentException thrown if the specified character is a line break - * @deprecated Use {@link Builder#setQuote(char)} - */ - @Deprecated - public CSVFormat withQuote(final char quoteChar) { - return builder().setQuote(quoteChar).build(); - } - - /** - * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. - * - * @param quoteChar the quote character, use {@code null} to disable. - * @return A new CSVFormat that is equal to this but with the specified character as quoteChar - * @throws IllegalArgumentException thrown if the specified character is a line break - * @deprecated Use {@link Builder#setQuote(Character)} - */ - @Deprecated - public CSVFormat withQuote(final Character quoteChar) { - return builder().setQuote(quoteChar).build(); - } - - /** - * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value. - * - * @param quoteMode the quote policy to use for output. 
- * - * @return A new CSVFormat that is equal to this but with the specified quote policy - * @deprecated Use {@link Builder#setQuoteMode(QuoteMode)} - */ - @Deprecated - public CSVFormat withQuoteMode(final QuoteMode quoteMode) { - return builder().setQuoteMode(quoteMode).build(); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and - * "\r\n" - *

- * - * @param recordSeparator the record separator to use for output. - * @return A new CSVFormat that is equal to this but with the specified output record separator - * @deprecated Use {@link Builder#setRecordSeparator(char)} - */ - @Deprecated - public CSVFormat withRecordSeparator(final char recordSeparator) { - return builder().setRecordSeparator(recordSeparator).build(); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and - * "\r\n" - *

- * - * @param recordSeparator the record separator to use for output. - * @return A new CSVFormat that is equal to this but with the specified output record separator - * @throws IllegalArgumentException if recordSeparator is none of CR, LF or CRLF - * @deprecated Use {@link Builder#setRecordSeparator(String)} - */ - @Deprecated - public CSVFormat withRecordSeparator(final String recordSeparator) { - return builder().setRecordSeparator(recordSeparator).build(); - } - - /** - * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. - * @see Builder#setSkipHeaderRecord(boolean) - * @see Builder#setHeader(String...) - * @since 1.1 - * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean) Builder#setSkipHeaderRecord(true)} - */ - @Deprecated - public CSVFormat withSkipHeaderRecord() { - return builder().setSkipHeaderRecord(true).build(); - } - - /** - * Returns a new {@code CSVFormat} with whether to skip the header record. - * - * @param skipHeaderRecord whether to skip the header record. - * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. - * @see Builder#setHeader(String...) - * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean)} - */ - @Deprecated - public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { - return builder().setSkipHeaderRecord(skipHeaderRecord).build(); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows - * and LF on Linux. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and - * "\r\n" - *

- * - * @return A new CSVFormat that is equal to this but with the operating system's line separator string. - * @since 1.6 - * @deprecated Use {@link Builder#setRecordSeparator(String) setRecordSeparator(System.lineSeparator())} - */ - @Deprecated - public CSVFormat withSystemRecordSeparator() { - return builder().setRecordSeparator(System.lineSeparator()).build(); - } - - /** - * Returns a new {@code CSVFormat} to add a trailing delimiter. - * - * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. - * @since 1.3 - * @deprecated Use {@link Builder#setTrailingDelimiter(boolean) Builder#setTrailingDelimiter(true)} - */ - @Deprecated - public CSVFormat withTrailingDelimiter() { - return builder().setTrailingDelimiter(true).build(); - } - - /** - * Returns a new {@code CSVFormat} with whether to add a trailing delimiter. - * - * @param trailingDelimiter whether to add a trailing delimiter. - * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. - * @since 1.3 - * @deprecated Use {@link Builder#setTrailingDelimiter(boolean)} - */ - @Deprecated - public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { - return builder().setTrailingDelimiter(trailingDelimiter).build(); - } - - /** - * Returns a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. - * - * @return A new CSVFormat that is equal to this but with the trim setting on. - * @since 1.3 - * @deprecated Use {@link Builder#setTrim(boolean) Builder#setTrim(true)} - */ - @Deprecated - public CSVFormat withTrim() { - return builder().setTrim(true).build(); - } - - /** - * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. - * - * @param trim whether to trim leading and trailing blanks. - * @return A new CSVFormat that is equal to this but with the specified trim setting. 
- * @since 1.3 - * @deprecated Use {@link Builder#setTrim(boolean)} - */ - @Deprecated - public CSVFormat withTrim(final boolean trim) { - return builder().setTrim(trim).build(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.apache.commons.io.IOUtils.EOF; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Reader; +import java.io.Serializable; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.function.Supplier; + +import org.apache.commons.codec.binary.Base64OutputStream; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.function.IOStream; +import org.apache.commons.io.function.Uncheck; +import org.apache.commons.io.output.AppendableOutputStream; + +/** + * Specifies the format of a CSV file for parsing and writing. + * + *

Using predefined formats

+ * + *

+ * You can use one of the predefined formats: + *

+ * + *
    + *
  • {@link #DEFAULT}
  • + *
  • {@link #EXCEL}
  • + *
  • {@link #INFORMIX_UNLOAD}
  • + *
  • {@link #INFORMIX_UNLOAD_CSV}
  • + *
  • {@link #MONGODB_CSV}
  • + *
  • {@link #MONGODB_TSV}
  • + *
  • {@link #MYSQL}
  • + *
  • {@link #ORACLE}
  • + *
  • {@link #POSTGRESQL_CSV}
  • + *
  • {@link #POSTGRESQL_TEXT}
  • + *
  • {@link #RFC4180}
  • + *
  • {@link #TDF}
  • + *
+ * + *

+ * For example: + *

+ * + *
+ * CSVParser parser = CSVFormat.EXCEL.parse(reader);
+ * 
+ * + *

+ * The {@link CSVParser} provides static methods to parse other input types, for example: + *

+ * + *
+ * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
+ * 
+ * + *

Defining formats

+ * + *

+ * You can extend a format by calling the {@code set} methods of its {@link Builder}. For example: + *

+ * + *
{@code
+ * CSVFormat.EXCEL.builder().setNullString("N/A").setIgnoreSurroundingSpaces(true).get();
+ * }
+ * + *

Defining column names

+ * + *

+ * To define the column names you want to use to access records, write: + *

+ * + *
{@code
+ * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get();
+ * }
+ * + *

+ * Calling {@link Builder#setHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and assumes that your CSV source does not + * contain a first record that also defines column names. + * + * If it does, then you are overriding this metadata with your names and you should skip the first record by calling + * {@link Builder#setSkipHeaderRecord(boolean)} with {@code true}. + *

+ * + *

Parsing

+ * + *

+ * You can use a format directly to parse a reader. For example, to parse an Excel file with a column header, write: + *

+ * + *
{@code
+ * Reader in = ...;
+ * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get().parse(in);
+ * }
+ * + *

+ * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. + *

+ * + *

Referencing columns safely

+ * + *

+ * If your source contains a header record, you can simplify your code and safely reference columns by calling {@link Builder#setHeader(String...)} with no + * arguments: + *

+ * + *
+ * CSVFormat.EXCEL.builder().setHeader().get();
+ * 
+ * + *

+ * This causes the parser to read the first record and use its values as column names. + * + * Then, call one of the {@link CSVRecord} get methods that takes a String column name argument: + *

+ * + *
{@code
+ * String value = record.get("Col1");
+ * }
+ * + *

+ * This makes your code impervious to changes in column order in the CSV file. + *

+ * + *

Serialization

+ *

+ * This class implements the {@link Serializable} interface with the following caveats: + *

+ *
    + *
  • This class will no longer implement Serializable in 2.0.
  • + *
  • Serialization is not supported from one version to the next.
  • + *
+ *

+ * The {@code serialVersionUID} values are: + *

+ *
    + *
  • Version 1.10.0: {@code 2L}
  • + *
  • Version 1.9.0 through 1.0: {@code 1L}
  • + *
+ * + *

Notes

+ *

+ * This class is immutable. + *

+ *

+ * Not all settings are used for both parsing and writing. + *

+ */ +public final class CSVFormat implements Serializable { + + /** + * Builds CSVFormat instances. + * + * @since 1.9.0 + */ + public static class Builder implements Supplier { + + /** + * Creates a new default builder, as for {@link #RFC4180} but allowing empty lines. + * + *

+ * The {@link Builder} settings are: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')}
  • + *
  • {@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (true)}
  • + *
  • {@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}
  • + *
  • All other values take their Java defaults, {@code false} for booleans, {@code null} for object references.
  • + *
+ * + * @see Predefined#Default + * @see DuplicateHeaderMode#ALLOW_ALL + * + * @return a new builder initialized with the default settings listed above. + */ + public static Builder create() { + // @formatter:off + return new Builder() + .setDelimiter(Constants.COMMA) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setRecordSeparator(Constants.CRLF) + .setIgnoreEmptyLines(true) + .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL); + // @formatter:on + } + + /** + * Creates a new builder from the given format. + * + * @param csvFormat the source format. + * @return a new builder. + */ + public static Builder create(final CSVFormat csvFormat) { + return new Builder(csvFormat); + } + + private boolean allowMissingColumnNames; + + private boolean autoFlush; + + private Character commentMarker; + + private String delimiter; + + private DuplicateHeaderMode duplicateHeaderMode; + + private Character escapeCharacter; + + private String[] headerComments; + + private String[] headers; + + private boolean ignoreEmptyLines; + + private boolean ignoreHeaderCase; + + private boolean ignoreSurroundingSpaces; + + private String nullString; + + private Character quoteCharacter; + + private String quotedNullString; + + private QuoteMode quoteMode; + + private String recordSeparator; + + private boolean skipHeaderRecord; + + private boolean lenientEof; + + private boolean trailingData; + + private boolean trailingDelimiter; + + private boolean trim; + + /** The maximum number of rows to process, excluding the header row. 
*/ + private long maxRows; + + private Builder() { + // empty + } + + private Builder(final CSVFormat csvFormat) { + this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; + this.autoFlush = csvFormat.autoFlush; + this.commentMarker = csvFormat.commentMarker; + this.delimiter = csvFormat.delimiter; + this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; + this.escapeCharacter = csvFormat.escapeCharacter; + this.headerComments = csvFormat.headerComments; + this.headers = csvFormat.headers; + this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; + this.ignoreHeaderCase = csvFormat.ignoreHeaderCase; + this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces; + this.lenientEof = csvFormat.lenientEof; + this.maxRows = csvFormat.maxRows; + this.nullString = csvFormat.nullString; + this.quoteCharacter = csvFormat.quoteCharacter; + this.quoteMode = csvFormat.quoteMode; + this.quotedNullString = csvFormat.quotedNullString; + this.recordSeparator = csvFormat.recordSeparator; + this.skipHeaderRecord = csvFormat.skipHeaderRecord; + this.trailingData = csvFormat.trailingData; + this.trailingDelimiter = csvFormat.trailingDelimiter; + this.trim = csvFormat.trim; + } + + /** + * Builds a new CSVFormat instance. + * + * @return a new CSVFormat instance. + * @deprecated Use {@link #get()}. + */ + @Deprecated + public CSVFormat build() { + return get(); + } + + /** + * Builds a new CSVFormat instance. + * + * @return a new CSVFormat instance. + * @since 1.13.0 + */ + @Override + public CSVFormat get() { + return new CSVFormat(this); + } + + /** + * Sets the duplicate header names behavior, true to allow, false to disallow. + * + * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. + * @return This instance. + * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}. 
+ */ + @Deprecated + public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { + setDuplicateHeaderMode(allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY); + return this; + } + + /** + * Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an + * {@link IllegalArgumentException} to be thrown. + * + * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to + * cause an {@link IllegalArgumentException} to be thrown. + * @return This instance. + */ + public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames) { + this.allowMissingColumnNames = allowMissingColumnNames; + return this; + } + + /** + * Sets whether to flush on close. + * + * @param autoFlush whether to flush on close. + * @return This instance. + */ + public Builder setAutoFlush(final boolean autoFlush) { + this.autoFlush = autoFlush; + return this; + } + + /** + * Sets the comment marker character, use {@code null} to disable comments. + *

+ * The comment start character is only recognized at the start of a line. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ * + *
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * 
+ *

+ * writes: + *

+ * + *
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
+         * 
+ * + * @param commentMarker the comment start marker; a primitive {@code char} cannot be {@code null}, use {@link #setCommentMarker(Character)} to disable comments. + * @return This instance. + * @throws IllegalArgumentException thrown if the specified character is a line break + */ + public Builder setCommentMarker(final char commentMarker) { + setCommentMarker(Character.valueOf(commentMarker)); + return this; + } + + /** + * Sets the comment marker character, use {@code null} to disable comments. + *

+ * The comment start character is only recognized at the start of a line. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ * + *
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * 
+ *

+ * writes: + *

+ * + *
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
+         * 
+ * + * @param commentMarker the comment start marker, use {@code null} to disable. + * @return This instance. + * @throws IllegalArgumentException thrown if the specified character is a line break + */ + public Builder setCommentMarker(final Character commentMarker) { + if (isLineBreak(commentMarker)) { + throw new IllegalArgumentException("The comment start marker character cannot be a line break"); + } + this.commentMarker = commentMarker; + return this; + } + + /** + * Sets the delimiter character. + * + * @param delimiter the delimiter character. + * @return This instance. + * @throws IllegalArgumentException thrown if the delimiter is a line break + */ + public Builder setDelimiter(final char delimiter) { + return setDelimiter(String.valueOf(delimiter)); + } + + /** + * Sets the delimiter string. + * + * @param delimiter the delimiter string, must not be empty or contain a line break. + * @return This instance. + * @throws IllegalArgumentException thrown if the delimiter contains a line break or is empty + */ + public Builder setDelimiter(final String delimiter) { + if (containsLineBreak(delimiter)) { + throw new IllegalArgumentException("The delimiter cannot be a line break"); + } + if (delimiter.isEmpty()) { + throw new IllegalArgumentException("The delimiter cannot be empty"); + } + this.delimiter = delimiter; + return this; + } + + /** + * Sets the duplicate header names behavior. + * + * @param duplicateHeaderMode the duplicate header names behavior, must not be {@code null}. + * @return This instance. + * @throws NullPointerException thrown if {@code duplicateHeaderMode} is {@code null} + * @since 1.10.0 + */ + public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { + this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); + return this; + } + + /** + * Sets the escape character. + * + * @param escapeCharacter the escape character. + * @return This instance. + * @throws IllegalArgumentException thrown if the specified character is a line break + */ + public Builder setEscape(final char escapeCharacter) { + setEscape(Character.valueOf(escapeCharacter)); + return this; + } + + /** + * Sets the escape character. + * + * @param escapeCharacter the escape character. + * @return This instance. 
+ * @throws IllegalArgumentException thrown if the specified character is a line break + */ + public Builder setEscape(final Character escapeCharacter) { + if (isLineBreak(escapeCharacter)) { + throw new IllegalArgumentException("The escape character cannot be a line break"); + } + this.escapeCharacter = escapeCharacter; + return this; + } + + /** + * Sets the header defined by the given {@link Enum} class. + * + *

+ * Example: + *

+ * + *
+         * public enum HeaderEnum {
+         *     Name, Email, Phone
+         * }
+         *
+         * Builder builder = builder.setHeader(HeaderEnum.class);
+         * 
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ * + * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. + * @return This instance. + */ + public Builder setHeader(final Class<? extends Enum<?>> headerEnum) { + String[] header = null; + if (headerEnum != null) { + // Use the enum constant names, in declaration order, as the header names. + final Enum<?>[] enumValues = headerEnum.getEnumConstants(); + header = new String[enumValues.length]; + Arrays.setAll(header, i -> enumValues[i].name()); + } + return setHeader(header); + } + + /** + * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: + * + *
+         * builder.setHeader();
+         * 
+ * + * or specified manually with: + * + *
+         * builder.setHeader(resultSet);
+         * 
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ * + * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. + * @return This instance. + * @throws SQLException if a database access error occurs or this method is called on a closed result set. + */ + public Builder setHeader(final ResultSet resultSet) throws SQLException { + return setHeader(resultSet != null ? resultSet.getMetaData() : null); + } + + /** + * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: + * + *
+         * builder.setHeader();
+         * 
+ * + * or specified manually with: + * + *
+         * builder.setHeader(resultSetMetaData);
+         * 
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ * + * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. + * @return This instance. + * @throws SQLException if a database access error occurs or this method is called on a closed result set. + */ + public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { + String[] labels = null; + if (resultSetMetaData != null) { + final int columnCount = resultSetMetaData.getColumnCount(); + labels = new String[columnCount]; + for (int i = 0; i < columnCount; i++) { + // JDBC column indexes are 1-based, hence i + 1. + labels[i] = resultSetMetaData.getColumnLabel(i + 1); + } + } + return setHeader(labels); + } + + /** + * Sets the header to the given values. The header can be parsed automatically from the input file with: + * + *
+         * builder.setHeader();
+         * 
+ * + * or specified manually with: + * + *
{@code
+         * builder.setHeader("name", "email", "phone");
+         * }
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ *

+ * This method keeps a copy of the input array. + *

+ * + * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. + * @return This instance. + */ + public Builder setHeader(final String... header) { + this.headers = CSVFormat.clone(header); + return this; + } + + /** + * Sets the header comments to write before the CSV data. + *

+ * This setting is ignored by the parser. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ * + *
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * 
+ *

+ * writes: + *

+ * + *
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
+         * 
+ *

+ * This method keeps a copy of the input array. + *

+ * + * @param headerComments the headerComments which will be printed by the Printer before the CSV data. + * @return This instance. + */ + public Builder setHeaderComments(final Object... headerComments) { + this.headerComments = CSVFormat.clone(toStringArray(headerComments)); + return this; + } + + /** + * Sets the header comments to write before the CSV data. + *

+ * This setting is ignored by the parser. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ * + *
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString());
+         * 
+ *

+ * writes: + *

+ * + *
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
+         * 
+ *

+ * This method keeps a copy of the input array. + *

+ * + * @param headerComments the headerComments which will be printed by the Printer before the CSV data. + * @return This instance. + */ + public Builder setHeaderComments(final String... headerComments) { + this.headerComments = CSVFormat.clone(headerComments); + return this; + } + + /** + * Sets the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty lines to empty + * records. + * + * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate + * empty lines to empty records. + * @return This instance. + */ + public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) { + this.ignoreEmptyLines = ignoreEmptyLines; + return this; + } + + /** + * Sets the parser case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. + * + * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. + * @return This instance. + */ + public Builder setIgnoreHeaderCase(final boolean ignoreHeaderCase) { + this.ignoreHeaderCase = ignoreHeaderCase; + return this; + } + + /** + * Sets the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. + * + * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. + * @return This instance. + */ + public Builder setIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { + this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; + return this; + } + + /** + * Sets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. + * + * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. + * @return This instance. 
+ * @since 1.11.0 + */ + public Builder setLenientEof(final boolean lenientEof) { + this.lenientEof = lenientEof; + return this; + } + + /** + * Sets the maximum number of rows to process, excluding the header row. + *

+ * Values less than or equal to 0 mean no limit. + *

+ * + * @param maxRows the maximum number of rows to process, excluding the header row. + * @return This instance. + * @since 1.14.0 + */ + public Builder setMaxRows(final long maxRows) { + this.maxRows = maxRows; + return this; + } + + /** + * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}. + * + *
    + *
  • Reading: Converts strings equal to the given {@code nullString} to {@code null} when reading records.
  • + *
  • Writing: Writes {@code null} as the given {@code nullString} when writing records.
  • + *
+ * + * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null}. + * @return This instance. + */ + public Builder setNullString(final String nullString) { + this.nullString = nullString; + return setQuotedNullString(); + } + + /** + * Sets the quote character. + * + * @param quoteCharacter the quote character. + * @return This instance. + */ + public Builder setQuote(final char quoteCharacter) { + setQuote(Character.valueOf(quoteCharacter)); + return this; + } + + /** + * Sets the quote character, use {@code null} to disable. + * + * @param quoteCharacter the quote character, use {@code null} to disable. + * @return This instance. + */ + public Builder setQuote(final Character quoteCharacter) { + if (isLineBreak(quoteCharacter)) { + throw new IllegalArgumentException("The quoteCharacter cannot be a line break"); + } + this.quoteCharacter = quoteCharacter; + return setQuotedNullString(); + } + + private Builder setQuotedNullString() { + final Character quote = quoteCharacter != null ? quoteCharacter : Constants.DOUBLE_QUOTE_CHAR; + this.quotedNullString = quote + nullString + quote; + return this; + } + + /** + * Sets the quote policy to use for output. + * + * @param quoteMode the quote policy to use for output. + * @return This instance. + */ + public Builder setQuoteMode(final QuoteMode quoteMode) { + this.quoteMode = quoteMode; + return this; + } + + /** + * Sets the record separator to use for output. + * + *

+ * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' + * and "\r\n" + *

+ * + * @param recordSeparator the record separator to use for output. + * @return This instance. + */ + public Builder setRecordSeparator(final char recordSeparator) { + this.recordSeparator = String.valueOf(recordSeparator); + return this; + } + + /** + * Sets the record separator to use for output. + * + *

+ * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' + * and "\r\n" + *

+ * + * @param recordSeparator the record separator to use for output. + * @return This instance. + */ + public Builder setRecordSeparator(final String recordSeparator) { + this.recordSeparator = recordSeparator; + return this; + } + + /** + * Sets whether to skip the header record. + * + * @param skipHeaderRecord whether to skip the header record. + * @return This instance. + */ + public Builder setSkipHeaderRecord(final boolean skipHeaderRecord) { + this.skipHeaderRecord = skipHeaderRecord; + return this; + } + + /** + * Sets whether reading trailing data is allowed in records, helps Excel compatibility. + * + * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility. + * @return This instance. + * @since 1.11.0 + */ + public Builder setTrailingData(final boolean trailingData) { + this.trailingData = trailingData; + return this; + } + + /** + * Sets whether to add a trailing delimiter. + * + *

+ * When writing, a delimiter is appended after the last value of each record. When reading, the empty field + * that such a trailing delimiter produces is dropped so the output round-trips back to the original record; + * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept. + *

+ *

+ * This is unrelated to {@link #setTrailingData(boolean) trailing data}, which controls whether characters + * after the closing quote of an encapsulated value are tolerated when reading. + *

+ * + * @param trailingDelimiter whether to add a trailing delimiter. + * @return This instance. + */ + public Builder setTrailingDelimiter(final boolean trailingDelimiter) { + this.trailingDelimiter = trailingDelimiter; + return this; + } + + + /** + * Sets whether to trim leading and trailing blanks. + * + * @param trim whether to trim leading and trailing blanks. + * @return This instance. + */ + public Builder setTrim(final boolean trim) { + this.trim = trim; + return this; + } + } + + /** + * Enumerates predefined formats. + * + * @since 1.2 + */ + public enum Predefined { + + /** + * The DEFAULT predefined format. + * + * @see CSVFormat#DEFAULT + */ + Default(DEFAULT), + + /** + * The EXCEL predefined format. + * + * @see CSVFormat#EXCEL + */ + Excel(EXCEL), + + /** + * The INFORMIX_UNLOAD predefined format. + * + * @see CSVFormat#INFORMIX_UNLOAD + * @since 1.3 + */ + InformixUnload(INFORMIX_UNLOAD), + + /** + * The INFORMIX_UNLOAD_CSV predefined format. + * + * @see CSVFormat#INFORMIX_UNLOAD_CSV + * @since 1.3 + */ + InformixUnloadCsv(INFORMIX_UNLOAD_CSV), + + /** + * The MONGODB_CSV predefined format. + * + * @see CSVFormat#MONGODB_CSV + * @since 1.7 + */ + MongoDBCsv(MONGODB_CSV), + + /** + * The MONGODB_TSV predefined format. + * + * @see CSVFormat#MONGODB_TSV + * @since 1.7 + */ + MongoDBTsv(MONGODB_TSV), + + /** + * The MYSQL predefined format. + * + * @see CSVFormat#MYSQL + */ + MySQL(MYSQL), + + /** + * The ORACLE predefined format. + * + * @see CSVFormat#ORACLE + */ + Oracle(ORACLE), + + /** + * The POSTGRESQL_CSV predefined format. + * + * @see CSVFormat#POSTGRESQL_CSV + * @since 1.5 + */ + PostgreSQLCsv(POSTGRESQL_CSV), + + /** + * The POSTGRESQL_TEXT predefined format. + * + * @see CSVFormat#POSTGRESQL_TEXT + */ + PostgreSQLText(POSTGRESQL_TEXT), + + /** + * The RFC4180 predefined format. + * + * @see CSVFormat#RFC4180 + */ + RFC4180(CSVFormat.RFC4180), + + /** + * The TDF predefined format. 
+ * + * @see CSVFormat#TDF + */ + TDF(CSVFormat.TDF); + + private final CSVFormat format; + + Predefined(final CSVFormat format) { + this.format = format; + } + + /** + * Gets the format. + * + * @return the format. + */ + public CSVFormat getFormat() { + return format; + } + } + + /** + * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. + * + *

+ * The {@link Builder} settings are: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')}
  • + *
  • {@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (true)}
  • + *
  • {@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}
  • + *
+ * + * @see Predefined#Default + * @see DuplicateHeaderMode#ALLOW_ALL + */ + public static final CSVFormat DEFAULT = new CSVFormat(Builder.create()); + + /** + * Microsoft Excel file + * format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary to customize + * this format to accommodate your regional settings. + * + *

+ * For example for parsing or generating a CSV file on a French system the following format will be used: + *

+ * + *
+     * CSVFormat format = CSVFormat.EXCEL.builder().setDelimiter(';').get();
+     * 
+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')}
  • + *
  • {@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}
  • + *
  • {@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}
  • + *
  • {@link Builder#setAllowMissingColumnNames(boolean) setAllowMissingColumnNames}{@code (true)}
  • + *
  • {@link Builder#setTrailingData(boolean) setTrailingData}{@code (true)}
  • + *
  • {@link Builder#setLenientEof(boolean) setLenientEof}{@code (true)}
  • + *
+ *

+ * Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and + * {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(false)}. + *

+ * + * @see Predefined#Excel + * @see DuplicateHeaderMode#ALLOW_ALL + * @see Microsoft Excel + * + */ + // @formatter:off + public static final CSVFormat EXCEL = DEFAULT.builder() + .setIgnoreEmptyLines(false) + .setAllowMissingColumnNames(true) + .setTrailingData(true) + .setLenientEof(true) + .get(); + // @formatter:on + + /** + * Default Informix CSV UNLOAD + * format used by the {@code UNLOAD TO file_name} operation. + * + *

+ * This is a pipe-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. + * The default NULL string is {@code "\\N"}. + *

+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code ('|')}
  • + *
  • {@link Builder#setEscape(char) setEscape}{@code ('\\')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('\"')}
  • + *
  • {@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}
  • + *
+ * + * @see Predefined#InformixUnload + * @see Informix CSV UNLOAD + * @since 1.3 + */ + // @formatter:off + public static final CSVFormat INFORMIX_UNLOAD = DEFAULT.builder() + .setDelimiter(Constants.PIPE) + .setEscape(Constants.BACKSLASH) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setRecordSeparator(Constants.LF) + .get(); + // @formatter:on + + /** + * Default Informix CSV UNLOAD + * format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) + * + *

+ * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are not escaped. + * The default NULL string is {@code "\\N"}. + *

+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('\"')}
  • + *
  • {@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}
  • + *
+ * + * @see Predefined#InformixUnloadCsv + * @see + * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm + * @since 1.3 + */ + // @formatter:off + public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT.builder() + .setDelimiter(Constants.COMMA) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setRecordSeparator(Constants.LF) + .get(); + // @formatter:on + + /** + * Default MongoDB CSV format used by the {@code mongoexport} operation. + *

+ * Parsing is not supported yet. + *

+ * + *

+ * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with {@code '"'}. A header line with field + * names is expected. + *

+ *

+ * As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states: + *

+ *
The csv parser accepts that data that complies with RFC RFC-4180. As a result, backslashes are + * not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape internal double-quote marks by prepending + * another double-quote.
+ *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')}
  • + *
  • {@link Builder#setEscape(char) setEscape}{@code ('"')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')}
  • + *
  • {@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}
  • + *
+ * + * @see Predefined#MongoDBCsv + * @see QuoteMode#MINIMAL + * @see MongoDB mongoexport command documentation + * @since 1.7 + */ + // @formatter:off + public static final CSVFormat MONGODB_CSV = DEFAULT.builder() + .setDelimiter(Constants.COMMA) + .setEscape(Constants.DOUBLE_QUOTE_CHAR) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setQuoteMode(QuoteMode.MINIMAL) + .get(); + // @formatter:on + + /** + * Default MongoDB TSV format used by the {@code mongoexport} operation. + *

+ * Parsing is not supported yet. + *

+ * + *

+ * This is a tab-delimited format. Values are double quoted only if needed and special + * characters are escaped with {@code '"'}. A header line with field names is expected. + *

+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}
  • + *
  • {@link Builder#setEscape(char) setEscape}{@code ('"')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')}
  • + *
  • {@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}
  • + *
  • {@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord}{@code (false)}
  • + *
+ * + * @see Predefined#MongoDBTsv + * @see QuoteMode#MINIMAL + * @see MongoDB mongoexport command + * documentation + * @since 1.7 + */ + // @formatter:off + public static final CSVFormat MONGODB_TSV = DEFAULT.builder() + .setDelimiter(Constants.TAB) + .setEscape(Constants.DOUBLE_QUOTE_CHAR) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setQuoteMode(QuoteMode.MINIMAL) + .setSkipHeaderRecord(false) + .get(); + // @formatter:on + + /** + * Default MySQL + * format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. + * + *

+ * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special + * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. + *

+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}
  • + *
  • {@link Builder#setEscape(char) setEscape}{@code ('\\')}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}
  • + *
  • {@link Builder#setQuote(Character) setQuote}{@code (null)}
  • + *
  • {@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}
  • + *
  • {@link Builder#setNullString(String) setNullString}{@code ("\\N")}
  • + *
  • {@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}
  • + *
+ * + * @see Predefined#MySQL + * @see QuoteMode#ALL_NON_NULL + * @see MySQL + */ + // @formatter:off + public static final CSVFormat MYSQL = DEFAULT.builder() + .setDelimiter(Constants.TAB) + .setEscape(Constants.BACKSLASH) + .setIgnoreEmptyLines(false) + .setQuote(null) + .setRecordSeparator(Constants.LF) + .setNullString(Constants.SQL_NULL_STRING) + .setQuoteMode(QuoteMode.ALL_NON_NULL) + .get(); + // @formatter:on + + /** + * Default + * Oracle + * format used by the SQL*Loader utility. + * + *

+ * This is a comma-delimited format with the system line separator character as the record separator. Values are + * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is + * {@code "\\N"}. Values are trimmed. + *

+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')} // default is {@code FIELDS TERMINATED BY ','}
  • + *
  • {@link Builder#setEscape(char) setEscape}{@code ('\\')}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')} // default is {@code OPTIONALLY ENCLOSED BY '"'}
  • + *
  • {@link Builder#setNullString(String) setNullString}{@code ("\\N")}
  • + *
  • {@link Builder#setTrim(boolean) setTrim}{@code (true)}
  • + *
  • {@link Builder#setRecordSeparator(String) setRecordSeparator}{@code (System.lineSeparator())}
  • + *
  • {@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}
  • + *
+ * + * @see Predefined#Oracle + * @see QuoteMode#MINIMAL + * @see Oracle CSV Format Specification + * @since 1.6 + */ + // @formatter:off + public static final CSVFormat ORACLE = DEFAULT.builder() + .setDelimiter(Constants.COMMA) + .setEscape(Constants.BACKSLASH) + .setIgnoreEmptyLines(false) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setNullString(Constants.SQL_NULL_STRING) + .setTrim(true) + .setRecordSeparator(System.lineSeparator()) + .setQuoteMode(QuoteMode.MINIMAL) + .get(); + // @formatter:on + + /** + * Default PostgreSQL CSV format used by the {@code COPY} operation. + * + *

+ * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special + * characters are not escaped. The default NULL string is {@code ""}. + *

+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')}
  • + *
  • {@link Builder#setEscape(Character) setEscape}{@code (null)}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')}
  • + *
  • {@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}
  • + *
  • {@link Builder#setNullString(String) setNullString}{@code ("")}
  • + *
  • {@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}
  • + *
+ * + * @see Predefined#PostgreSQLCsv + * @see QuoteMode#ALL_NON_NULL + * @see PostgreSQL CSV + * @since 1.5 + */ + // @formatter:off + public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder() + .setDelimiter(Constants.COMMA) + .setEscape(null) + .setIgnoreEmptyLines(false) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setRecordSeparator(Constants.LF) + .setNullString(Constants.EMPTY) + .setQuoteMode(QuoteMode.ALL_NON_NULL) + .get(); + // @formatter:on + + /** + * Default PostgreSQL Text format used by the {@code COPY} operation. + * + *

+ * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special + * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}. + *

+ * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}
  • + *
  • {@link Builder#setEscape(char) setEscape}{@code ('\\')}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}
  • + *
  • {@link Builder#setQuote(Character) setQuote}{@code (null)}
  • + *
  • {@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}
  • + *
  • {@link Builder#setNullString(String) setNullString}{@code ("\\N")}
  • + *
  • {@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}
  • + *
+ * + * @see Predefined#PostgreSQLText + * @see QuoteMode#ALL_NON_NULL + * @see PostgreSQL Text + * @since 1.5 + */ + // @formatter:off + public static final CSVFormat POSTGRESQL_TEXT = DEFAULT.builder() + .setDelimiter(Constants.TAB) + .setEscape(Constants.BACKSLASH) + .setIgnoreEmptyLines(false) + .setQuote(null) + .setRecordSeparator(Constants.LF) + .setNullString(Constants.SQL_NULL_STRING) + .setQuoteMode(QuoteMode.ALL_NON_NULL) + .get(); + // @formatter:on + + /** + * Comma separated format as defined by RFC 4180. + * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code (',')}
  • + *
  • {@link Builder#setQuote(char) setQuote}{@code ('"')}
  • + *
  • {@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}
  • + *
  • {@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}
  • + *
+ * + * @see Predefined#RFC4180 + * @see RFC 4180 + */ + public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).get(); + + private static final long serialVersionUID = 2L; + + /** + * Tab-delimited format (TDF). + * + *

+ * The {@link Builder} settings are the {@link #DEFAULT} with: + *

+ *
    + *
  • {@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}
  • + *
  • {@link Builder#setIgnoreSurroundingSpaces(boolean) setIgnoreSurroundingSpaces}{@code (true)}
  • + *
+ * + * @see Predefined#TDF + * @see TDF + */ + // @formatter:off + public static final CSVFormat TDF = DEFAULT.builder() + .setDelimiter(Constants.TAB) + .setIgnoreSurroundingSpaces(true) + .get(); + // @formatter:on + + /** + * Null-safe clone of an array. + * + * @param The array element type. + * @param values the source array + * @return the cloned array. + */ + @SafeVarargs + static T[] clone(final T... values) { + return values == null ? null : values.clone(); + } + + /** + * Returns true if the given string contains the search char. + * + * @param source the string to check. + * @param searchCh the character to search. + * @return true if {@code source} contains {@code searchCh}. + */ + private static boolean contains(final String source, final char searchCh) { + return Objects.requireNonNull(source, "source").indexOf(searchCh) >= 0; + } + + /** + * Returns true if the given string contains a line break character. + * + * @param source the string to check. + * @return true if {@code source} contains a line break character. + */ + private static boolean containsLineBreak(final String source) { + return contains(source, Constants.CR) || contains(source, Constants.LF); + } + + /** + * Creates a null-safe copy of the given instance. + * + * @param format the instance to copy, may be {@code null}. + * @return a copy of the given instance or null if the input is null. + */ + static CSVFormat copy(final CSVFormat format) { + return format != null ? format.copy() : null; + } + + /** + * Tests whether the given string is {@code null} or is empty after {@link String#trim()}. + * + * @param value the string to test, may be {@code null}. + * @return true if {@code value} is {@code null} or trims to the empty string. + */ + static boolean isBlank(final String value) { + return value == null || value.trim().isEmpty(); + } + + /** + * Returns true if the given character is a line break character. + * + * @param c the character to check. + * @return true if {@code c} is a line break character. + */ + private static boolean isLineBreak(final char c) { + return c == Constants.LF || c == Constants.CR; + } + + /** + * Returns true if the given character is a line break character. + * + * @param c the character to check, may be null. 
+ * @return true if {@code c} is a line break character (and not null). + */ + private static boolean isLineBreak(final Character c) { + return c != null && isLineBreak(c.charValue()); // Explicit unboxing is intentional + } + + /** Same test as in {@link String#trim()}. */ + private static boolean isTrimChar(final char ch) { + return ch <= Constants.SP; + } + + /** Same test as in {@link String#trim()}, applied to the character at {@code pos}. */ + private static boolean isTrimChar(final CharSequence charSequence, final int pos) { + return isTrimChar(charSequence.charAt(pos)); + } + + /** + * Creates a new CSV format with the specified delimiter. + * + *

+ * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized with null/false. + *

+ * + * @param delimiter the char used for value separation, must not be a line break character + * @return a new CSV format. + * @throws IllegalArgumentException if the delimiter is a line break character + * @see #DEFAULT + * @see #RFC4180 + * @see #MYSQL + * @see #EXCEL + * @see #TDF + */ + public static CSVFormat newFormat(final char delimiter) { + return new CSVFormat(new Builder().setDelimiter(delimiter)); + } + + static String[] toStringArray(final Object[] values) { + if (values == null) { + return null; + } + final String[] strings = new String[values.length]; + Arrays.setAll(strings, i -> Objects.toString(values[i], null)); + return strings; + } + + static CharSequence trim(final CharSequence charSequence) { + if (charSequence instanceof String) { + return ((String) charSequence).trim(); + } + final int count = charSequence.length(); + int len = count; + int pos = 0; + + while (pos < len && isTrimChar(charSequence, pos)) { + pos++; + } + while (pos < len && isTrimChar(charSequence, len - 1)) { + len--; + } + return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; + } + + /** + * Gets one of the predefined formats from {@link CSVFormat.Predefined}. + * + * @param format name + * @return one of the predefined formats + * @since 1.2 + */ + public static CSVFormat valueOf(final String format) { + return CSVFormat.Predefined.valueOf(format).getFormat(); + } + + /** How duplicate headers are handled. */ + private final DuplicateHeaderMode duplicateHeaderMode; + + /** Whether missing column names are allowed when parsing the header line. */ + private final boolean allowMissingColumnNames; + + /** Whether to flush on close. */ + private final boolean autoFlush; + + /** Set to null if commenting is disabled. */ + private final Character commentMarker; + + /** The character delimiting the values (typically ";", "," or "\t"). */ + private final String delimiter; + + /** Set to null if escaping is disabled. 
*/ + private final Character escapeCharacter; + + /** Array of header column names. */ + private final String[] headers; + + /** Array of header comment lines. */ + private final String[] headerComments; + + /** Whether empty lines between records are ignored when parsing input. */ + private final boolean ignoreEmptyLines; + + /** Should ignore header names case. */ + private final boolean ignoreHeaderCase; + + /** Should leading/trailing spaces be ignored around values?. */ + private final boolean ignoreSurroundingSpaces; + + /** The string to be used for null values. */ + private final String nullString; + + /** Set to null if quoting is disabled. */ + private final Character quoteCharacter; + + /** Set to {@code quoteCharacter + nullString + quoteCharacter} */ + private final String quotedNullString; + + /** The quote policy output fields. */ + private final QuoteMode quoteMode; + + /** For output. */ + private final String recordSeparator; + + /** Whether to skip the header record. */ + private final boolean skipHeaderRecord; + + /** Whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. */ + private final boolean lenientEof; + + /** Whether reading trailing data is allowed in records, helps Excel compatibility. */ + private final boolean trailingData; + + /** Whether to add a trailing delimiter. */ + private final boolean trailingDelimiter; + + /** Whether to trim leading and trailing blanks. */ + private final boolean trim; + + /** The maximum number of rows to process, excluding the header row. 
*/ + private final long maxRows; + + private CSVFormat(final Builder builder) { + this.allowMissingColumnNames = builder.allowMissingColumnNames; + this.autoFlush = builder.autoFlush; + this.commentMarker = builder.commentMarker; + this.delimiter = builder.delimiter; + this.duplicateHeaderMode = builder.duplicateHeaderMode; + this.escapeCharacter = builder.escapeCharacter; + this.headerComments = builder.headerComments; + this.headers = builder.headers; + this.ignoreEmptyLines = builder.ignoreEmptyLines; + this.ignoreHeaderCase = builder.ignoreHeaderCase; + this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces; + this.lenientEof = builder.lenientEof; + this.maxRows = builder.maxRows; + this.nullString = builder.nullString; + this.quoteCharacter = builder.quoteCharacter; + this.quoteMode = builder.quoteMode; + this.quotedNullString = builder.quotedNullString; + this.recordSeparator = builder.recordSeparator; + this.skipHeaderRecord = builder.skipHeaderRecord; + this.trailingData = builder.trailingData; + this.trailingDelimiter = builder.trailingDelimiter; + this.trim = builder.trim; + validate(); + } + + private void append(final char c, final Appendable appendable) throws IOException { + // try { + appendable.append(c); + // } catch (final IOException e) { + // throw new UncheckedIOException(e); + // } + } + + private void append(final CharSequence csq, final Appendable appendable) throws IOException { + // try { + appendable.append(csq); + // } catch (final IOException e) { + // throw new UncheckedIOException(e); + // } + } + + /** + * Creates a new Builder for this instance. + * + * @return a new Builder. + */ + public Builder builder() { + return Builder.create(this); + } + + /** + * Creates a copy of this instance. + * + * @return a copy of this instance. 
+ */ + CSVFormat copy() { + return builder().get(); + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final CSVFormat other = (CSVFormat) obj; + return allowMissingColumnNames == other.allowMissingColumnNames && autoFlush == other.autoFlush && + Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) && + duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) && + Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) && + ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase && + ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof && maxRows == other.maxRows && + Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode && + Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) && + skipHeaderRecord == other.skipHeaderRecord && trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && + trim == other.trim; + } + + private void escape(final char c, final Appendable appendable) throws IOException { + append(escapeCharacter.charValue(), appendable); // Explicit unboxing is intentional + append(c, appendable); + } + + /** + * Formats the specified values as a CSV record string. + * + * @param values the values to format. + * @return the formatted values. + */ + public String format(final Object... values) { + return Uncheck.get(() -> format_(values)); + } + + private String format_(final Object... 
values) throws IOException { + final StringWriter out = new StringWriter(); + try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { + csvPrinter.printRecord(values); + final String res = out.toString(); + final int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length(); + return res.substring(0, len); + } + } + + /** + * Gets whether duplicate names are allowed in the headers. + * + * @return whether duplicate header names are allowed + * @since 1.7 + * @deprecated Use {@link #getDuplicateHeaderMode()}. + */ + @Deprecated + public boolean getAllowDuplicateHeaderNames() { + return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL; + } + + /** + * Gets whether missing column names are allowed when parsing the header line. + * + * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}. + */ + public boolean getAllowMissingColumnNames() { + return allowMissingColumnNames; + } + + /** + * Gets whether to flush on close. + * + * @return whether to flush on close. + * @since 1.6 + */ + public boolean getAutoFlush() { + return autoFlush; + } + + /** + * Gets the comment marker character, {@code null} disables comments. + *

+ * The comment start character is only recognized at the start of a line. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment + * line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ * + *
+     * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+     * 
+ *

+ * writes: + *

+ * + *
+     * # Generated by Apache Commons CSV
+     * # 1970-01-01T00:00:00Z
+     * 
+ * + * @return the comment start marker, may be {@code null} + */ + public Character getCommentMarker() { + return commentMarker; + } + + /** + * Gets the first character delimiting the values (typically ';', ',' or '\t'). + * + * @return the first delimiter character. + * @deprecated Use {@link #getDelimiterString()}. + */ + @Deprecated + public char getDelimiter() { + return delimiter.charAt(0); + } + + /** + * Gets the character delimiting the values (typically ";", "," or "\t"). + * + * @return the delimiter. + */ + char[] getDelimiterCharArray() { + return delimiter.toCharArray(); + } + + /** + * Gets the character delimiting the values (typically ";", "," or "\t"). + * + * @return the delimiter. + * @since 1.9.0 + */ + public String getDelimiterString() { + return delimiter; + } + + /** + * Gets how duplicate headers are handled. + * + * @return if duplicate header values are allowed, allowed conditionally, or disallowed. + * @since 1.10.0 + */ + public DuplicateHeaderMode getDuplicateHeaderMode() { + return duplicateHeaderMode; + } + + /** + * Gets the escape character. + * + * @return the escape character, may be {@code 0} + */ + char getEscapeChar() { + return escapeCharacter != null ? escapeCharacter.charValue() : 0; // Explicit unboxing is intentional + } + + /** + * Gets the escape character. + * + * @return the escape character, may be {@code null} + */ + public Character getEscapeCharacter() { + return escapeCharacter; + } + + /** + * Gets a copy of the header array. + * + * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file + */ + public String[] getHeader() { + return headers != null ? headers.clone() : null; + } + + /** + * Gets a copy of the header comment array to write before the CSV data. + *

+ * This setting is ignored by the parser. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment + * line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ * + *
+     * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+     * 
+ *

+ * writes: + *

+ * + *
+     * # Generated by Apache Commons CSV
+     * # 1970-01-01T00:00:00Z
+     * 
+ * + * @return a copy of the header comment array; {@code null} if disabled. + */ + public String[] getHeaderComments() { + return headerComments != null ? headerComments.clone() : null; + } + + /** + * Gets whether empty lines between records are ignored when parsing input. + * + * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records. + */ + public boolean getIgnoreEmptyLines() { + return ignoreEmptyLines; + } + + /** + * Gets whether header names will be accessed ignoring case when parsing input. + * + * @return {@code true} if header names cases are ignored, {@code false} if they are case-sensitive. + * @since 1.3 + */ + public boolean getIgnoreHeaderCase() { + return ignoreHeaderCase; + } + + /** + * Gets whether spaces around values are ignored when parsing input. + * + * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. + */ + public boolean getIgnoreSurroundingSpaces() { + return ignoreSurroundingSpaces; + } + + /** + * Gets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. + * + * @return whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. + * @since 1.11.0 + */ + public boolean getLenientEof() { + return lenientEof; + } + + /** + * Gets the maximum number of rows to process, excluding the header row. + *

+ * Values less than or equal to 0 mean no limit. + *

+ * + * @return The maximum number of rows to process, excluding the header row. + * @since 1.14.0 + */ + public long getMaxRows() { + return maxRows; + } + + /** + * Gets the String to convert to and from {@code null}. + *
    + *
  • Reading: Converts strings equal to the given {@code nullString} to {@code null} when reading records.
  • + *
  • Writing: Writes {@code null} as the given {@code nullString} when writing records.
  • + *
+ * + * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} + */ + public String getNullString() { + return nullString; + } + + /** + * Gets the character used to encapsulate values containing special characters. + * + * @return the quoteChar character, may be {@code null} + */ + public Character getQuoteCharacter() { + return quoteCharacter; + } + + /** + * Gets the quote policy output fields. + * + * @return the quote policy + */ + public QuoteMode getQuoteMode() { + return quoteMode; + } + + /** + * Gets the record separator delimiting output records. + * + * @return the record separator + */ + public String getRecordSeparator() { + return recordSeparator; + } + + /** + * Gets whether to skip the header record. + * + * @return whether to skip the header record. + */ + public boolean getSkipHeaderRecord() { + return skipHeaderRecord; + } + + /** + * Gets whether reading trailing data is allowed in records, helps Excel compatibility. + * + * @return whether reading trailing data is allowed in records, helps Excel compatibility. + * @since 1.11.0 + */ + public boolean getTrailingData() { + return trailingData; + } + + /** + * Gets whether to add a trailing delimiter. + * + *

+ * When writing, a delimiter is appended after the last value of each record. When reading, the empty field + * that such a trailing delimiter produces is dropped so the output round-trips back to the original record; + * a quoted empty trailing field ({@code ""}) is a real value rather than a trailing delimiter and is kept. + *

+ *

+ * This is unrelated to {@link #getTrailingData() trailing data}, which controls whether characters after the + * closing quote of an encapsulated value are tolerated when reading. + *

+ * + * @return whether to add a trailing delimiter. + * @since 1.3 + */ + public boolean getTrailingDelimiter() { + return trailingDelimiter; + } + + /** + * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} and by {@code CSVParser#addRecordValue(boolean)}. + * + * @return whether to trim leading and trailing blanks. + */ + public boolean getTrim() { + return trim; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(headerComments); + result = prime * result + Arrays.hashCode(headers); + result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter, + ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, maxRows, nullString, quoteCharacter, quoteMode, quotedNullString, + recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim); + return result; + } + + /** + * Tests whether comments are supported by this format. + * + * Note that the comment introducer character is only recognized at the start of a line. + * + * @return {@code true} if comments are supported, {@code false} otherwise. + */ + public boolean isCommentMarkerSet() { + return commentMarker != null; + } + + /** + * Tests whether the next characters constitute a delimiter. + * + * @param ch0 the first char (index 0). 
+ * @param charSeq the match char sequence + * @param startIndex where start to match + * @param delimiter the delimiter + * @param delimiterLength the delimiter length + * @return true if the match is successful + */ + private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) { + if (ch0 != delimiter[0]) { + return false; + } + final int len = charSeq.length(); + if (startIndex + delimiterLength > len) { + return false; + } + for (int i = 1; i < delimiterLength; i++) { + if (charSeq.charAt(startIndex + i) != delimiter[i]) { + return false; + } + } + return true; + } + + /** + * Tests whether escapes are being processed. + * + * @return {@code true} if escapes are processed + */ + public boolean isEscapeCharacterSet() { + return escapeCharacter != null; + } + + /** + * Tests whether a null string has been defined. + * + * @return {@code true} if a nullString is defined + */ + public boolean isNullStringSet() { + return nullString != null; + } + + /** + * Tests whether a quoteChar has been defined. + * + * @return {@code true} if a quoteChar is defined + */ + public boolean isQuoteCharacterSet() { + return quoteCharacter != null; + } + + IOStream limit(final IOStream stream) { + return useMaxRows() ? stream.limit(getMaxRows()) : stream; + } + + /** + * Parses the specified content. + * + *

+ * See also the various static parse methods on {@link CSVParser}. + *

+ * + * @param reader the input stream + * @return a parser over a stream of {@link CSVRecord}s. + * @throws IOException If an I/O error occurs + * @throws CSVException Thrown on invalid input. + */ + public CSVParser parse(final Reader reader) throws IOException { + return CSVParser.builder().setReader(reader).setFormat(this).get(); + } + + /** + * Prints to the specified output. + * + *

+ * See also {@link CSVPrinter}. + *

+ * + * @param out the output. + * @return a printer to an output. + * @throws IOException thrown if the optional header cannot be printed. + */ + public CSVPrinter print(final Appendable out) throws IOException { + return new CSVPrinter(out, this); + } + + /** + * Prints to the specified {@code File} with given {@code Charset}. + * + *

+ * See also {@link CSVPrinter}. + *

+ * + * @param out the output. + * @param charset A charset. + * @return a printer to an output. + * @throws IOException thrown if the optional header cannot be printed. + * @since 1.5 + */ + public CSVPrinter print(final File out, final Charset charset) throws IOException { + return print(out.toPath(), charset); + } + + private void print(final InputStream inputStream, final Appendable out, final boolean newRecord) throws IOException { + // InputStream is never null here + // There is nothing to escape when quoting is used which is the default. + if (!newRecord) { + append(getDelimiterString(), out); + } + final boolean quoteCharacterSet = isQuoteCharacterSet(); + if (quoteCharacterSet) { + append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional + } + // Stream the input to the output without reading or holding the whole value in memory. + // AppendableOutputStream cannot "close" an Appendable. + try (OutputStream outputStream = new Base64OutputStream(new AppendableOutputStream<>(out))) { + IOUtils.copy(inputStream, outputStream); + } + if (quoteCharacterSet) { + append(getQuoteCharacter().charValue(), out); // Explicit unboxing is intentional + } + } + + /** + * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated as needed. Useful when one wants to + * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is true. + * + * @param value value to output. + * @param out where to print the value. + * @param newRecord if this a new record. + * @throws IOException If an I/O error occurs. + * @since 1.4 + */ + public synchronized void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { + // null values are considered empty + // Only call CharSequence.toString() if you have to, helps GC-free use cases. 
+ CharSequence charSequence; + if (value == null) { + // https://issues.apache.org/jira/browse/CSV-203 + if (null == nullString) { + charSequence = Constants.EMPTY; + } else if (QuoteMode.ALL == quoteMode) { + charSequence = quotedNullString; + } else { + charSequence = nullString; + } + } else if (value instanceof CharSequence) { + charSequence = (CharSequence) value; + } else if (value instanceof Reader) { + print((Reader) value, out, newRecord); + return; + } else if (value instanceof InputStream) { + print((InputStream) value, out, newRecord); + return; + } else { + charSequence = value.toString(); + } + charSequence = getTrim() ? trim(charSequence) : charSequence; + print(value, charSequence, out, newRecord); + } + + private synchronized void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException { + final int offset = 0; + final int len = value.length(); + if (!newRecord) { + out.append(getDelimiterString()); + } + if (object == null) { + out.append(value); + } else if (isQuoteCharacterSet()) { + // The original object is needed so can check for Number + printWithQuotes(object, value, out, newRecord); + } else if (isEscapeCharacterSet()) { + printWithEscapes(value, out); + } else { + out.append(value, offset, len); + } + } + + /** + * Prints to the specified {@code Path} with given {@code Charset}, returns a {@code CSVPrinter} which the caller MUST close. + * + *

+ * See also {@link CSVPrinter}. + *

+ * + * @param out the output. + * @param charset A charset. + * @return a printer to an output. + * @throws IOException thrown if the optional header cannot be printed. + * @since 1.5 + */ + @SuppressWarnings("resource") + public CSVPrinter print(final Path out, final Charset charset) throws IOException { + return print(Files.newBufferedWriter(out, charset)); + } + + private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { + // Reader is never null here + if (!newRecord) { + append(getDelimiterString(), out); + } + if (isQuoteCharacterSet()) { + printWithQuotes(reader, out); + } else if (isEscapeCharacterSet()) { + printWithEscapes(reader, out); + } else if (out instanceof Writer) { + IOUtils.copyLarge(reader, (Writer) out); + } else { + IOUtils.copy(reader, out); + } + } + + /** + * Prints to the {@link System#out}. + * + *

+ * See also {@link CSVPrinter}. + *

+ * + * @return a printer to {@link System#out}. + * @throws IOException thrown if the optional header cannot be printed. + * @since 1.5 + */ + public CSVPrinter printer() throws IOException { + return new CSVPrinter(System.out, this); + } + + /** + * Outputs the trailing delimiter (if set) followed by the record separator (if set). + * + * @param appendable where to write + * @throws IOException If an I/O error occurs. + * @since 1.4 + */ + public synchronized void println(final Appendable appendable) throws IOException { + if (getTrailingDelimiter()) { + append(getDelimiterString(), appendable); + } + if (recordSeparator != null) { + append(recordSeparator, appendable); + } + } + + /** + * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator. + * + *

+ * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing + * the record, so there is no need to call {@link #println(Appendable)}. + *

+ * + * @param appendable where to write. + * @param values values to output. + * @throws IOException If an I/O error occurs. + * @since 1.4 + */ + public synchronized void printRecord(final Appendable appendable, final Object... values) throws IOException { + for (int i = 0; i < values.length; i++) { + print(values[i], appendable, i == 0); + } + println(appendable); + } + + /* + * This method must only be called if escaping is enabled, otherwise can throw exceptions. + */ + private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { + int start = 0; + int pos = 0; + final int end = charSeq.length(); + final char[] delimArray = getDelimiterCharArray(); + final int delimLength = delimArray.length; + final char escape = getEscapeChar(); + final boolean quoteSet = isQuoteCharacterSet(); + final char quote = quoteSet ? getQuoteCharacter().charValue() : 0; + final boolean commentMarkerSet = isCommentMarkerSet(); + final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional + while (pos < end) { + char c = charSeq.charAt(pos); + final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength); + final boolean isCr = c == Constants.CR; + final boolean isLf = c == Constants.LF; + // A leading comment marker would be read back as a comment, so escape it. 
+ final boolean isComment = commentMarkerSet && pos == 0 && c == commentChar; + if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) { + // write out segment up until this char + if (pos > start) { + appendable.append(charSeq, start, pos); + } + if (isLf) { + c = 'n'; + } else if (isCr) { + c = 'r'; + } + escape(c, appendable); + if (isDelimiterStart) { + for (int i = 1; i < delimLength; i++) { + pos++; + escape(charSeq.charAt(pos), appendable); + } + } + start = pos + 1; // start on the current char after this one + } + pos++; + } + + // write last segment + if (pos > start) { + appendable.append(charSeq, start, pos); + } + } + + /* + * This method must only be called if escaping is enabled, otherwise can throw exceptions. + */ + private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { + int start = 0; + int pos = 0; + @SuppressWarnings("resource") // Temp reader on input reader. + final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader); + final char[] delimArray = getDelimiterCharArray(); + final int delimLength = delimArray.length; + final char escape = getEscapeChar(); + final boolean quoteSet = isQuoteCharacterSet(); + final char quote = quoteSet ? getQuoteCharacter().charValue() : 0; + final boolean commentMarkerSet = isCommentMarkerSet(); + final char commentChar = commentMarkerSet ? 
commentMarker.charValue() : 0; // Explicit unboxing is intentional + final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); + int c; + boolean firstChar = true; + final char[] lookAheadBuffer = new char[delimLength - 1]; + while (EOF != (c = bufferedReader.read())) { + builder.append((char) c); + Arrays.fill(lookAheadBuffer, (char) 0); + bufferedReader.peek(lookAheadBuffer); + final String test = builder.toString() + new String(lookAheadBuffer); + final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength); + final boolean isCr = c == Constants.CR; + final boolean isLf = c == Constants.LF; + // A leading comment marker would be read back as a comment, so escape it. + final boolean isComment = commentMarkerSet && firstChar && c == commentChar; + firstChar = false; + if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) { + // write out segment up until this char + if (pos > start) { + append(builder.substring(start, pos), appendable); + builder.setLength(0); + pos = -1; + } + if (isLf) { + c = 'n'; + } else if (isCr) { + c = 'r'; + } + escape((char) c, appendable); + if (isDelimiterStart) { + for (int i = 1; i < delimLength; i++) { + escape((char) bufferedReader.read(), appendable); + } + } + start = pos + 1; // start on the current char after this one + } + pos++; + } + // write last segment + if (pos > start) { + appendable.append(builder, start, pos); + } + } + + /* + * This method must only be called if quoting is enabled, otherwise will generate NPE. 
+ * The original object is needed so can check for Number + */ + private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws IOException { + boolean quote = false; + int start = 0; + int pos = 0; + final int len = charSeq.length(); + final char[] delim = getDelimiterCharArray(); + final int delimLength = delim.length; + final char quoteChar = getQuoteCharacter().charValue(); // Explicit unboxing is intentional + // If escape char not specified, default to the quote char + // This avoids having to keep checking whether there is an escape character + // at the cost of checking against quote twice + final char escapeChar = isEscapeCharacterSet() ? getEscapeChar() : quoteChar; + QuoteMode quoteModePolicy = getQuoteMode(); + if (quoteModePolicy == null) { + quoteModePolicy = QuoteMode.MINIMAL; + } + switch (quoteModePolicy) { + case ALL: + case ALL_NON_NULL: + quote = true; + break; + case NON_NUMERIC: + quote = !(object instanceof Number); + break; + case NONE: + // Use the existing escaping code + printWithEscapes(charSeq, out); + return; + case MINIMAL: + if (len <= 0) { + // Always quote an empty token that is the first + // on the line, as it may be the only thing on the + // line. If it were not quoted in that case, + // an empty line has no tokens. + if (newRecord) { + quote = true; + } + } else { + char c = charSeq.charAt(pos); + if (c <= Constants.COMMENT || isCommentMarkerSet() && c == commentMarker.charValue()) { + // Some other chars at the start of a value caused the parser to fail, so for now + // encapsulate if we start in anything less than '#'. We are being conservative + // by including the default comment char and any configured comment marker too, + // which the parser would otherwise read back as a comment line. 
+ quote = true; + } else { + while (pos < len) { + c = charSeq.charAt(pos); + if (c == Constants.LF || c == Constants.CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) { + quote = true; + break; + } + pos++; + } + + if (!quote) { + pos = len - 1; + c = charSeq.charAt(pos); + // Some other chars at the end caused the parser to fail, so for now + // encapsulate if we end in anything less than ' ' + if (isTrimChar(c)) { + quote = true; + } + } + } + } + if (!quote) { + // No encapsulation needed - write out the original value + out.append(charSeq, start, len); + return; + } + break; + default: + throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); + } + if (!quote) { + // No encapsulation needed - write out the original value + out.append(charSeq, start, len); + return; + } + // We hit something that needed encapsulation + out.append(quoteChar); + // Pick up where we left off: pos should be positioned on the first character that caused + // the need for encapsulation. + while (pos < len) { + final char c = charSeq.charAt(pos); + if (c == quoteChar || c == escapeChar) { + // write out the chunk up until this point + out.append(charSeq, start, pos); + out.append(escapeChar); // now output the escape + start = pos; // and restart with the matched char + } + pos++; + } + // Write the last segment + out.append(charSeq, start, pos); + out.append(quoteChar); + } + + /** + * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. + * + * @param reader What to print + * @param appendable Where to print it + * @throws IOException If an I/O error occurs + */ + private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException { + if (getQuoteMode() == QuoteMode.NONE) { + printWithEscapes(reader, appendable); + return; + } + final char quote = getQuoteCharacter().charValue(); // Explicit unboxing is intentional + final char escape = isEscapeCharacterSet() ? 
getEscapeChar() : quote; + // (1) Append opening quote + append(quote, appendable); + // (2) Append Reader contents, doubling quotes and escape characters + int c; + while (EOF != (c = reader.read())) { + append((char) c, appendable); + if (c == quote || c == escape) { + append((char) c, appendable); + } + } + // (3) Append closing quote + append(quote, appendable); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("Delimiter=<").append(delimiter).append('>'); + if (isEscapeCharacterSet()) { + sb.append(Constants.SP); + sb.append("Escape=<").append(escapeCharacter).append('>'); + } + if (isQuoteCharacterSet()) { + sb.append(Constants.SP); + sb.append("QuoteChar=<").append(quoteCharacter).append('>'); + } + if (quoteMode != null) { + sb.append(Constants.SP); + sb.append("QuoteMode=<").append(quoteMode).append('>'); + } + if (isCommentMarkerSet()) { + sb.append(Constants.SP); + sb.append("CommentStart=<").append(commentMarker).append('>'); + } + if (isNullStringSet()) { + sb.append(Constants.SP); + sb.append("NullString=<").append(nullString).append('>'); + } + if (recordSeparator != null) { + sb.append(Constants.SP); + sb.append("RecordSeparator=<").append(recordSeparator).append('>'); + } + if (getIgnoreEmptyLines()) { + sb.append(" EmptyLines:ignored"); + } + if (getIgnoreSurroundingSpaces()) { + sb.append(" SurroundingSpaces:ignored"); + } + if (getIgnoreHeaderCase()) { + sb.append(" IgnoreHeaderCase:ignored"); + } + sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); + if (headerComments != null) { + sb.append(Constants.SP); + sb.append("HeaderComments:").append(Arrays.toString(headerComments)); + } + if (headers != null) { + sb.append(Constants.SP); + sb.append("Header:").append(Arrays.toString(headers)); + } + return sb.toString(); + } + + String trim(final String value) { + return getTrim() ? 
value.trim() : value; + } + + boolean useMaxRows() { + return getMaxRows() > 0; + } + + boolean useRow(final long rowNum) { + return !useMaxRows() || rowNum <= getMaxRows(); + } + + /** + * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. + *

+ * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used + * for parsing, so it cannot be used here. + *

+ * + * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. + */ + private void validate() throws IllegalArgumentException { + if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // Explicit unboxing is intentional + throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); + } + if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // Explicit unboxing is intentional + throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); + } + if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // Explicit unboxing is intentional + throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); + } + if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { + throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); + } + if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { + throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" + commentMarker + "')"); + } + if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { + throw new IllegalArgumentException("Quote mode set to NONE but no escape character is set"); + } + // Validate headers + if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { + final Set dupCheckSet = new HashSet<>(headers.length); + final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY; + for (final String header : headers) { + final boolean blank = isBlank(header); + // Sanitize all empty headers to the empty string "" when checking duplicates + final boolean containsHeader = !dupCheckSet.add(blank ? 
"" : header); + if (containsHeader && !(blank && emptyDuplicatesAllowed)) { + throw new IllegalArgumentException(String.format( + "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, + Arrays.toString(headers))); + } + } + } + } + + /** + * Builds a new {@code CSVFormat} that allows duplicate header names. + * + * @return a new {@code CSVFormat} that allows duplicate header names + * @since 1.7 + * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean) Builder#setAllowDuplicateHeaderNames(true)} + */ + @Deprecated + public CSVFormat withAllowDuplicateHeaderNames() { + return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).get(); + } + + /** + * Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value. + * + * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. + * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. + * @since 1.7 + * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean)} + */ + @Deprecated + public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { + final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; + return builder().setDuplicateHeaderMode(mode).get(); + } + + /** + * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}. + * + * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 
     * @see Builder#setAllowMissingColumnNames(boolean)
     * @since 1.1
     * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)}
     */
    @Deprecated
    public CSVFormat withAllowMissingColumnNames() {
        return builder().setAllowMissingColumnNames(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
     *
     * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause
     *                                an {@link IllegalArgumentException} to be thrown.
     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
     * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean)}
     */
    @Deprecated
    public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
        return builder().setAllowMissingColumnNames(allowMissingColumnNames).get();
    }

    /**
     * Builds a new {@code CSVFormat} with whether to flush on close.
     *
     * @param autoFlush whether to flush on close.
     * @return A new CSVFormat that is equal to this but with the specified autoFlush setting.
     * @since 1.6
     * @deprecated Use {@link Builder#setAutoFlush(boolean)}
     */
    @Deprecated
    public CSVFormat withAutoFlush(final boolean autoFlush) {
        return builder().setAutoFlush(autoFlush).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
     *
     * <p>Note that the comment start character is only recognized at the start of a line.</p>
     *
     * @param commentMarker the comment start marker
     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setCommentMarker(char)}
     */
    @Deprecated
    public CSVFormat withCommentMarker(final char commentMarker) {
        return builder().setCommentMarker(commentMarker).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
     *
     * <p>Note that the comment start character is only recognized at the start of a line.</p>
     *
     * @param commentMarker the comment start marker, use {@code null} to disable
     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setCommentMarker(Character)}
     */
    @Deprecated
    public CSVFormat withCommentMarker(final Character commentMarker) {
        return builder().setCommentMarker(commentMarker).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
     *
     * @param delimiter the delimiter character
     * @return A new CSVFormat that is equal to this with the specified character as a delimiter
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setDelimiter(char)}
     */
    @Deprecated
    public CSVFormat withDelimiter(final char delimiter) {
        return builder().setDelimiter(delimiter).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
     *
     * @param escape the escape character
     * @return A new CSVFormat that is equal to this but with the specified character as the escape character
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setEscape(char)}
     */
    @Deprecated
    public CSVFormat withEscape(final char escape) {
        return builder().setEscape(escape).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
     *
     * @param escape the escape character, use {@code null} to disable
     * @return A new CSVFormat that is equal to this but with the specified character as the escape character
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setEscape(Character)}
     */
    @Deprecated
    public CSVFormat withEscape(final Character escape) {
        return builder().setEscape(escape).get();
    }

    // @formatter:off
    /**
     * Builds a new {@code CSVFormat} using the first record as header.
     *
     * <p>
     * Calling this method is equivalent to calling:
     * </p>
     *
     * <pre>
     * CSVFormat format = aFormat.builder()
     *                           .setHeader()
     *                           .setSkipHeaderRecord(true)
     *                           .get();
     * </pre>
     *
     * <p>Any previously set headers are reset to empty.
     * The resulting format will have {@code skipHeaderRecord = true}.</p>
     *
     * @return A new CSVFormat that is equal to this but using the first record as header.
     * @see Builder#setSkipHeaderRecord(boolean)
     * @see Builder#setHeader(String...)
     * @since 1.3
     * @deprecated Use {@link Builder#setHeader(String...) Builder#setHeader()}.{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord(true)}.
     */
    // @formatter:on
    @Deprecated
    public CSVFormat withFirstRecordAsHeader() {
        // @formatter:off
        return builder()
                .setHeader()
                .setSkipHeaderRecord(true)
                .get();
        // @formatter:on
    }

    /**
     * Builds a new {@code CSVFormat} with the header of the format defined by the enum class.
     *
     * <p>

+ * Example: + *

+ * + *
+     * public enum MyHeader {
+     *     Name, Email, Phone
+     * }
+     * ...
+     * CSVFormat format = aFormat.builder().setHeader(MyHeader.class).get();
+     * 
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ * + * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @return A new CSVFormat that is equal to this but with the specified header + * @see Builder#setHeader(String...) + * @see Builder#setSkipHeaderRecord(boolean) + * @since 1.3 + * @deprecated Use {@link Builder#setHeader(Class)} + */ + @Deprecated + public CSVFormat withHeader(final Class> headerEnum) { + return builder().setHeader(headerEnum).get(); + } + + /** + * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the + * input file with: + * + *
+     * CSVFormat format = aFormat.builder().setHeader().get();
+     * 
+ * + * or specified manually with: + * + *
+     * CSVFormat format = aFormat.builder().setHeader(resultSet).get();
+     * 
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ * + * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. + * @return A new CSVFormat that is equal to this but with the specified header + * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. + * @since 1.1 + * @deprecated Use {@link Builder#setHeader(ResultSet)} + */ + @Deprecated + public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { + return builder().setHeader(resultSet).get(); + } + + /** + * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the + * input file with: + * + *
+     * CSVFormat format = aFormat.builder().setHeader().get()
+     * 
+ * + * or specified manually with: + * + *
+     * CSVFormat format = aFormat.builder().setHeader(resultSetMetaData).get()
+     * 
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ * + * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @return A new CSVFormat that is equal to this but with the specified header + * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. + * @since 1.1 + * @deprecated Use {@link Builder#setHeader(ResultSetMetaData)} + */ + @Deprecated + public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { + return builder().setHeader(resultSetMetaData).get(); + } + + /** + * Builds a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file + * with: + * + *
+     * CSVFormat format = aFormat.builder().setHeader().get();
+     * 
+ * + * or specified manually with: + * + *
{@code
+     * CSVFormat format = aFormat.builder().setHeader("name", "email", "phone").get();
+     * }
+ *

+ * The header is also used by the {@link CSVPrinter}. + *

+ * + * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. + * @return A new CSVFormat that is equal to this but with the specified header + * @see Builder#setSkipHeaderRecord(boolean) + * @deprecated Use {@link Builder#setHeader(String...)} + */ + @Deprecated + public CSVFormat withHeader(final String... header) { + return builder().setHeader(header).get(); + } + + /** + * Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers. + * This setting is ignored by the parser. + * + *
{@code
+     * CSVFormat format = aFormat.builder().setHeaderComments("Generated by Apache Commons CSV.", Instant.now()).get();
+     * }
+ * + * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. + * @return A new CSVFormat that is equal to this but with the specified header + * @see Builder#setSkipHeaderRecord(boolean) + * @since 1.1 + * @deprecated Use {@link Builder#setHeaderComments(Object...)} + */ + @Deprecated + public CSVFormat withHeaderComments(final Object... headerComments) { + return builder().setHeaderComments(headerComments).get(); + } + + /** + * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. + * + * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. + * @see Builder#setIgnoreEmptyLines(boolean) + * @since 1.1 + * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)} + */ + @Deprecated + public CSVFormat withIgnoreEmptyLines() { + return builder().setIgnoreEmptyLines(true).get(); + } + + /** + * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. + * + * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty + * lines to empty records. + * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. + * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean)} + */ + @Deprecated + public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { + return builder().setIgnoreEmptyLines(ignoreEmptyLines).get(); + } + + /** + * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. + * + * @return A new CSVFormat that will ignore the new case header name behavior. 
     * @see Builder#setIgnoreHeaderCase(boolean)
     * @since 1.3
     * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}
     */
    @Deprecated
    public CSVFormat withIgnoreHeaderCase() {
        return builder().setIgnoreHeaderCase(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case.
     *
     * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
     * @return A new CSVFormat that will ignore case header name if specified as {@code true}
     * @since 1.3
     * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean)}
     */
    @Deprecated
    public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
        return builder().setIgnoreHeaderCase(ignoreHeaderCase).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}.
     *
     * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior.
     * @see Builder#setIgnoreSurroundingSpaces(boolean)
     * @since 1.1
     * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean) Builder#setIgnoreSurroundingSpaces(true)}
     */
    @Deprecated
    public CSVFormat withIgnoreSurroundingSpaces() {
        return builder().setIgnoreSurroundingSpaces(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value.
     *
     * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
     * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
     * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean)}
     */
    @Deprecated
    public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
        return builder().setIgnoreSurroundingSpaces(ignoreSurroundingSpaces).get();
    }

    /**
     * Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output.
     * <ul>
     * <li>Reading: Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
     * <li>Writing: Writes {@code null} as the given {@code nullString} when writing records.</li>
     * </ul>
     *
     * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null}
     * @return A new CSVFormat that is equal to this but with the specified null conversion string.
     * @deprecated Use {@link Builder#setNullString(String)}
     */
    @Deprecated
    public CSVFormat withNullString(final String nullString) {
        return builder().setNullString(nullString).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
     *
     * @param quoteChar the quote character
     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setQuote(char)}
     */
    @Deprecated
    public CSVFormat withQuote(final char quoteChar) {
        return builder().setQuote(quoteChar).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
     *
     * @param quoteChar the quote character, use {@code null} to disable.
     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
     * @throws IllegalArgumentException thrown if the specified character is a line break
     * @deprecated Use {@link Builder#setQuote(Character)}
     */
    @Deprecated
    public CSVFormat withQuote(final Character quoteChar) {
        return builder().setQuote(quoteChar).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value.
     *
     * @param quoteMode the quote policy to use for output.
     * @return A new CSVFormat that is equal to this but with the specified quote policy
     * @deprecated Use {@link Builder#setQuoteMode(QuoteMode)}
     */
    @Deprecated
    public CSVFormat withQuoteMode(final QuoteMode quoteMode) {
        return builder().setQuoteMode(quoteMode).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the record separator of the format set to the specified character.
     *
     * <p>
     * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
     * "\r\n"
     * </p>
     *
     * @param recordSeparator the record separator to use for output.
     * @return A new CSVFormat that is equal to this but with the specified output record separator
     * @deprecated Use {@link Builder#setRecordSeparator(char)}
     */
    @Deprecated
    public CSVFormat withRecordSeparator(final char recordSeparator) {
        return builder().setRecordSeparator(recordSeparator).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the record separator of the format set to the specified String.
     *
     * <p>

     * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
     * "\r\n"
     *
     * @param recordSeparator the record separator to use for output.
     * @return A new CSVFormat that is equal to this but with the specified output record separator
     * @throws IllegalArgumentException if recordSeparator is none of CR, LF or CRLF
     * @deprecated Use {@link Builder#setRecordSeparator(String)}
     */
    @Deprecated
    public CSVFormat withRecordSeparator(final String recordSeparator) {
        return builder().setRecordSeparator(recordSeparator).get();
    }

    /**
     * Builds a new {@code CSVFormat} with skipping the header record set to {@code true}.
     *
     * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
     * @see Builder#setSkipHeaderRecord(boolean)
     * @see Builder#setHeader(String...)
     * @since 1.1
     * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean) Builder#setSkipHeaderRecord(true)}
     */
    @Deprecated
    public CSVFormat withSkipHeaderRecord() {
        return builder().setSkipHeaderRecord(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with whether to skip the header record.
     *
     * @param skipHeaderRecord whether to skip the header record.
     * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
     * @see Builder#setHeader(String...)
     * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean)}
     */
    @Deprecated
    public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
        return builder().setSkipHeaderRecord(skipHeaderRecord).get();
    }

    /**
     * Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows
     * and LF on Linux.
     *
     * <p>

     * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
     * "\r\n"
     *
     * @return A new CSVFormat that is equal to this but with the operating system's line separator string.
     * @since 1.6
     * @deprecated Use {@link Builder#setRecordSeparator(String) setRecordSeparator(System.lineSeparator())}
     */
    @Deprecated
    public CSVFormat withSystemRecordSeparator() {
        return builder().setRecordSeparator(System.lineSeparator()).get();
    }

    /**
     * Builds a new {@code CSVFormat} to add a trailing delimiter.
     *
     * @return A new CSVFormat that is equal to this but with the trailing delimiter setting.
     * @since 1.3
     * @deprecated Use {@link Builder#setTrailingDelimiter(boolean) Builder#setTrailingDelimiter(true)}
     */
    @Deprecated
    public CSVFormat withTrailingDelimiter() {
        return builder().setTrailingDelimiter(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with whether to add a trailing delimiter.
     *
     * @param trailingDelimiter whether to add a trailing delimiter.
     * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting.
     * @since 1.3
     * @deprecated Use {@link Builder#setTrailingDelimiter(boolean)}
     */
    @Deprecated
    public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
        return builder().setTrailingDelimiter(trailingDelimiter).get();
    }

    /**
     * Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
     *
     * @return A new CSVFormat that is equal to this but with the trim setting on.
     * @since 1.3
     * @deprecated Use {@link Builder#setTrim(boolean) Builder#setTrim(true)}
     */
    @Deprecated
    public CSVFormat withTrim() {
        return builder().setTrim(true).get();
    }

    /**
     * Builds a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
     *
     * @param trim whether to trim leading and trailing blanks.
     * @return A new CSVFormat that is equal to this but with the specified trim setting.
+ * @since 1.3 + * @deprecated Use {@link Builder#setTrim(boolean)} + */ + @Deprecated + public CSVFormat withTrim(final boolean trim) { + return builder().setTrim(trim).get(); + } + +} diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index bff55a00dd..141eba732c 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; @@ -47,13 +49,17 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import org.apache.commons.io.Charsets; +import org.apache.commons.io.build.AbstractStreamBuilder; +import org.apache.commons.io.function.Uncheck; + /** * Parses CSV files according to the specified format. * * Because CSV appears in many different dialects, the parser supports many formats by allowing the * specification of a {@link CSVFormat}. * - * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream. + * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream. * *

Creating instances

*

@@ -70,7 +76,7 @@ * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut: *

*
- * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
+ * for (CSVRecord record : CSVFormat.EXCEL.parse(in)) {
  *     ...
  * }
  * 
@@ -80,17 +86,17 @@ * To parse a CSV input from a file, you write: *

* - *
- * File csvData = new File("/path/to/csv");
+ * 
{@code
+ * File csvData = new File("/path/to/csv");
  * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180);
  * for (CSVRecord csvRecord : parser) {
  *     ...
- * }
+ * }}
  * 
* *

* This will read and parse the contents of the file using the - * RFC 4180 format. + * RFC 4180 format. *

* *

@@ -105,20 +111,20 @@ *

* *

- * If the predefined formats don't match the format at hands, custom formats can be defined. More information about - * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. + * If the predefined formats don't match the format at hand, custom formats can be defined. More information about + * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. *

* *

Parsing into memory

*

- * If parsing record wise is not desired, the contents of the input can be read completely into memory. + * If parsing record-wise is not desired, the contents of the input can be read completely into memory. *

* - *
- * Reader in = new StringReader("a;b\nc;d");
+ * 
{@code
+ * Reader in = new StringReader("a;b\nc;d");
  * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
- * List<CSVRecord> list = parser.getRecords();
- * 
+ * List list = parser.getRecords(); + * }
* *

* There are two constraints that have to be kept in mind: @@ -126,59 +132,156 @@ * *

    *
  1. Parsing into memory starts at the current position of the parser. If you have already parsed records from - * the input, those records will not end up in the in memory representation of your CSV data.
  2. - *
  3. Parsing into memory may consume a lot of system resources depending on the input. For example if you're + * the input, those records will not end up in the in-memory representation of your CSV data.
  4. + *
  5. Parsing into memory may consume a lot of system resources depending on the input. For example, if you're * parsing a 150MB file of CSV data the contents will be read completely into memory.
  6. *
* *

Notes

*

- * Internal parser state is completely covered by the format and the reader-state. + * The internal parser state is completely covered by the format and the reader state. *

* * @see package documentation for more details */ public final class CSVParser implements Iterable, Closeable { - class CSVRecordIterator implements Iterator { + /** + * Builds a new {@link CSVParser}. + * + * @since 1.13.0 + */ + public static class Builder extends AbstractStreamBuilder { + + private CSVFormat format; + private long byteOffset = -1; + private long characterOffset; + private long recordNumber = 1; + private boolean trackBytes; + + /** + * Constructs a new instance. + */ + protected Builder() { + // empty + } + + @Override + public CSVParser get() throws IOException { + return new CSVParser(this); + } + + /** + * Sets the lexer byte offset when the parser does not start parsing at the beginning of the source. + *

+ * By default, the value is {@code -1}, which reuses the character offset for the byte offset. + *

+ * + * @param byteOffset the lexer byte offset. + * @return {@code this} instance. + * @see #setCharacterOffset(long) + * @since 1.15.0 + */ + public Builder setByteOffset(final long byteOffset) { + this.byteOffset = byteOffset; + return asThis(); + } + + /** + * Sets the lexer character offset when the parser does not start parsing at the beginning of the source. + * + * @param characterOffset the lexer character offset. + * @return {@code this} instance. + * @see #setByteOffset(long) + */ + public Builder setCharacterOffset(final long characterOffset) { + this.characterOffset = characterOffset; + return asThis(); + } + + /** + * Sets the CSV format. A copy of the given format is kept. + * + * @param format the CSV format, {@code null} resets to {@link CSVFormat#DEFAULT}. + * @return {@code this} instance. + */ + public Builder setFormat(final CSVFormat format) { + this.format = CSVFormat.copy(format); + return asThis(); + } + + /** + * Sets the next record number to assign, defaults to {@code 1}. + * + * @param recordNumber the next record number to assign. + * @return {@code this} instance. + */ + public Builder setRecordNumber(final long recordNumber) { + this.recordNumber = recordNumber; + return asThis(); + } + + /** + * Sets whether to enable byte tracking for the parser. + * + * @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it. + * @return {@code this} instance. + * @since 1.13.0 + */ + public Builder setTrackBytes(final boolean trackBytes) { + this.trackBytes = trackBytes; + return asThis(); + } + + } + + final class CSVRecordIterator implements Iterator { private CSVRecord current; + private long recordCount; + /** + * Gets the next record or null at the end of stream or max rows read. + * + * @throws IOException on parse error or input read-failure + * @throws CSVException on invalid input. + * @return the next record, or {@code null} if the end of the stream has been reached. 
+ */ private CSVRecord getNextRecord() { - try { - return CSVParser.this.nextRecord(); - } catch (final IOException e) { - throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); + CSVRecord record = null; + if (format.useRow(recordCount + 1)) { + record = Uncheck.get(CSVParser.this::nextRecord); + if (record != null) { + recordCount++; + } } + return record; } @Override public boolean hasNext() { - if (CSVParser.this.isClosed()) { + if (isClosed()) { return false; } - if (this.current == null) { - this.current = this.getNextRecord(); + if (current == null) { + current = getNextRecord(); } - - return this.current != null; + return current != null; } @Override public CSVRecord next() { - if (CSVParser.this.isClosed()) { + if (isClosed()) { throw new NoSuchElementException("CSVParser has been closed"); } - CSVRecord next = this.current; - this.current = null; - + CSVRecord next = current; + current = null; if (next == null) { // hasNext() wasn't called before - next = this.getNextRecord(); + next = getNextRecord(); if (next == null) { throw new NoSuchElementException("No more CSV records available"); } } - return next; } @@ -192,6 +295,7 @@ public void remove() { * Header information based on name and position. */ private static final class Headers { + /** * Header column positions (0-based) */ @@ -208,20 +312,32 @@ private static final class Headers { } } + /** + * Creates a new builder. + * + * @return a new builder. + * @since 1.13.0 + */ + public static Builder builder() { + return new Builder(); + } + /** * Creates a parser for the given {@link File}. * * @param file * a CSV file. Must not be null. * @param charset - * The Charset to decode the given file. + * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. 
+ * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null. + * If the parameters of the format are inconsistent. * @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code file} is {@code null}. */ public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException { Objects.requireNonNull(file, "file"); @@ -237,24 +353,22 @@ public static CSVParser parse(final File file, final Charset charset, final CSVF *

* * @param inputStream - * an InputStream containing CSV-formatted input. Must not be null. + * an InputStream containing CSV-formatted input, must not be {@code null}. * @param charset - * The Charset to decode the given file. + * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new CSVParser configured with the given reader and format. * @throws IllegalArgumentException * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid CSV input data. * @since 1.5 */ - @SuppressWarnings("resource") public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format) throws IOException { - Objects.requireNonNull(inputStream, "inputStream"); - Objects.requireNonNull(format, "format"); - return parse(new InputStreamReader(inputStream, charset), format); + return parse(new InputStreamReader(inputStream, Charsets.toCharset(charset)), format); } /** @@ -263,20 +377,21 @@ public static CSVParser parse(final InputStream inputStream, final Charset chars * @param path * a CSV file. Must not be null. * @param charset - * The Charset to decode the given file. + * The Charset to decode the given file, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null.
+ * If the parameters of the format are inconsistent. * @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code path} is {@code null}. * @since 1.5 */ @SuppressWarnings("resource") public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { Objects.requireNonNull(path, "path"); - Objects.requireNonNull(format, "format"); return parse(Files.newInputStream(path), charset, format); } @@ -291,38 +406,37 @@ public static CSVParser parse(final Path path, final Charset charset, final CSVF * @param reader * a Reader containing CSV-formatted input. Must not be null. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new CSVParser configured with the given reader and format. * @throws IllegalArgumentException * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid CSV input data. * @since 1.5 */ public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { - return new CSVParser(reader, format); + return builder().setReader(reader).setFormat(format).get(); } - // the following objects are shared to reduce garbage - /** * Creates a parser for the given {@link String}. * * @param string * a CSV string. Must not be null. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either string or format are null. + * If the parameters of the format are inconsistent. 
* @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code string} is {@code null}. */ public static CSVParser parse(final String string, final CSVFormat format) throws IOException { Objects.requireNonNull(string, "string"); - Objects.requireNonNull(format, "format"); - - return new CSVParser(new StringReader(string), format); + return parse(new StringReader(string), format); } /** @@ -336,22 +450,21 @@ public static CSVParser parse(final String string, final CSVFormat format) throw * @param url * a URL. Must not be null. * @param charset - * the charset for the resource. Must not be null. + * the charset for the resource, {@code null} maps to the {@link Charset#defaultCharset() default Charset}. * @param format - * the CSVFormat used for CSV parsing. Must not be null. + * the CSVFormat used for CSV parsing, {@code null} maps to {@link CSVFormat#DEFAULT}. * @return a new parser * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either url, charset or format are null. + * If the parameters of the format are inconsistent. * @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid CSV input data. + * @throws NullPointerException if {@code url} is {@code null}. 
*/ @SuppressWarnings("resource") public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { Objects.requireNonNull(url, "url"); - Objects.requireNonNull(charset, "charset"); - Objects.requireNonNull(format, "format"); - - return new CSVParser(new InputStreamReader(url.openStream(), charset), format); + return parse(url.openStream(), charset, format); } private String headerComment; @@ -374,6 +487,12 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor */ private long recordNumber; + /** + * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination + * with {@link #recordNumber}. + */ + private final long byteOffset; + /** * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination * with {@link #recordNumber}. @@ -383,7 +502,24 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor private final Token reusableToken = new Token(); /** - * Customized CSV parser using the given {@link CSVFormat} + * Constructs a new instance from a builder. + * + * @param builder The source builder. + * @throws IOException if an I/O error occurs. + */ + @SuppressWarnings("resource") // Lexer manages ExtendedBufferedReader. + private CSVParser(final Builder builder) throws IOException { + this.format = (builder.format != null ? builder.format : CSVFormat.DEFAULT).copy(); + this.lexer = new Lexer(format, new ExtendedBufferedReader(builder.getReader(), builder.getCharset(), builder.trackBytes)); + this.csvRecordIterator = new CSVRecordIterator(); + this.headers = createHeaders(); + this.byteOffset = builder.byteOffset != -1 ? builder.byteOffset : builder.characterOffset; + this.characterOffset = builder.characterOffset; + this.recordNumber = builder.recordNumber - 1; + } + + /** + * Constructs a new instance using the given {@link CSVFormat}. * *

* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, @@ -398,13 +534,16 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid CSV input data. + * @deprecated Will be removed in the next major version, use {@link Builder#get()}. */ + @Deprecated public CSVParser(final Reader reader, final CSVFormat format) throws IOException { this(reader, format, 0, 1); } /** - * Customized CSV parser using the given {@link CSVFormat} + * Constructs a new instance using the given {@link CSVFormat}. * *

* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, @@ -418,34 +557,35 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException * @param characterOffset * Lexer offset when the parser does not start parsing at the beginning of the source. * @param recordNumber - * The next record number to assign + * The next record number to assign. * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. + * If the parameters of the format are inconsistent or if either the reader or format is null. * @throws IOException - * If there is a problem reading the header or skipping the first record + * if there is a problem reading the header or skipping the first record + * @throws CSVException on invalid input. * @since 1.1 + * @deprecated Will be removed in the next major version, use {@link Builder#get()}. */ - @SuppressWarnings("resource") - public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) - throws IOException { - Objects.requireNonNull(reader, "reader"); - Objects.requireNonNull(format, "format"); - - this.format = format.copy(); - this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); - this.csvRecordIterator = new CSVRecordIterator(); - this.headers = createHeaders(); - this.characterOffset = characterOffset; - this.recordNumber = recordNumber - 1; + @Deprecated + public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) throws IOException { + // @formatter:off + this(builder() + .setReader(reader) + .setFormat(Objects.requireNonNull(format, "format")) // requireNonNull for full compatibility + .setCharacterOffset(characterOffset) + .setRecordNumber(recordNumber) + .setCharset((Charset) null).setTrackBytes(false)); + // @formatter:on } private void addRecordValue(final boolean lastRecord)
{ - final String input = this.reusableToken.content.toString(); - final String inputClean = this.format.getTrim() ? input.trim() : input; - if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) { + final String input = format.trim(reusableToken.content.toString()); + // Only drop the empty field produced by an actual trailing delimiter. A quoted empty + // field ("") is a real value, not a trailing delimiter, so it must be kept. + if (lastRecord && input.isEmpty() && format.getTrailingDelimiter() && !reusableToken.isQuoted) { return; } - this.recordList.add(handleNull(inputClean)); + recordList.add(handleNull(input)); } /** @@ -456,13 +596,11 @@ private void addRecordValue(final boolean lastRecord) { */ @Override public void close() throws IOException { - if (this.lexer != null) { - this.lexer.close(); - } + lexer.close(); } private Map createEmptyHeaderMap() { - return this.format.getIgnoreHeaderCase() ? + return format.getIgnoreHeaderCase() ? new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : new LinkedHashMap<>(); } @@ -472,54 +610,53 @@ private Map createEmptyHeaderMap() { * * @return null if the format has no header. * @throws IOException if there is a problem reading the header or skipping the first record + * @throws CSVException on invalid input. 
*/ private Headers createHeaders() throws IOException { - Map hdrMap = null; + Map headerMap = null; List headerNames = null; - final String[] formatHeader = this.format.getHeader(); + final String[] formatHeader = format.getHeader(); if (formatHeader != null) { - hdrMap = createEmptyHeaderMap(); + headerMap = createEmptyHeaderMap(); String[] headerRecord = null; if (formatHeader.length == 0) { // read the header from the first line of the file - final CSVRecord nextRecord = this.nextRecord(); + final CSVRecord nextRecord = nextRecord(); if (nextRecord != null) { headerRecord = nextRecord.values(); headerComment = nextRecord.getComment(); } } else { - if (this.format.getSkipHeaderRecord()) { - final CSVRecord nextRecord = this.nextRecord(); + if (format.getSkipHeaderRecord()) { + final CSVRecord nextRecord = nextRecord(); if (nextRecord != null) { headerComment = nextRecord.getComment(); } } headerRecord = formatHeader; } - // build the name to index mappings if (headerRecord != null) { + // Track an occurrence of a null, empty or blank header. + boolean observedMissing = false; for (int i = 0; i < headerRecord.length; i++) { final String header = headerRecord[i]; - final boolean emptyHeader = header == null || header.trim().isEmpty(); - if (emptyHeader && !this.format.getAllowMissingColumnNames()) { - throw new IllegalArgumentException( - "A header name is missing in " + Arrays.toString(headerRecord)); + final boolean blankHeader = CSVFormat.isBlank(header); + if (blankHeader && !format.getAllowMissingColumnNames()) { + throw new IllegalArgumentException("A header name is missing in " + Arrays.toString(headerRecord)); } - - final boolean containsHeader = header != null && hdrMap.containsKey(header); - final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode(); + final boolean containsHeader = blankHeader ? 
observedMissing : headerMap.containsKey(header); + final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode(); final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL; final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY; - - if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) { - throw new IllegalArgumentException( - String.format( + if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) { + throw new IllegalArgumentException(String.format( "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, Arrays.toString(headerRecord))); } + observedMissing |= blankHeader; if (header != null) { - hdrMap.put(header, Integer.valueOf(i)); + headerMap.put(header, Integer.valueOf(i)); // Explicit boxing is intentional if (headerNames == null) { headerNames = new ArrayList<>(headerRecord.length); } @@ -528,32 +665,28 @@ private Headers createHeaders() throws IOException { } } } - if (headerNames == null) { - headerNames = Collections.emptyList(); //immutable - } else { - headerNames = Collections.unmodifiableList(headerNames); - } - return new Headers(hdrMap, headerNames); + // Make header names Collection immutable + return new Headers(headerMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames)); } /** - * Returns the current line number in the input stream. + * Gets the current line number in the input stream. * *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * Note: If your CSV input has multi-line values, the returned number does not correspond to * the record number. *

* - * @return current line number + * @return current line number. */ public long getCurrentLineNumber() { - return this.lexer.getCurrentLineNumber(); + return lexer.getCurrentLineNumber(); } /** * Gets the first end-of-line string encountered. * - * @return the first end-of-line string + * @return the first end-of-line string. * @since 1.5 */ public String getFirstEndOfLine() { @@ -561,7 +694,7 @@ public String getFirstEndOfLine() { } /** - * Returns the header comment, if any. + * Gets the header comment, if any. * The header comment appears before the header record. * * @return the header comment for this stream, or null if no comment is available. @@ -572,37 +705,37 @@ public String getHeaderComment() { } /** - * Returns a copy of the header map. + * Gets a copy of the header map as defined in the CSVFormat's header. *

* The map keys are column names. The map values are 0-based indices. *

*

- * Note: The map can only provide a one-to-one mapping when the format did not + * Note: The map can only provide a one-to-one mapping when the format did not * contain null or duplicate column names. *

* * @return a copy of the header map. */ public Map getHeaderMap() { - if (this.headers.headerMap == null) { + if (headers.headerMap == null) { return null; } final Map map = createEmptyHeaderMap(); - map.putAll(this.headers.headerMap); + map.putAll(headers.headerMap); return map; } /** - * Returns the header map. + * Gets the underlying header map. * - * @return the header map. + * @return the underlying header map. */ Map getHeaderMapRaw() { - return this.headers.headerMap; + return headers.headerMap; } /** - * Returns a read-only list of header names that iterates in column order. + * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header. *

* Note: The list provides strings that can be used as keys in the header map. * The list will not contain null column names if they were present in the input @@ -618,17 +751,17 @@ public List getHeaderNames() { } /** - * Returns the current record number in the input stream. + * Gets the current record number in the input stream. * *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * Note: If your CSV input has multi-line values, the returned number does not correspond to * the line number. *

* * @return current record number */ public long getRecordNumber() { - return this.recordNumber; + return recordNumber; } /** @@ -638,6 +771,9 @@ public long getRecordNumber() { *

* The returned content starts at the current parse-position in the stream. *

+ *

+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows this method produces. + *

* * @return list of {@link CSVRecord CSVRecords}, may be empty * @throws UncheckedIOException @@ -648,7 +784,7 @@ public List getRecords() { } /** - * Returns the trailer comment, if any. + * Gets the trailer comment, if any. * Trailer comments are located between the last record and EOF * * @return the trailer comment for this stream, or null if no comment is available. @@ -659,14 +795,14 @@ public String getTrailerComment() { } /** - * Handle whether input is parsed as null + * Handles whether the input is parsed as null * * @param input * the cell data to further processed - * @return null if input is parsed as null, or input itself if input isn't parsed as null + * @return null if input is parsed as null, or input itself if the input isn't parsed as null */ private String handleNull(final String input) { - final boolean isQuoted = this.reusableToken.isQuoted; + final boolean isQuoted = reusableToken.isQuoted; final String nullString = format.getNullString(); final boolean strictQuoteMode = isStrictQuoteMode(); if (input.equals(nullString)) { @@ -712,7 +848,7 @@ public boolean hasTrailerComment() { * @return whether this parser is closed. */ public boolean isClosed() { - return this.lexer.isClosed(); + return lexer.isClosed(); } /** @@ -722,21 +858,31 @@ public boolean isClosed() { * {@link QuoteMode#NON_NUMERIC}. */ private boolean isStrictQuoteMode() { - return this.format.getQuoteMode() == QuoteMode.ALL_NON_NULL || - this.format.getQuoteMode() == QuoteMode.NON_NUMERIC; + return format.getQuoteMode() == QuoteMode.ALL_NON_NULL || + format.getQuoteMode() == QuoteMode.NON_NUMERIC; } /** * Returns the record iterator. * *

* - * An {@link IOException} caught during the iteration are re-thrown as an + * An {@link IOException} caught during the iteration is re-thrown as an * {@link UncheckedIOException}. *

*

- * If the parser is closed a call to {@link Iterator#next()} will throw a + * If the parser is closed, the iterator will not yield any more records. + * A call to {@link Iterator#hasNext()} will return {@code false} and + * a call to {@link Iterator#next()} will throw a * {@link NoSuchElementException}. *

+ *

+ * If it is necessary to construct an iterator which is usable after the + * parser is closed, one option is to extract all records as a list with + * {@link #getRecords()}, and return an iterator to that list. + *

+ *

+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows an Iterator produces. + *

*/ @Override public Iterator iterator() { @@ -746,59 +892,65 @@ public Iterator iterator() { /** * Parses the next record from the current point in the stream. * - * @return the record as an array of values, or {@code null} if the end of the stream has been reached - * @throws IOException - * on parse error or input read-failure + * @return the record as an array of values, or {@code null} if the end of the stream has been reached. + * @throws IOException on parse error or input read-failure. + * @throws CSVException on invalid CSV input data. */ CSVRecord nextRecord() throws IOException { CSVRecord result = null; - this.recordList.clear(); + recordList.clear(); StringBuilder sb = null; - final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset; + final long startCharPosition = lexer.getCharacterPosition() + characterOffset; + final long startBytePosition = lexer.getBytesRead() + byteOffset; do { - this.reusableToken.reset(); - this.lexer.nextToken(this.reusableToken); - switch (this.reusableToken.type) { + reusableToken.reset(); + lexer.nextToken(reusableToken); + switch (reusableToken.type) { case TOKEN: - this.addRecordValue(false); + addRecordValue(false); break; case EORECORD: - this.addRecordValue(true); + addRecordValue(true); break; case EOF: - if (this.reusableToken.isReady) { - this.addRecordValue(true); + if (reusableToken.isReady) { + addRecordValue(true); } else if (sb != null) { trailerComment = sb.toString(); } break; case INVALID: - throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence"); + throw new CSVException("(line %,d) invalid parse sequence", getCurrentLineNumber()); case COMMENT: // Ignored currently if (sb == null) { // first comment for this record sb = new StringBuilder(); } else { sb.append(Constants.LF); } - sb.append(this.reusableToken.content); - this.reusableToken.type = TOKEN; // Read another token + sb.append(reusableToken.content); + reusableToken.type = TOKEN; // Read 
another token break; default: - throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type); + throw new CSVException("Unexpected Token type: %s", reusableToken.type); } - } while (this.reusableToken.type == TOKEN); - - if (!this.recordList.isEmpty()) { - this.recordNumber++; - final String comment = sb == null ? null : sb.toString(); - result = new CSVRecord(this, this.recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment, - this.recordNumber, startCharPosition); + } while (reusableToken.type == TOKEN); + if (!recordList.isEmpty()) { + recordNumber++; + result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), Objects.toString(sb, null), recordNumber, startCharPosition, + startBytePosition); } return result; } /** * Returns a sequential {@code Stream} with this collection as its source. + *

+ * If the parser is closed, the stream will not produce any more values. + * See the comments in {@link #iterator()}. + *

+ *

+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a Stream produces. + *

* * @return a sequential {@code Stream} with this collection as its source. * @since 1.9.0 diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java index dba6de9e79..a7048fd625 100644 --- a/src/main/java/org/apache/commons/csv/CSVPrinter.java +++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java @@ -1,496 +1,579 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.SP; - -import java.io.Closeable; -import java.io.Flushable; -import java.io.IOException; -import java.sql.Clob; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Objects; -import java.util.stream.Stream; - -/** - * Prints values in a {@link CSVFormat CSV format}. - * - *

Values can be appended to the output by calling the {@link #print(Object)} method. - * Values are printed according to {@link String#valueOf(Object)}. - * To complete a record the {@link #println()} method has to be called. - * Comments can be appended by calling {@link #printComment(String)}. - * However a comment will only be written to the output if the {@link CSVFormat} supports comments. - *

- * - *

The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)} - * or {@link #printRecord(Iterable)}. - * Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)} - * methods can be used to print several records at once. - *

- * - *

Example:

- * - *
- * try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
- *     printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
- *     printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
- *     printer.println();
- *     printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
- * } catch (IOException ex) {
- *     ex.printStackTrace();
- * }
- * 
- * - *

This code will write the following to csv.txt:

- *
- * id,userName,firstName,lastName,birthday
- * 1,john73,John,Doe,1973-09-15
- *
- * 2,mary,Mary,Meyer,1985-03-29
- * 
- */ -public final class CSVPrinter implements Flushable, Closeable { - - /** The place that the values get written. */ - private final Appendable appendable; - - private final CSVFormat format; - - /** True if we just began a new record. */ - private boolean newRecord = true; - - /** - * Creates a printer that will print values to the given stream following the CSVFormat. - *

- * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation - * and escaping with a different character) are not supported. - *

- * - * @param appendable - * stream to which to print. Must not be null. - * @param format - * the CSV format. Must not be null. - * @throws IOException - * thrown if the optional header cannot be printed. - * @throws IllegalArgumentException - * thrown if the parameters of the format are inconsistent or if either out or format are null. - */ - public CSVPrinter(final Appendable appendable, final CSVFormat format) throws IOException { - Objects.requireNonNull(appendable, "appendable"); - Objects.requireNonNull(format, "format"); - - this.appendable = appendable; - this.format = format.copy(); - // TODO: Is it a good idea to do this here instead of on the first call to a print method? - // It seems a pain to have to track whether the header has already been printed or not. - final String[] headerComments = format.getHeaderComments(); - if (headerComments != null) { - for (final String line : headerComments) { - this.printComment(line); - } - } - if (format.getHeader() != null && !format.getSkipHeaderRecord()) { - this.printRecord((Object[]) format.getHeader()); - } - } - - @Override - public void close() throws IOException { - close(false); - } - - /** - * Closes the underlying stream with an optional flush first. - * @param flush whether to flush before the actual close. - * - * @throws IOException - * If an I/O error occurs - * @since 1.6 - */ - public void close(final boolean flush) throws IOException { - if (flush || format.getAutoFlush()) { - flush(); - } - if (appendable instanceof Closeable) { - ((Closeable) appendable).close(); - } - } - - /** - * Flushes the underlying stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void flush() throws IOException { - if (appendable instanceof Flushable) { - ((Flushable) appendable).flush(); - } - } - - /** - * Gets the target Appendable. - * - * @return the target Appendable. 
- */ - public Appendable getOut() { - return this.appendable; - } - - /** - * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. - * - * @param value - * value to be output. - * @throws IOException - * If an I/O error occurs - */ - public synchronized void print(final Object value) throws IOException { - format.print(value, appendable, newRecord); - newRecord = false; - } - - /** - * Prints a comment on a new line among the delimiter separated values. - * - *

- * Comments will always begin on a new line and occupy at least one full line. The character specified to start - * comments and a space will be inserted at the beginning of each new line in the comment. - *

- * - *

- * If comments are disabled in the current CSV format this method does nothing. - *

- * - *

This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()} - * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use - * line breaks as record separator.

- * - * @param comment - * the comment to output - * @throws IOException - * If an I/O error occurs - */ - public synchronized void printComment(final String comment) throws IOException { - if (comment == null || !format.isCommentMarkerSet()) { - return; - } - if (!newRecord) { - println(); - } - appendable.append(format.getCommentMarker().charValue()); - appendable.append(SP); - for (int i = 0; i < comment.length(); i++) { - final char c = comment.charAt(i); - switch (c) { - case CR: - if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) { - i++; - } - //$FALL-THROUGH$ break intentionally excluded. - case LF: - println(); - appendable.append(format.getCommentMarker().charValue()); - appendable.append(SP); - break; - default: - appendable.append(c); - break; - } - } - println(); - } - - /** - * Prints headers for a result set based on its metadata. - * - * @param resultSet The result set to query for metadata. - * @throws IOException If an I/O error occurs. - * @throws SQLException If a database access error occurs or this method is called on a closed result set. - * @since 1.9.0 - */ - public synchronized void printHeaders(final ResultSet resultSet) throws IOException, SQLException { - printRecord((Object[]) format.builder().setHeader(resultSet).build().getHeader()); - } - - /** - * Outputs the record separator. - * - * @throws IOException - * If an I/O error occurs - */ - public synchronized void println() throws IOException { - format.println(appendable); - newRecord = true; - } - - /** - * Prints the given values as a single record of delimiter separated values followed by the record separator. - * - *

- * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *

- * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public synchronized void printRecord(final Iterable values) throws IOException { - for (final Object value : values) { - print(value); - } - println(); - } - - /** - * Prints the given values as a single record of delimiter separated values followed by the record separator. - * - *

- * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *

- * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Object... values) throws IOException { - printRecord(Arrays.asList(values)); - } - - /** - * Prints the given values as a single record of delimiter separated values followed by the record separator. - * - *

- * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *

- * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - * @since 1.10.0 - */ - public synchronized void printRecord(final Stream values) throws IOException { - values.forEachOrdered(t -> { - try { - print(t); - } catch (final IOException e) { - throw IOUtils.rethrow(e); - } - }); - println(); - } - - private void printRecordObject(final Object value) throws IOException { - if (value instanceof Object[]) { - this.printRecord((Object[]) value); - } else if (value instanceof Iterable) { - this.printRecord((Iterable) value); - } else { - this.printRecord(value); - } - } - - /** - * Prints all the objects in the given {@link Iterable} handling nested collections/arrays as records. - * - *

- * If the given Iterable only contains simple objects, this method will print a single record like - * {@link #printRecord(Iterable)}. If the given Iterable contains nested collections/arrays those nested elements - * will each be printed as records using {@link #printRecord(Object...)}. - *

- * - *

- * Given the following data structure: - *

- * - *
-     * 
-     * List<String[]> data = new ArrayList<>();
-     * data.add(new String[]{ "A", "B", "C" });
-     * data.add(new String[]{ "1", "2", "3" });
-     * data.add(new String[]{ "A1", "B2", "C3" });
-     * 
-     * 
- * - *

- * Calling this method will print: - *

- * - *
-     * 
-     * A, B, C
-     * 1, 2, 3
-     * A1, B2, C3
-     * 
-     * 
- * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Iterable values) throws IOException { - for (final Object value : values) { - printRecordObject(value); - } - } - - /** - * Prints all the objects in the given array handling nested collections/arrays as records. - * - *

- * If the given array only contains simple objects, this method will print a single record like - * {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested - * elements will each be printed as records using {@link #printRecord(Object...)}. - *

- * - *

- * Given the following data structure: - *

- * - *
-     * 
-     * String[][] data = new String[3][]
-     * data[0] = String[]{ "A", "B", "C" };
-     * data[1] = new String[]{ "1", "2", "3" };
-     * data[2] = new String[]{ "A1", "B2", "C3" };
-     * 
-     * 
- * - *

- * Calling this method will print: - *

- * - *
-     * 
-     * A, B, C
-     * 1, 2, 3
-     * A1, B2, C3
-     * 
-     * 
- * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Object... values) throws IOException { - printRecords(Arrays.asList(values)); - } - - /** - * Prints all the objects in the given JDBC result set. - * - * @param resultSet - * result set the values to print. - * @throws IOException - * If an I/O error occurs - * @throws SQLException - * if a database access error occurs - */ - public void printRecords(final ResultSet resultSet) throws SQLException, IOException { - final int columnCount = resultSet.getMetaData().getColumnCount(); - while (resultSet.next()) { - for (int i = 1; i <= columnCount; i++) { - final Object object = resultSet.getObject(i); - // TODO Who manages the Clob? The JDBC driver or must we close it? Is it driver-dependent? - print(object instanceof Clob ? ((Clob) object).getCharacterStream() : object); - } - println(); - } - } - - /** - * Prints all the objects with metadata in the given JDBC result set based on the header boolean. - * - * @param resultSet source of row data. - * @param printHeader whether to print headers. - * @throws IOException If an I/O error occurs - * @throws SQLException if a database access error occurs - * @since 1.9.0 - */ - public void printRecords(final ResultSet resultSet, final boolean printHeader) throws SQLException, IOException { - if (printHeader) { - printHeaders(resultSet); - } - printRecords(resultSet); - } - - /** - * Prints all the objects in the given {@link Stream} handling nested collections/arrays as records. - * - *

- * If the given Stream only contains simple objects, this method will print a single record like - * {@link #printRecord(Iterable)}. If the given Stream contains nested collections/arrays those nested elements - * will each be printed as records using {@link #printRecord(Object...)}. - *

- * - *

- * Given the following data structure: - *

- * - *
-     * 
-     * List<String[]> data = new ArrayList<>();
-     * data.add(new String[]{ "A", "B", "C" });
-     * data.add(new String[]{ "1", "2", "3" });
-     * data.add(new String[]{ "A1", "B2", "C3" });
-     * Stream<String[]> stream = data.stream();
-     * 
-     * 
- * - *

- * Calling this method will print: - *

- * - *
-     * 
-     * A, B, C
-     * 1, 2, 3
-     * A1, B2, C3
-     * 
-     * 
- * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - * @since 1.10.0 - */ - @SuppressWarnings("unused") // rethrow() throws IOException - public void printRecords(final Stream values) throws IOException { - values.forEachOrdered(t -> { - try { - printRecordObject(t); - } catch (final IOException e) { - throw IOUtils.rethrow(e); - } - }); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.Constants.CR; +import static org.apache.commons.csv.Constants.LF; +import static org.apache.commons.csv.Constants.SP; + +import java.io.Closeable; +import java.io.Flushable; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.sql.Blob; +import java.sql.Clob; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Arrays; +import java.util.Objects; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Stream; + +import org.apache.commons.io.function.IOStream; + +/** + * Prints values in a {@link CSVFormat CSV format}. + * + *

Values can be appended to the output by calling the {@link #print(Object)} method. + * Values are printed according to {@link String#valueOf(Object)}. + * To complete a record the {@link #println()} method has to be called. + * Comments can be appended by calling {@link #printComment(String)}. + * However a comment will only be written to the output if the {@link CSVFormat} supports comments. + *

+ * + *

The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)} + * or {@link #printRecord(Iterable)}. + * Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)} + * methods can be used to print several records at once. + *

+ * + *

Example:

+ * + *
+ * try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
+ *     printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
+ *     printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
+ *     printer.println();
+ *     printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
+ * } catch (IOException ex) {
+ *     ex.printStackTrace();
+ * }
+ * 
+ * + *

This code will write the following to csv.txt:

+ *
+ * id,userName,firstName,lastName,birthday
+ * 1,john73,John,Doe,1973-09-15
+ *
+ * 2,mary,Mary,Meyer,1985-03-29
+ * 
+ */ +public final class CSVPrinter implements Flushable, Closeable { + + /** The place that the values get written. */ + private final Appendable appendable; + + private final CSVFormat format; + + /** True if we just began a new record. */ + private boolean newRecord = true; + + private long recordCount; + + private final ReentrantLock lock = new ReentrantLock(); + + /** + * Creates a printer that will print values to the given stream following the CSVFormat. + *

+ * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation and escaping with a different + * character) are not supported. + *

+ * + * @param appendable stream to which to print. Must not be null. + * @param format the CSV format. Must not be null. + * @throws IOException thrown if the optional header cannot be printed. + * @throws IllegalArgumentException thrown if the parameters of the format are inconsistent. + * @throws NullPointerException thrown if either parameters are null. + */ + public CSVPrinter(final Appendable appendable, final CSVFormat format) throws IOException { + Objects.requireNonNull(appendable, "appendable"); + Objects.requireNonNull(format, "format"); + this.appendable = appendable; + this.format = format.copy(); + // TODO: Is it a good idea to do this here instead of on the first call to a print method? + // It seems a pain to have to track whether the header has already been printed or not. + final String[] headerComments = format.getHeaderComments(); + if (headerComments != null) { + for (final String line : headerComments) { + printComment(line); + } + } + if (format.getHeader() != null && !format.getSkipHeaderRecord()) { + this.printRecord((Object[]) format.getHeader()); + } + } + + @Override + public void close() throws IOException { + close(false); + } + + /** + * Closes the underlying stream with an optional flush first. + * + * @param flush whether to flush before the actual close. + * @throws IOException + * If an I/O error occurs + * @since 1.6 + * @see CSVFormat#getAutoFlush() + */ + public void close(final boolean flush) throws IOException { + if (flush || format.getAutoFlush()) { + flush(); + } + if (appendable instanceof Closeable) { + ((Closeable) appendable).close(); + } + } + + /** + * Prints the record separator and increments the record count. + * + * @throws IOException + * If an I/O error occurs + */ + private void endOfRecord() throws IOException { + println(); + recordCount++; + } + + /** + * Flushes the underlying stream. 
+ * + * @throws IOException + * If an I/O error occurs + */ + @Override + public void flush() throws IOException { + if (appendable instanceof Flushable) { + ((Flushable) appendable).flush(); + } + } + + /** + * Gets the target Appendable. + * + * @return the target Appendable. + */ + public Appendable getOut() { + return appendable; + } + + /** + * Gets the record count printed, this does not include comments or headers. + * + * @return the record count, this does not include comments or headers. + * @since 1.13.0 + */ + public long getRecordCount() { + return recordCount; + } + + /** + * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. + * + * @param value + * value to be output. + * @throws IOException + * If an I/O error occurs + */ + public void print(final Object value) throws IOException { + lock.lock(); + try { + printRaw(value); + } finally { + lock.unlock(); + } + } + + /** + * Prints a comment on a new line among the delimiter-separated values. + * + *

+ * Comments will always begin on a new line and occupy at least one full line. The character specified to start + * comments and a space will be inserted at the beginning of each new line in the comment. + *

+ * + *

+ * If comments are disabled in the current CSV format this method does nothing. + *

+ * + *

This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()} + * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use + * line breaks as record separators.

+ * + * @param comment + * the comment to output + * @throws IOException + * If an I/O error occurs + */ + public void printComment(final String comment) throws IOException { + lock.lock(); + try { + if (comment == null || !format.isCommentMarkerSet()) { + return; + } + if (!newRecord) { + println(); + } + appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional + appendable.append(SP); + for (int i = 0; i < comment.length(); i++) { + final char c = comment.charAt(i); + switch (c) { + case CR: + if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) { + i++; + } + // falls-through: break intentionally excluded. + case LF: + println(); + appendable.append(format.getCommentMarker().charValue()); // Explicit unboxing is intentional + appendable.append(SP); + break; + default: + appendable.append(c); + break; + } + } + println(); + } finally { + lock.unlock(); + } + } + + /** + * Prints headers for a result set based on its metadata. + * + * @param resultSet The ResultSet to query for metadata. + * @throws IOException If an I/O error occurs. + * @throws SQLException If a database access error occurs or this method is called on a closed result set. + * @since 1.9.0 + */ + public void printHeaders(final ResultSet resultSet) throws IOException, SQLException { + lock.lock(); + try { + try (IOStream stream = IOStream.of(format.builder().setHeader(resultSet).get().getHeader())) { + stream.forEachOrdered(this::print); + } + println(); + } finally { + lock.unlock(); + } + } + + /** + * Prints the record separator. + * + * @throws IOException + * If an I/O error occurs + */ + public void println() throws IOException { + lock.lock(); + try { + format.println(appendable); + newRecord = true; + } finally { + lock.unlock(); + } + } + + /** + * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. + * + * @param value + * value to be output. 
+ * @throws IOException + * If an I/O error occurs + */ + private void printRaw(final Object value) throws IOException { + format.print(value, appendable, newRecord); + newRecord = false; + } + + /** + * Prints the given values as a single record of delimiter-separated values followed by the record separator. + * + *

+ * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record + * separator to the output after printing the record, so there is no need to call {@link #println()}. + *

+ * + * @param values + * values to output. + * @throws IOException + * If an I/O error occurs + */ + @SuppressWarnings("resource") + public void printRecord(final Iterable values) throws IOException { + lock.lock(); + try { + IOStream.of(values).forEachOrdered(this::print); + endOfRecord(); + } finally { + lock.unlock(); + } + } + + /** + * Prints the given values as a single record of delimiter-separated values followed by the record separator. + * + *

+ * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record + * separator to the output after printing the record, so there is no need to call {@link #println()}. + *

+ * + * @param values + * values to output. + * @throws IOException + * If an I/O error occurs + */ + public void printRecord(final Object... values) throws IOException { + printRecord(Arrays.asList(values)); + } + + /** + * Prints the given values as a single record of delimiter-separated values followed by the record separator. + * + *

+ * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record + * separator to the output after printing the record, so there is no need to call {@link #println()}. + *

+ * + * @param stream + * values to output. + * @throws IOException + * If an I/O error occurs + * @since 1.10.0 + */ + @SuppressWarnings("resource") // caller closes. + public void printRecord(final Stream stream) throws IOException { + lock.lock(); + try { + IOStream.adapt(stream).forEachOrdered(stream.isParallel() ? this::printRaw : this::print); + endOfRecord(); + } finally { + lock.unlock(); + } + } + + private void printRecordObject(final Object value) throws IOException { + if (value instanceof Object[]) { + this.printRecord((Object[]) value); + } else if (value instanceof Iterable) { + this.printRecord((Iterable) value); + } else { + this.printRecord(value); + } + } + + @SuppressWarnings("resource") + private void printRecords(final IOStream stream) throws IOException { + format.limit(stream).forEachOrdered(this::printRecordObject); + } + + /** + * Prints all the objects in the given {@link Iterable} handling nested collections/arrays as records. + * + *

+ * If the given Iterable only contains simple objects, this method will print a single record like + * {@link #printRecord(Iterable)}. If the given Iterable contains nested collections/arrays those nested elements + * will each be printed as records using {@link #printRecord(Object...)}. + *

+ * + *

+ * Given the following data structure: + *

+ * + *
{@code
+     * List<String[]> data = new ArrayList<>();
+     * data.add(new String[]{ "A", "B", "C" });
+     * data.add(new String[]{ "1", "2", "3" });
+     * data.add(new String[]{ "A1", "B2", "C3" });
+     * }
+     * 
+ * + *

+ * Calling this method will print: + *

+ * + *
+     * {@code
+     * A, B, C
+     * 1, 2, 3
+     * A1, B2, C3
+     * }
+     * 
+ * + * @param values + * the values to print. + * @throws IOException + * If an I/O error occurs + */ + @SuppressWarnings("resource") + public void printRecords(final Iterable values) throws IOException { + printRecords(IOStream.of(values)); + } + + /** + * Prints all the objects in the given array handling nested collections/arrays as records. + * + *

+ * If the given array only contains simple objects, this method will print a single record like + * {@link #printRecord(Object...)}. If the given collections contain nested collections or arrays, those nested + * elements will each be printed as records using {@link #printRecord(Object...)}. + *

+ * + *

+ * Given the following data structure: + *

+ * + *
{@code
+     * String[][] data = new String[3][];
+     * data[0] = new String[]{ "A", "B", "C" };
+     * data[1] = new String[]{ "1", "2", "3" };
+     * data[2] = new String[]{ "A1", "B2", "C3" };
+     * }
+     * 
+ * + *

+ * Calling this method will print: + *

+ * + *
{@code
+     * A, B, C
+     * 1, 2, 3
+     * A1, B2, C3
+     * }
+     * 
+ * + * @param values + * the values to print. + * @throws IOException + * If an I/O error occurs + */ + public void printRecords(final Object... values) throws IOException { + printRecords(Arrays.asList(values)); + } + + /** + * Prints all the objects in the given JDBC result set. + *

+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a result set produces. This is most useful when you cannot limit rows + * through {@link Statement#setLargeMaxRows(long)} or {@link Statement#setMaxRows(int)}. + *

+ * + * @param resultSet The values to print. + * @throws IOException If an I/O error occurs. + * @throws SQLException Thrown when a database access error occurs. + */ + public void printRecords(final ResultSet resultSet) throws SQLException, IOException { + final int columnCount = resultSet.getMetaData().getColumnCount(); + while (resultSet.next() && format.useRow(resultSet.getRow())) { + lock.lock(); + try { + for (int i = 1; i <= columnCount; i++) { + final Object object = resultSet.getObject(i); + if (object instanceof Clob) { + try (Reader reader = ((Clob) object).getCharacterStream()) { + print(reader); + } + } else if (object instanceof Blob) { + try (InputStream inputStream = ((Blob) object).getBinaryStream()) { + print(inputStream); + } + } else { + print(object); + } + } + endOfRecord(); + } finally { + lock.unlock(); + } + } + } + + /** + * Prints all the objects with metadata in the given JDBC result set based on the header boolean. + *

+ * You can use {@link CSVFormat.Builder#setMaxRows(long)} to limit how many rows a result set produces. This is most useful when you cannot limit rows + * through {@link Statement#setLargeMaxRows(long)} or {@link Statement#setMaxRows(int)}. + *

+ * + * @param resultSet source of row data. + * @param printHeader whether to print headers. + * @throws IOException If an I/O error occurs + * @throws SQLException if a database access error occurs + * @since 1.9.0 + */ + public void printRecords(final ResultSet resultSet, final boolean printHeader) throws SQLException, IOException { + if (printHeader) { + printHeaders(resultSet); + } + printRecords(resultSet); + } + + /** + * Prints all the objects in the given {@link Stream} handling nested collections/arrays as records. + * + *

+ * If the given Stream only contains simple objects, this method will print a single record like + * {@link #printRecord(Iterable)}. If the given Stream contains nested collections/arrays those nested elements + * will each be printed as records using {@link #printRecord(Object...)}. + *

+ * + *

+ * Given the following data structure: + *

+ * + *
{@code
+     * List data = new ArrayList<>();
+     * data.add(new String[]{ "A", "B", "C" });
+     * data.add(new String[]{ "1", "2", "3" });
+     * data.add(new String[]{ "A1", "B2", "C3" });
+     * Stream stream = data.stream();
+     * }
+     * 
+ * + *

+ * Calling this method will print: + *

+ * + *
+     * {@code
+     * A, B, C
+     * 1, 2, 3
+     * A1, B2, C3
+     * }
+     * 
+ * + * @param values + * the values to print. + * @throws IOException + * If an I/O error occurs + * @since 1.10.0 + */ + @SuppressWarnings({ "resource" }) // Caller closes. + public void printRecords(final Stream values) throws IOException { + printRecords(IOStream.adapt(values)); + } +} diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 5524d9e845..8dab14d907 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; @@ -32,37 +34,47 @@ *

* Note: Support for {@link Serializable} is scheduled to be removed in version 2.0. * In version 1.8 the mapping between the column header and the column index was - * removed from the serialised state. The class maintains serialization compatibility + * removed from the serialized state. The class maintains serialization compatibility * with versions pre-1.8 for the record values; these must be accessed by index - * following deserialization. There will be loss of any functionally linked to the header - * mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa. + * following deserialization. There will be a loss of any functionality linked to the header + * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa. *

*/ public final class CSVRecord implements Serializable, Iterable { private static final long serialVersionUID = 1L; + /** + * The start position of this record as a character position in the source stream. This may or may not correspond to the byte position depending on the + * character set. + */ private final long characterPosition; - /** The accumulated comments (if any) */ + /** + * The starting position of this record in the source stream, measured in bytes. + */ + private final long bytePosition; + + /** The accumulated comments (if any). */ private final String comment; /** The record number. */ private final long recordNumber; - /** The values of the record */ + /** The values of the record. */ private final String[] values; /** The parser that originates this record. This is not serialized. */ private final transient CSVParser parser; - CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber, - final long characterPosition) { + CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber, + final long characterPosition, final long bytePosition) { this.recordNumber = recordNumber; this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY; this.parser = parser; this.comment = comment; this.characterPosition = characterPosition; + this.bytePosition = bytePosition; } /** @@ -88,7 +100,7 @@ public String get(final int i) { } /** - * Returns a value by name. + * Returns a value by name. If multiple instances of the header name exist, only the last occurrence is returned. * *

* Note: This requires a field mapping obtained from the original parser. @@ -102,9 +114,9 @@ public String get(final int i) { * the name of the column to be retrieved. * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}. * @throws IllegalStateException - * if no header mapping was provided + * if no header mapping was provided. * @throws IllegalArgumentException - * if {@code name} is not mapped or if the record is inconsistent + * if {@code name} is not mapped or if the record is inconsistent. * @see #isMapped(String) * @see #isConsistent() * @see #getParser() @@ -113,23 +125,31 @@ public String get(final int i) { public String get(final String name) { final Map headerMap = getHeaderMapRaw(); if (headerMap == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); + throw new IllegalStateException("No header mapping was specified, the record values can't be accessed by name"); } final Integer index = headerMap.get(name); if (index == null) { - throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, - headerMap.keySet())); + throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, headerMap.keySet())); } try { - return values[index.intValue()]; + return values[index.intValue()]; // Explicit unboxing is intentional } catch (final ArrayIndexOutOfBoundsException e) { - throw new IllegalArgumentException(String.format( - "Index for header '%s' is %d but CSVRecord only has %d values!", name, index, - Integer.valueOf(values.length))); + // Explicit boxing is intentional + throw new IllegalArgumentException( + String.format("Index for header '%s' is %d but CSVRecord only has %d values!", name, index, Integer.valueOf(values.length))); } } + /** + * Returns the starting position of this record in the source stream, measured in bytes. 
+ * + * @return the byte position of this record in the source stream. + * @since 1.13.0 + */ + public long getBytePosition() { + return bytePosition; + } + /** * Returns the start position of this record as a character position in the source stream. This may or may not * correspond to the byte position depending on the character set. @@ -143,8 +163,8 @@ public long getCharacterPosition() { /** * Returns the comment for this record, if any. * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF) - * the comment will be ignored. + * If there is no following record (that is, the comment is at EOF), + * then the comment will be ignored. * * @return the comment for this record, or null if no comment for this record is available. */ @@ -175,7 +195,7 @@ public CSVParser getParser() { * Returns the number of this record in the parsed CSV file. * *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * NOTE: If your CSV input has multi-line values, the returned number does not correspond to * the current line number of the parser that created this record. *

* @@ -189,10 +209,10 @@ public long getRecordNumber() { /** * Checks whether this record has a comment, false otherwise. * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF) - * the comment will be ignored. + * If there is no following record (that is, the comment is at EOF), + * then the comment will be ignored. * - * @return true if this record has a comment, false otherwise + * @return true if this record has a comment, false otherwise. * @since 1.3 */ public boolean hasComment() { @@ -207,7 +227,7 @@ public boolean hasComment() { * test but still produce parsable files. *

* - * @return true of this record is valid, false if not + * @return true of this record is valid, false if not. */ public boolean isConsistent() { final Map headerMap = getHeaderMapRaw(); @@ -215,7 +235,7 @@ public boolean isConsistent() { } /** - * Checks whether a given column is mapped, i.e. its name has been defined to the parser. + * Checks whether a given column is mapped, that is, its name has been defined to the parser. * * @param name * the name of the column to be retrieved. @@ -227,25 +247,25 @@ public boolean isMapped(final String name) { } /** - * Checks whether a column with given index has a value. + * Checks whether a column with a given index has a value. * * @param index - * a column index (0-based) - * @return whether a column with given index has a value + * a column index (0-based). + * @return whether a column with a given index has a value. */ public boolean isSet(final int index) { return 0 <= index && index < values.length; } /** - * Checks whether a given columns is mapped and has a value. + * Checks whether a given column is mapped and has a value. * * @param name * the name of the column to be retrieved. - * @return whether a given columns is mapped and has a value + * @return whether a given column is mapped and has a value. */ public boolean isSet(final String name) { - return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; + return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; // Explicit unboxing is intentional } /** @@ -261,7 +281,7 @@ public Iterator iterator() { /** * Puts all values of this record into the given Map. * - * @param the map type + * @param The map type. * @param map The Map to populate. * @return the given map. * @since 1.9.0 @@ -311,7 +331,9 @@ public List toList() { } /** - * Copies this record into a new Map of header name to record value. + * Copies this record into a new Map of header name to record value. 
If multiple instances of a header name exist, + * then only the last occurrence is mapped. + * *

* Editing the map does not update this instance. *

@@ -330,14 +352,13 @@ public Map toMap() { */ @Override public String toString() { - return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + - Arrays.toString(values) + "]"; + return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + Arrays.toString(values) + "]"; } /** - * Gets the values for this record. This is not a copy. + * Gets the values for this record. This is not a copy. * - * @return the values for this record. + * @return the values for this record, never null. * @since 1.10.0 */ public String[] values() { diff --git a/src/main/java/org/apache/commons/csv/Constants.java b/src/main/java/org/apache/commons/csv/Constants.java index 86f15ab83f..9dd276eccc 100644 --- a/src/main/java/org/apache/commons/csv/Constants.java +++ b/src/main/java/org/apache/commons/csv/Constants.java @@ -1,89 +1,90 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -/** - * Constants for this package. - */ -final class Constants { - - static final char BACKSLASH = '\\'; - - static final char BACKSPACE = '\b'; - - static final String COMMA = ","; - - /** - * Starts a comment, the remainder of the line is the comment. 
- */ - static final char COMMENT = '#'; - - static final char CR = '\r'; - - /** RFC 4180 defines line breaks as CRLF */ - static final String CRLF = "\r\n"; - - static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); - - static final String EMPTY = ""; - - /** The end of stream symbol */ - static final int END_OF_STREAM = -1; - - static final char FF = '\f'; - - static final char LF = '\n'; - - /** - * Unicode line separator. - */ - static final String LINE_SEPARATOR = "\u2028"; - - /** - * Unicode next line. - */ - static final String NEXT_LINE = "\u0085"; - - /** - * Unicode paragraph separator. - */ - static final String PARAGRAPH_SEPARATOR = "\u2029"; - - static final char PIPE = '|'; - - /** ASCII record separator */ - static final char RS = 30; - - static final char SP = ' '; - - static final char TAB = '\t'; - - /** Undefined state for the lookahead char */ - static final int UNDEFINED = -2; - - /** ASCII unit separator */ - static final char US = 31; - - static final String[] EMPTY_STRING_ARRAY = {}; - - /** No instances. */ - private Constants() { - // noop - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv; + +/** + * Private constants for this package. + */ +final class Constants { + + static final char BACKSLASH = '\\'; + + static final char BACKSPACE = '\b'; + + static final String COMMA = ","; + + /** + * Starts a comment, the remainder of the line is the comment. + */ + static final char COMMENT = '#'; + + static final char CR = '\r'; + + /** RFC 4180 defines line breaks as CRLF. */ + static final String CRLF = "\r\n"; + + static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); // Explicit boxing is intentional. + + static final String EMPTY = ""; + + static final String[] EMPTY_STRING_ARRAY = {}; + + static final char FF = '\f'; + + static final char LF = '\n'; + + /** + * Unicode line separator. + */ + static final String LINE_SEPARATOR = "\u2028"; + + /** + * Unicode next line. + */ + static final String NEXT_LINE = "\u0085"; + + /** + * Unicode paragraph separator. + */ + static final String PARAGRAPH_SEPARATOR = "\u2029"; + + static final char PIPE = '|'; + + /** ASCII record separator. */ + static final char RS = 30; + + static final char SP = ' '; + + static final String SQL_NULL_STRING = "\\N"; + + static final char TAB = '\t'; + + /** Undefined state for the lookahead char. */ + static final int UNDEFINED = -2; + + /** ASCII unit separator. */ + static final char US = 31; + + /** No instances. */ + private Constants() { + // noop + } + +} diff --git a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java index 28ce071987..8087f16eeb 100644 --- a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java +++ b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java @@ -1,27 +1,29 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; /** - * Determines how duplicate header fields should be handled + * Enumerates how duplicate header fields should be handled * if {@link CSVFormat.Builder#setHeader(Class)} is not null. * - * @since 1.9.0 + * @since 1.10.0 */ public enum DuplicateHeaderMode { @@ -31,7 +33,7 @@ public enum DuplicateHeaderMode { ALLOW_ALL, /** - * Allows duplicate headers only if they're empty strings or null. + * Allows duplicate headers only if they're empty, blank, or null strings. 
*/ ALLOW_EMPTY, diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 89e63e3a8b..20c1ef5444 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -1,165 +1,217 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv; import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.END_OF_STREAM; import static org.apache.commons.csv.Constants.LF; import static org.apache.commons.csv.Constants.UNDEFINED; +import static org.apache.commons.io.IOUtils.EOF; -import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.input.UnsynchronizedBufferedReader; /** * A special buffered reader which supports sophisticated read access. *

- * In particular the reader supports a look-ahead option, which allows you to see the next char returned by - * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}. + * In particular the reader supports a look-ahead option, which allows you to see the next char returned by {@link #read()}. This reader also tracks how many + * characters have been read with {@link #getPosition()}. *

*/ -final class ExtendedBufferedReader extends BufferedReader { +final class ExtendedBufferedReader extends UnsynchronizedBufferedReader { /** The last char returned */ private int lastChar = UNDEFINED; + private int lastCharMark = UNDEFINED; + /** The count of EOLs (CR/LF/CRLF) seen so far */ - private long eolCounter; + private long lineNumber; + + private long lineNumberMark; - /** The position, which is number of characters read so far */ + /** The position, which is the number of characters read so far */ private long position; - private boolean closed; + private long positionMark; + + /** The number of bytes read so far. */ + private long bytesRead; + + private long bytesReadMark; + + /** Encoder for calculating the number of bytes for each character read. */ + private final CharsetEncoder encoder; /** - * Created extended buffered reader using default buffer-size + * Constructs a new instance using the default buffer size. */ ExtendedBufferedReader(final Reader reader) { + this(reader, null, false); + } + + /** + * Constructs a new instance with the specified reader, character set, and byte tracking option. Initializes an encoder if byte tracking is enabled and a + * character set is provided. + * + * @param reader the reader supports a look-ahead option. + * @param charset the character set for encoding, or {@code null} if not applicable. + * @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it. + */ + ExtendedBufferedReader(final Reader reader, final Charset charset, final boolean trackBytes) { super(reader); + encoder = charset != null && trackBytes ? charset.newEncoder() : null; } /** * Closes the stream. * - * @throws IOException - * If an I/O error occurs + * @throws IOException If an I/O error occurs */ @Override public void close() throws IOException { // Set ivars before calling super close() in case close() throws an IOException. 
- closed = true; - lastChar = END_OF_STREAM; + lastChar = EOF; super.close(); } /** - * Returns the current line number + * Gets the number of bytes read by the reader. * - * @return the current line number + * @return the number of bytes read by the read */ - long getCurrentLineNumber() { - // Check if we are at EOL or EOF or just starting - if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) { - return eolCounter; // counter is accurate - } - return eolCounter + 1; // Allow for counter being incremented only at EOL + long getBytesRead() { + return this.bytesRead; } - /** - * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by - * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no - * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached - * on the last read then this will return {@link Constants#END_OF_STREAM}. - * - * @return the last character that was read - */ - int getLastChar() { - return lastChar; + private long getEncodedCharLength(final char[] buf, final int offset, final int length) throws CharacterCodingException { + long len = 0; + int previous = lastChar; + for (int i = offset; i < offset + length; i++) { + len += getEncodedCharLength(previous, buf[i]); + previous = buf[i]; + } + return len; } /** - * Gets the character position in the reader. + * Gets the byte length of the given character based on the original Unicode specification, which defined characters as fixed-width 16-bit entities. + *

+ * The Unicode characters are divided into two main ranges: + *

    + *
  • U+0000 to U+FFFF (Basic Multilingual Plane, BMP): + *
      + *
    • Represented using a single 16-bit {@code char}.
    • + *
    • Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.
    • + *
    + *
  • + *
  • U+10000 to U+10FFFF (Supplementary Characters): + *
      + *
    • Represented as a pair of {@code char}s:
    • + *
    • The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).
    • + *
    • The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).
    • + *
    • Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.
    • + *
    + *
  • + *
* - * @return the current position in the reader (counting characters, not bytes since this is a Reader) + * @param current the current character to process. + * @return the byte length of the character. + * @throws CharacterCodingException if the character cannot be encoded. */ - long getPosition() { - return this.position; + private int getEncodedCharLength(final int current) throws CharacterCodingException { + return getEncodedCharLength(lastChar, current); } - public boolean isClosed() { - return closed; + private int getEncodedCharLength(final int previous, final int current) throws CharacterCodingException { + final char cChar = (char) current; + final char lChar = (char) previous; + if (!Character.isSurrogate(cChar)) { + return encoder.encode(CharBuffer.wrap(new char[] { cChar })).limit(); + } + if (Character.isHighSurrogate(cChar)) { + // Move on to the next char (low surrogate) + return 0; + } + if (Character.isSurrogatePair(lChar, cChar)) { + return encoder.encode(CharBuffer.wrap(new char[] { lChar, cChar })).limit(); + } + throw new CharacterCodingException(); } /** - * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will - * still return this value. Does not affect line number or last character. + * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by any of the read methods. This will not + * include a character read using the {@link #peek()} method. If no character has been read then this will return {@link Constants#UNDEFINED}. If the end of + * the stream was reached on the last read then this will return {@link IOUtils#EOF}. 
* - * @return the next character - * - * @throws IOException - * If an I/O error occurs + * @return the last character that was read */ - int lookAhead() throws IOException { - super.mark(1); - final int c = super.read(); - super.reset(); - - return c; + int getLastChar() { + return lastChar; } /** - * Populates the buffer with the next {@code buf.length} characters in the - * current reader without consuming them. The next call to {@link #read()} will - * still return the next value. This doesn't affect line number or last - * character. + * Returns the current line number * - * @param buf the buffer to fill for the look ahead. - * @return the buffer itself - * @throws IOException If an I/O error occurs + * @return the current line number */ - char[] lookAhead(final char[] buf) throws IOException { - final int n = buf.length; - super.mark(n); - super.read(buf, 0, n); - super.reset(); - - return buf; + long getLineNumber() { + // Check if we are at EOL or EOF or just starting + if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == EOF) { + return lineNumber; // counter is accurate + } + return lineNumber + 1; // Allow for counter being incremented only at EOL } /** - * Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This - * doesn't affect line number or last character. + * Gets the character position in the reader. * - * @param n the number characters look ahead. - * @return the next n characters. 
- * @throws IOException If an I/O error occurs + * @return the current position in the reader (counting characters, not bytes since this is a Reader) */ - char[] lookAhead(final int n) throws IOException { - final char[] buf = new char[n]; - return lookAhead(buf); + long getPosition() { + return this.position; + } + + @Override + public void mark(final int readAheadLimit) throws IOException { + lineNumberMark = lineNumber; + lastCharMark = lastChar; + positionMark = position; + bytesReadMark = bytesRead; + super.mark(readAheadLimit); } @Override public int read() throws IOException { final int current = super.read(); - if (current == CR || current == LF && lastChar != CR || - current == END_OF_STREAM && lastChar != CR && lastChar != LF && lastChar != END_OF_STREAM) { - eolCounter++; + if (current == CR || current == LF && lastChar != CR || current == EOF && lastChar != CR && lastChar != LF && lastChar != EOF) { + lineNumber++; + } + if (encoder != null && current != EOF) { + this.bytesRead += getEncodedCharLength(current); } lastChar = current; position++; @@ -171,59 +223,55 @@ public int read(final char[] buf, final int offset, final int length) throws IOE if (length == 0) { return 0; } - final int len = super.read(buf, offset, length); - + if (encoder != null && len > 0) { + this.bytesRead += getEncodedCharLength(buf, offset, len); + } if (len > 0) { - for (int i = offset; i < offset + len; i++) { final char ch = buf[i]; if (ch == LF) { if (CR != (i > offset ? buf[i - 1] : lastChar)) { - eolCounter++; + lineNumber++; } } else if (ch == CR) { - eolCounter++; + lineNumber++; } } - lastChar = buf[offset + len - 1]; - - } else if (len == -1) { - lastChar = END_OF_STREAM; + } else if (len == EOF) { + lastChar = EOF; } - position += len; return len; } /** - * Gets the next line, dropping the line terminator(s). This method should only be called when processing a - * comment, otherwise information can be lost. + * Gets the next line, dropping the line terminator(s). 
This method should only be called when processing a comment, otherwise, information can be lost. *

- * Increments {@link #eolCounter} and updates {@link #position}. + * Increments {@link #lineNumber} and updates {@link #position}. *

*

- * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise the last EOL character. + * Sets {@link #lastChar} to {@link IOUtils#EOF} at EOF, otherwise the last EOL character. *

* * @return the line that was read, or null if reached EOF. */ @Override public String readLine() throws IOException { - if (lookAhead() == END_OF_STREAM) { + if (peek() == EOF) { return null; } final StringBuilder buffer = new StringBuilder(); while (true) { final int current = read(); if (current == CR) { - final int next = lookAhead(); + final int next = peek(); if (next == LF) { read(); } } - if (current == END_OF_STREAM || current == LF || current == CR) { + if (current == EOF || current == LF || current == CR) { break; } buffer.append((char) current); @@ -231,4 +279,12 @@ public String readLine() throws IOException { return buffer.toString(); } + @Override + public void reset() throws IOException { + lineNumber = lineNumberMark; + lastChar = lastCharMark; + position = positionMark; + bytesRead = bytesReadMark; + super.reset(); + } } diff --git a/src/main/java/org/apache/commons/csv/IOUtils.java b/src/main/java/org/apache/commons/csv/IOUtils.java deleted file mode 100644 index c4baeb4703..0000000000 --- a/src/main/java/org/apache/commons/csv/IOUtils.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.csv; - -import java.io.IOException; -import java.io.Reader; -import java.io.Writer; -import java.nio.Buffer; -import java.nio.CharBuffer; - -/** Copied from Apache Commons IO. */ -final class IOUtils { - - /** - * The default buffer size ({@value}). - */ - static final int DEFAULT_BUFFER_SIZE = 1024 * 4; - - /** - * Represents the end-of-file (or stream). - */ - private static final int EOF = -1; - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *

- * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *

- * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to append to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output) throws IOException { - return copy(input, output, CharBuffer.allocate(DEFAULT_BUFFER_SIZE)); - } - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *

- * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *

- * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output, final CharBuffer buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - ((Buffer) buffer).flip(); - output.append(buffer, 0, n); - count += n; - } - return count; - } - - /** - * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *

- * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *

- *

- * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - *

- * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 1.3 - */ - static long copyLarge(final Reader input, final Writer output) throws IOException { - return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]); - } - - /** - * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *

- * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *

- * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.2 - */ - static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - output.write(buffer, 0, n); - count += n; - } - return count; - } - - /** No instances. */ - private IOUtils() { - // Noop - } - - /** - * Throws the given throwable. - * - * @param The throwable cast type. - * @param throwable The throwable to rethrow. - * @return nothing because we throw. - * @throws T Always thrown. - */ - @SuppressWarnings("unchecked") - static RuntimeException rethrow(final Throwable throwable) throws T { - throw (T) throwable; - } - -} diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index 06b2c9c222..fe964480a4 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -1,64 +1,52 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; -import static org.apache.commons.csv.Constants.BACKSPACE; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.END_OF_STREAM; -import static org.apache.commons.csv.Constants.FF; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.TAB; -import static org.apache.commons.csv.Constants.UNDEFINED; -import static org.apache.commons.csv.Token.Type.COMMENT; -import static org.apache.commons.csv.Token.Type.EOF; -import static org.apache.commons.csv.Token.Type.EORECORD; -import static org.apache.commons.csv.Token.Type.INVALID; -import static org.apache.commons.csv.Token.Type.TOKEN; +import static org.apache.commons.io.IOUtils.EOF; import java.io.Closeable; import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.io.IOUtils; /** * Lexical analyzer. 
*/ final class Lexer implements Closeable { - private static final String CR_STRING = Character.toString(CR); - private static final String LF_STRING = Character.toString(LF); - - /** - * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it - * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two - * chars (using surrogates) and thus there should never be a collision with a real text char. - */ - private static final char DISABLED = '\ufffe'; + private static final String CR_STRING = Character.toString(Constants.CR); + private static final String LF_STRING = Character.toString(Constants.LF); private final char[] delimiter; private final char[] delimiterBuf; private final char[] escapeDelimiterBuf; - private final char escape; - private final char quoteChar; - private final char commentStart; - + private final int escape; + private final int quoteChar; + private final int commentStart; private final boolean ignoreSurroundingSpaces; private final boolean ignoreEmptyLines; + private final boolean lenientEof; + private final boolean trailingData; - /** The input stream */ + /** The buffered reader. 
*/ private final ExtendedBufferedReader reader; private String firstEol; @@ -66,21 +54,43 @@ final class Lexer implements Closeable { Lexer(final CSVFormat format, final ExtendedBufferedReader reader) { this.reader = reader; - this.delimiter = format.getDelimiterString().toCharArray(); - this.escape = mapNullToDisabled(format.getEscapeCharacter()); - this.quoteChar = mapNullToDisabled(format.getQuoteCharacter()); - this.commentStart = mapNullToDisabled(format.getCommentMarker()); + this.delimiter = format.getDelimiterCharArray(); + this.escape = nullToDisabled(format.getEscapeCharacter()); + this.quoteChar = nullToDisabled(format.getQuoteCharacter()); + this.commentStart = nullToDisabled(format.getCommentMarker()); this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces(); this.ignoreEmptyLines = format.getIgnoreEmptyLines(); + this.lenientEof = format.getLenientEof(); + this.trailingData = format.getTrailingData(); this.delimiterBuf = new char[delimiter.length - 1]; this.escapeDelimiterBuf = new char[2 * delimiter.length - 1]; } + /** + * Appends the next escaped character to the token's content. + * + * @param token the current token. + * @throws IOException on stream access error. + * @throws CSVException Thrown on invalid input. + */ + private void appendNextEscapedCharacterToToken(final Token token) throws IOException { + if (isEscapeDelimiter()) { + token.content.append(delimiter); + } else { + final int unescaped = readEscape(); + if (unescaped == EOF) { // unexpected char after escape + token.content.append((char) escape).append((char) reader.getLastChar()); + } else { + token.content.append((char) unescaped); + } + } + } + /** * Closes resources. * * @throws IOException - * If an I/O error occurs + * If an I/O error occurs. */ @Override public void close() throws IOException { @@ -88,24 +98,33 @@ public void close() throws IOException { } /** - * Returns the current character position + * Gets the number of bytes read. 
+ * + * @return the number of bytes read. + */ + long getBytesRead() { + return reader.getBytesRead(); + } + + /** + * Gets the current character position. * - * @return the current character position + * @return the current character position. */ long getCharacterPosition() { return reader.getPosition(); } /** - * Returns the current line number + * Gets the current line number. * - * @return the current line number + * @return the current line number. */ long getCurrentLineNumber() { - return reader.getCurrentLineNumber(); + return reader.getLineNumber(); } - String getFirstEol(){ + String getFirstEol() { return firstEol; } @@ -118,7 +137,7 @@ boolean isCommentStart(final int ch) { } /** - * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}. + * Tests whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#peek(char[])}. * * @param ch * the current character. @@ -134,24 +153,25 @@ boolean isDelimiter(final int ch) throws IOException { isLastTokenDelimiter = true; return true; } - reader.lookAhead(delimiterBuf); + Arrays.fill(delimiterBuf, '\0'); + reader.peek(delimiterBuf); for (int i = 0; i < delimiterBuf.length; i++) { - if (delimiterBuf[i] != delimiter[i+1]) { + if (delimiterBuf[i] != delimiter[i + 1]) { return false; } } final int count = reader.read(delimiterBuf, 0, delimiterBuf.length); - isLastTokenDelimiter = count != END_OF_STREAM; + isLastTokenDelimiter = count != EOF; return isLastTokenDelimiter; } /** - * Tests if the given character indicates end of file. + * Tests if the given character indicates the end of the file. * - * @return true if the given character indicates end of file. + * @return true if the given character indicates the end of the file. 
*/ boolean isEndOfFile(final int ch) { - return ch == END_OF_STREAM; + return ch == EOF; } /** @@ -164,15 +184,16 @@ boolean isEscape(final int ch) { } /** - * Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}. + * Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#peek(char[])}. * * For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]". * - * @return true if the next characters constitute a escape delimiter. + * @return true if the next characters constitute an escape delimiter. * @throws IOException If an I/O error occurs. */ boolean isEscapeDelimiter() throws IOException { - reader.lookAhead(escapeDelimiterBuf); + Arrays.fill(escapeDelimiterBuf, '\0'); + reader.peek(escapeDelimiterBuf); if (escapeDelimiterBuf[0] != delimiter[0]) { return false; } @@ -182,7 +203,7 @@ boolean isEscapeDelimiter() throws IOException { } } final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length); - return count != END_OF_STREAM; + return count != EOF; } private boolean isMetaChar(final int ch) { @@ -194,17 +215,13 @@ boolean isQuoteChar(final int ch) { } /** - * Tests if the current character represents the start of a line: a CR, LF or is at the start of the file. + * Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file. * - * @param ch the character to check + * @param ch the character to check. * @return true if the character is at the start of a line. */ boolean isStartOfLine(final int ch) { - return ch == LF || ch == CR || ch == UNDEFINED; - } - - private char mapNullToDisabled(final Character c) { - return c == null ? 
DISABLED : c.charValue(); + return ch == Constants.LF || ch == Constants.CR || ch == Constants.UNDEFINED; } /** @@ -213,85 +230,82 @@ private char mapNullToDisabled(final Character c) { * A token corresponds to a term, a record change or an end-of-file indicator. *

* - * @param token - * an existing Token object to reuse. The caller is responsible to initialize the Token. + * @param token an existing Token object to reuse. The caller is responsible for initializing the Token. * @return the next token found. - * @throws IOException on stream access error. + * @throws IOException on stream access error. + * @throws CSVException Thrown on invalid input. */ Token nextToken(final Token token) throws IOException { - - // get the last read char (required for empty line detection) + // Get the last read char (required for empty line detection) int lastChar = reader.getLastChar(); - // read the next char and set eol int c = reader.read(); - /* - * Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF - * - they are equivalent here. - */ + // Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF - they are equivalent here. boolean eol = readEndOfLine(c); - // empty line detection: eol AND (last char was EOL or beginning) if (ignoreEmptyLines) { while (eol && isStartOfLine(lastChar)) { - // go on char ahead ... + // Go on char ahead ... lastChar = c; c = reader.read(); eol = readEndOfLine(c); - // reached end of file without any content (empty line at the end) + // reached the end of the file without any content (empty line at the end) if (isEndOfFile(c)) { - token.type = EOF; + token.type = Token.Type.EOF; // don't set token.isReady here because no content return token; } } } - - // did we reach eof during the last iteration already ? EOF + // Did we reach EOF during the last iteration already? 
EOF if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) { - token.type = EOF; + token.type = Token.Type.EOF; // don't set token.isReady here because no content return token; } - if (isStartOfLine(lastChar) && isCommentStart(c)) { final String line = reader.readLine(); if (line == null) { - token.type = EOF; + token.type = Token.Type.EOF; // don't set token.isReady here because no content return token; } final String comment = line.trim(); token.content.append(comment); - token.type = COMMENT; + token.type = Token.Type.COMMENT; return token; } - - // important: make sure a new char gets consumed in each iteration - while (token.type == INVALID) { + // Important: make sure a new char gets consumed in each iteration + while (token.type == Token.Type.INVALID) { + // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must + // only be evaluated once per character. Remember a match found while skipping whitespace below. + boolean delimiter = false; // ignore whitespaces at beginning of a token if (ignoreSurroundingSpaces) { - while (Character.isWhitespace((char)c) && !isDelimiter(c) && !eol) { + while (Character.isWhitespace((char) c) && !eol) { + if (isDelimiter(c)) { + delimiter = true; + break; + } c = reader.read(); eol = readEndOfLine(c); } } - // ok, start of token reached: encapsulated, or token - if (isDelimiter(c)) { + if (delimiter || isDelimiter(c)) { // empty token return TOKEN("") - token.type = TOKEN; + token.type = Token.Type.TOKEN; } else if (eol) { // empty token return EORECORD("") // noop: token.content.append(""); - token.type = EORECORD; + token.type = Token.Type.EORECORD; } else if (isQuoteChar(c)) { // consume encapsulated token parseEncapsulatedToken(token); } else if (isEndOfFile(c)) { // end of file return EOF() // noop: token.content.append(""); - token.type = EOF; + token.type = Token.Type.EOF; token.isReady = true; // there is data at EOF } else { // next token must be a 
simple token @@ -302,15 +316,19 @@ Token nextToken(final Token token) throws IOException { return token; } + private int nullToDisabled(final Character c) { + return c == null ? Constants.UNDEFINED : c.charValue(); // Explicit unboxing + } + /** * Parses an encapsulated token. *

- * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included + * Encapsulated tokens are surrounded by the given encapsulating string. The encapsulator itself might be included * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after - * an encapsulated token are ignored. The token is finished when one of the following conditions become true: + * an encapsulated token are ignored. The token is finished when one of the following conditions becomes true: *

*
    - *
  • an unescaped encapsulator has been reached, and is followed by optional whitespace then:
  • + *
  • An unescaped encapsulator has been reached and is followed by optional whitespace then:
  • *
      *
    • delimiter (TOKEN)
    • *
    • end of line (EORECORD)
    • @@ -321,29 +339,19 @@ Token nextToken(final Token token) throws IOException { * the current token * @return a valid token object * @throws IOException - * on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL + * Thrown when in an invalid state: EOF before closing encapsulator or invalid character before + * delimiter or EOL. + * @throws CSVException Thrown on invalid input. */ private Token parseEncapsulatedToken(final Token token) throws IOException { token.isQuoted = true; - // save current line number in case needed for IOE + // Save current line number in case needed for IOE final long startLineNumber = getCurrentLineNumber(); int c; while (true) { c = reader.read(); - - if (isEscape(c)) { - if (isEscapeDelimiter()) { - token.content.append(delimiter); - } else { - final int unescaped = readEscape(); - if (unescaped == END_OF_STREAM) { // unexpected char after escape - token.content.append((char) c).append((char) reader.getLastChar()); - } else { - token.content.append((char) unescaped); - } - } - } else if (isQuoteChar(c)) { - if (isQuoteChar(reader.lookAhead())) { + if (isQuoteChar(c)) { + if (isQuoteChar(reader.peek())) { // double or escaped encapsulator -> add single encapsulator to token c = reader.read(); token.content.append((char) c); @@ -352,29 +360,37 @@ private Token parseEncapsulatedToken(final Token token) throws IOException { while (true) { c = reader.read(); if (isDelimiter(c)) { - token.type = TOKEN; + token.type = Token.Type.TOKEN; return token; } if (isEndOfFile(c)) { - token.type = EOF; + token.type = Token.Type.EOF; token.isReady = true; // There is data at EOF return token; } if (readEndOfLine(c)) { - token.type = EORECORD; + token.type = Token.Type.EORECORD; return token; } - if (!Character.isWhitespace((char)c)) { + if (trailingData) { + token.content.append((char) c); + } else if (!Character.isWhitespace((char) c)) { // error invalid char between token and next delimiter - throw new 
IOException("(line " + getCurrentLineNumber() + - ") invalid char between encapsulated token and delimiter"); + throw new CSVException("Invalid character between encapsulated token and delimiter at line: %,d, position: %,d", + getCurrentLineNumber(), getCharacterPosition()); } } } + } else if (isEscape(c)) { + appendNextEscapedCharacterToToken(token); } else if (isEndOfFile(c)) { + if (lenientEof) { + token.type = Token.Type.EOF; + token.isReady = true; // There is data at EOF + return token; + } // error condition (end of file before end of token) - throw new IOException("(startline " + startLineNumber + - ") EOF reached before encapsulated token finished"); + throw new CSVException("(startline %,d) EOF reached before encapsulated token finished", startLineNumber); } else { // consume character token.content.append((char) c); @@ -385,55 +401,45 @@ private Token parseEncapsulatedToken(final Token token) throws IOException { /** * Parses a simple token. *

      - * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped - * delimiters (as \, or \;). The token is finished when one of the following conditions become true: + * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped delimiters (as \, or \;). The token is finished + * when one of the following conditions becomes true: *

      *
        - *
      • end of line has been reached (EORECORD)
      • - *
      • end of stream has been reached (EOF)
      • - *
      • an unescaped delimiter has been reached (TOKEN)
      • + *
      • The end of line has been reached (EORECORD)
      • + *
      • The end of stream has been reached (EOF)
      • + *
      • An unescaped delimiter has been reached (TOKEN)
      • *
      * - * @param token - * the current token - * @param ch - * the current character - * @return the filled token - * @throws IOException - * on stream access error + * @param token the current token. + * @param ch the current character. + * @return the filled token. + * @throws IOException on stream access error. + * @throws CSVException Thrown on invalid input. */ - private Token parseSimpleToken(final Token token, int ch) throws IOException { + private Token parseSimpleToken(final Token token, final int ch) throws IOException { // Faster to use while(true)+break than while(token.type == INVALID) + int cur = ch; while (true) { - if (readEndOfLine(ch)) { - token.type = EORECORD; + if (readEndOfLine(cur)) { + token.type = Token.Type.EORECORD; break; } - if (isEndOfFile(ch)) { - token.type = EOF; + if (isEndOfFile(cur)) { + token.type = Token.Type.EOF; token.isReady = true; // There is data at EOF break; } - if (isDelimiter(ch)) { - token.type = TOKEN; + if (isDelimiter(cur)) { + token.type = Token.Type.TOKEN; break; } // continue - if (isEscape(ch)) { - if (isEscapeDelimiter()) { - token.content.append(delimiter); - } else { - final int unescaped = readEscape(); - if (unescaped == END_OF_STREAM) { // unexpected char after escape - token.content.append((char) ch).append((char) reader.getLastChar()); - } else { - token.content.append((char) unescaped); - } - } + if (isEscape(cur)) { + appendNextEscapedCharacterToToken(token); } else { - token.content.append((char) ch); + token.content.append((char) cur); } - ch = reader.read(); // continue + cur = reader.read(); // continue } if (ignoreSurroundingSpaces) { @@ -446,13 +452,14 @@ private Token parseSimpleToken(final Token token, int ch) throws IOException { /** * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character... * - * @return true if the given or next character is a line-terminator + * @return true if the given or next character is a line-terminator. 
*/ - boolean readEndOfLine(int ch) throws IOException { + boolean readEndOfLine(final int ch) throws IOException { // check if we have \r\n... - if (ch == CR && reader.lookAhead() == LF) { + int cur = ch; + if (cur == Constants.CR && reader.peek() == Constants.LF) { // note: does not change ch outside of this method! - ch = reader.read(); + cur = reader.read(); // Save the EOL state if (firstEol == null) { this.firstEol = Constants.CRLF; @@ -460,64 +467,61 @@ boolean readEndOfLine(int ch) throws IOException { } // save EOL state here. if (firstEol == null) { - if (ch == LF) { + if (cur == Constants.LF) { this.firstEol = LF_STRING; - } else if (ch == CR) { + } else if (cur == Constants.CR) { this.firstEol = CR_STRING; } } - return ch == LF || ch == CR; + return cur == Constants.LF || cur == Constants.CR; } // TODO escape handling needs more work /** - * Handle an escape sequence. - * The current character must be the escape character. - * On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()} - * on the input stream. + * Handle an escape sequence. The current character must be the escape character. On return, the next character is available by calling + * {@link ExtendedBufferedReader#getLastChar()} on the input stream. * - * @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if char following the escape is - * invalid. - * @throws IOException if there is a problem reading the stream or the end of stream is detected: - * the escape character is not allowed at end of stream + * @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is invalid. + * @throws IOException if there is a problem reading the stream or the end of stream is detected: the escape character is not allowed at end of stream + * @throws CSVException Thrown on invalid input. 
*/ int readEscape() throws IOException { // the escape char has just been read (normally a backslash) final int ch = reader.read(); switch (ch) { case 'r': - return CR; + return Constants.CR; case 'n': - return LF; + return Constants.LF; case 't': - return TAB; + return Constants.TAB; case 'b': - return BACKSPACE; + return Constants.BACKSPACE; case 'f': - return FF; - case CR: - case LF: - case FF: // TODO is this correct? - case TAB: // TODO is this correct? Do tabs need to be escaped? - case BACKSPACE: // TODO is this correct? + return Constants.FF; + case Constants.CR: + case Constants.LF: + case Constants.FF: // TODO is this correct? + case Constants.TAB: // TODO is this correct? Do tabs need to be escaped? + case Constants.BACKSPACE: // TODO is this correct? return ch; - case END_OF_STREAM: - throw new IOException("EOF whilst processing escape sequence"); + case EOF: + throw new CSVException("EOF while processing escape sequence"); default: // Now check for meta-characters if (isMetaChar(ch)) { return ch; } // indicate unexpected char - available from in.getLastChar() - return END_OF_STREAM; + return EOF; } } void trimTrailingSpaces(final StringBuilder buffer) { int length = buffer.length(); while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) { - length = length - 1; + length--; } if (length != buffer.length()) { buffer.setLength(length); diff --git a/src/main/java/org/apache/commons/csv/QuoteMode.java b/src/main/java/org/apache/commons/csv/QuoteMode.java index a9b33a10fa..ae64ab4863 100644 --- a/src/main/java/org/apache/commons/csv/QuoteMode.java +++ b/src/main/java/org/apache/commons/csv/QuoteMode.java @@ -1,23 +1,27 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; /** - * Defines quoting behavior. + * Enumerates quoting behavior. + * + * @see CSVFormat.Builder#setQuoteMode(QuoteMode) */ public enum QuoteMode { @@ -32,7 +36,7 @@ public enum QuoteMode { ALL_NON_NULL, /** - * Quotes fields which contain special characters such as a the field delimiter, quote character or any of the + * Quotes fields that contain special characters such as a field delimiter, quote character, or any of the * characters in the line separator string. 
*/ MINIMAL, diff --git a/src/main/java/org/apache/commons/csv/Token.java b/src/main/java/org/apache/commons/csv/Token.java index 2dedc58a5e..87af335678 100644 --- a/src/main/java/org/apache/commons/csv/Token.java +++ b/src/main/java/org/apache/commons/csv/Token.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; @@ -21,16 +23,18 @@ /** * Internal token representation. - *

      - * It is used as contract between the lexer and the parser. + *

      + * This is used as a contract between the lexer and the parser. + *

      */ final class Token { enum Type { - /** Token has no valid content, i.e. is in its initialized state. */ + + /** Token has no valid content, that is, is in its initialized state. */ INVALID, - /** Token with content, at beginning or in the middle of a line. */ + /** Token with content, at the beginning or in the middle of a line. */ TOKEN, /** Token (which can have content) when the end of file is reached. */ @@ -43,14 +47,14 @@ enum Type { COMMENT } - /** length of the initial token (content-)buffer */ - private static final int INITIAL_TOKEN_LENGTH = 50; + /** Length of the initial token (content-)buffer */ + private static final int DEFAULT_CAPACITY = 50; /** Token type */ Token.Type type = INVALID; - /** The content buffer. */ - final StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH); + /** The content buffer, never null. */ + final StringBuilder content = new StringBuilder(DEFAULT_CAPACITY); /** Token ready flag: indicates a valid token with content (ready for the parser). */ boolean isReady; @@ -65,12 +69,12 @@ void reset() { } /** - * Eases IDE debugging. + * Converts the token state to a string to ease debugging. * * @return a string helpful for debugging. */ @Override public String toString() { - return type.name() + " [" + content.toString() + "]"; + return type + " [" + content.toString() + "]"; } } diff --git a/src/main/java/org/apache/commons/csv/package-info.java b/src/main/java/org/apache/commons/csv/package-info.java index 29e7fef612..d3340fcae8 100644 --- a/src/main/java/org/apache/commons/csv/package-info.java +++ b/src/main/java/org/apache/commons/csv/package-info.java @@ -1,31 +1,33 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ /** - * Apache Commons CSV Format Support. + *

      Apache Commons CSV

      * - *

      CSV are widely used as interfaces to legacy systems or manual data-imports. + *

      CSV are widely used as interfaces to legacy systems or manual data imports. * CSV stands for "Comma Separated Values" (or sometimes "Character Separated * Values"). The CSV data format is defined in - * RFC 4180 + * RFC 4180 * but many dialects exist.

      * *

      Common to all file dialects is its basic structure: The CSV data-format - * is record oriented, whereas each record starts on a new textual line. A + * is record-oriented, whereas each record starts on a new textual line. A * record is build of a list of values. Keep in mind that not all records * must have an equal number of values:

      *
      @@ -36,28 +38,28 @@
        * 

      The following list contains the CSV aspects the Commons CSV parser supports:

      *
      *
      Separators (for lines)
      - *
      The record separators are hardcoded and cannot be changed. The must be '\r', '\n' or '\r\n'.
      + *
      The record separators are hardcoded and cannot be changed. They must be '\r', '\n', or '\r\n'.
      * *
      Delimiter (for values)
      *
      The delimiter for values is freely configurable (default ',').
      * *
      Comments
      - *
      Some CSV-dialects support a simple comment syntax. A comment is a record + *
      Some CSV dialects support a simple comment syntax. A comment is a record * which must start with a designated character (the commentStarter). A record - * of this kind is treated as comment and gets removed from the input (default none)
      + * of this kind is treated as a comment and gets removed from the input (default none) * *
      Encapsulator
      *
      Two encapsulator characters (default '"') are used to enclose -> complex values.
      * *
      Simple values
      - *
      A simple value consist of all characters (except the delimiter) until - * (but not including) the next delimiter or a record-terminator. Optionally + *
      A simple value consists of all characters (except the delimiter) until + * (but not including) the next delimiter or a record terminator. Optionally * all surrounding whitespaces of a simple value can be ignored (default: true).
      * *
      Complex values
      *
      Complex values are encapsulated within a pair of the defined encapsulator characters. * The encapsulator itself must be escaped or doubled when used inside complex values. - * Complex values preserve all kind of formatting (including newlines -> multiline-values)
      + * Complex values preserve all kinds of formatting (including newlines -> multiline-values) * *
      Empty line skipping
      *
      Optionally empty lines in CSV files can be skipped. diff --git a/src/main/javadoc/overview.html b/src/main/javadoc/overview.html new file mode 100644 index 0000000000..0598cf19d0 --- /dev/null +++ b/src/main/javadoc/overview.html @@ -0,0 +1,375 @@ + + + +Apache Commons CSV Overview + + + Apache Commons CSV +

      + You can find the Javadoc package list at the bottom of this page. +

      +
      +

      Introducing Commons CSV

      +

      Apache Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.

      +

      + Common CSV formats are predefined in the CSVFormat class: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      CSV Formats
      CSVFormatDescriptionSince Version
      DEFAULTIO for the Standard Comma Separated Value format, like RFC 4180 but allowing + empty lines. + 1.0
      EXCELIO for the Microsoft + Excel CSV format. + 1.0
      INFORMIX_UNLOADIO for the Informix UNLOAD TO file_name + command. + 1.3
      INFORMIX_UNLOAD_CSVIO for the Informix UNLOAD CSV TO + file_name command with escaping disabled. + 1.3
      MONGODB_CSVIO for the MongoDB CSV mongoexport command. + 1.7
      MONGODB_TSVIO for the MongoDB Tab Separated Values (TSV) mongoexport + command. + 1.7
      MYSQLIO for the MySQL CSV format. + 1.0
      ORACLEIO for the Oracle CSV format + of the SQL*Loader utility. + 1.6
      POSTGRESQL_CSVIO for the PostgreSQL CSV format used by the COPY + operation. + 1.5
      POSTGRESQL_TEXTIO for the PostgreSQL Text format used by the COPY + operation. + 1.5
      RFC4180IO for the RFC-4180 format defined by RFC 4180. + 1.0
      TDFIO for the Tab Delimited Format (also known as Tab Separated Values). + 1.0
      +

      Custom formats can be created using a fluent style API.

      +
      +
      +

      Parsing Standard CSV Files

      +

      + Parsing files with Apache Commons CSV is relatively straightforward. Pick a + CSVFormat + and go from there. +

      +
      +

      Parsing an Excel CSV File

      +

      To parse an Excel CSV file, write:

      +
      +        
      +Reader in = new FileReader("path/to/file.csv");
      +Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in);
      +for (CSVRecord record : records) {
      +    String lastName = record.get("Last Name");
      +    String firstName = record.get("First Name");
      +}
      +        
      +      
      +
      +
      +
      +

      Parsing Custom CSV Files

      +

      + You can define your own IO rules by building your own CSVFormat instance. Starting with + CSVFormat.builder() + lets you start from a predefined format and customize it. For example: +

      +
      +      
      +CSVFormat myFormat = CSVFormat.DEFAULT.builder()
      +    .setCommentMarker('#')
      +    .setEscape('+')
      +    .setIgnoreSurroundingSpaces(true)
      +    .setQuote('"')
      +    .setQuoteMode(QuoteMode.ALL)
      +    .get();
      +      
      +    
      +
      +
      +

      Handling Byte Order Marks

      +

      + To handle files that start with a Byte Order Mark (BOM), like some Excel CSV files, you need an extra step to deal with the optional BOM bytes. Using the + BOMInputStream class from Apache Commons IO simplifies this task; for example: +

      +
      +        
      +try (Reader reader = new InputStreamReader(BOMInputStream.builder()
      +        .setPath(path)
      +        .get(), "UTF-8");
      +        CSVParser parser = CSVFormat.EXCEL.builder()
      +                .setHeader()
      +                .get()
      +                .parse(reader)) {
      +    for (CSVRecord record : parser) {
      +        String string = record.get("ColumnA");
      +        // ...
      +    }
      +}
      +        
      +      
      +

      You might find it handy to create something like this:

      +
      +        
      +/**
      + * Creates a reader capable of handling BOMs.
      + *
      + * @param path The path to read.
      + * @return a new InputStreamReader for UTF-8 bytes.
      + * @throws IOException if an I/O error occurs.
      + */
      +public InputStreamReader newReader(final Path path) throws IOException {
      +    return new InputStreamReader(BOMInputStream.builder()
      +            .setPath(path)
      +            .get(), StandardCharsets.UTF_8);
      +}
      +        
      +      
      +
      +
      +

      Using Headers

      +

      + Apache Commons CSV provides several ways to access record values. The simplest way is to access values by their index in the record. However, columns in + CSV files often have a name, for example: ID, CustomerNo, Birthday, etc. The CSVFormat class provides an API for specifying these header names and + CSVRecord on the other hand has methods to access values by their corresponding header name. +

      +
      +

      Accessing column values by index

      +

      To access a record value by index, no special configuration of the CSVFormat is necessary:

      +
      +        
      +Reader in = new FileReader("path/to/file.csv");
      +Iterable<CSVRecord> records = CSVFormat.RFC4180.parse(in);
      +for (CSVRecord record : records) {
      +    String columnOne = record.get(0);
      +    String columnTwo = record.get(1);
      +}
      +        
      +      
      +
      +
      +

      Defining a header manually

      +

      Indices may not be the most intuitive way to access record values. For this reason it is possible to assign names to each column in the file:

      +
      +        
      +Reader in = new FileReader("path/to/file.csv");
      +Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
      +  .setHeader("ID", "CustomerNo", "Name")
      +  .build()
      +  .parse(in);
      +for (CSVRecord record : records) {
      +    String id = record.get("ID");
      +    String customerNo = record.get("CustomerNo");
      +    String name = record.get("Name");
      +}
      +        
      +      
      + Note that column values can still be accessed using their index. +
      +
      +

      Using an enum to define a header

      +

      Using String values all over the code to reference columns can be error-prone. For this reason, it is possible to define an enum to specify header + names. Note that the enum constant names are used to access column values. This may lead to enum constant names that do not follow the Java coding + standard of defining constants in upper case with underscores:

      +
      +        
      +public enum Headers {
      +    ID, CustomerNo, Name
      +}
      +Reader in = new FileReader("path/to/file.csv");
      +Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
      +  .setHeader(Headers.class)
      +  .build()
      +  .parse(in);
      +for (CSVRecord record : records) {
      +    String id = record.get(Headers.ID);
      +    String customerNo = record.get(Headers.CustomerNo);
      +    String name = record.get(Headers.Name);
      +}
      +        
      +      
      + Again it is possible to access values by their index and by using a String (for example "CustomerNo"). +
      +
      +

      Header auto detection

      +

      Some CSV files define header names in their first record. If configured, Apache Commons CSV can parse the header names from the first record:

      +
      +        
      +Reader in = new FileReader("path/to/file.csv");
      +Iterable<CSVRecord> records = CSVFormat.RFC4180.builder()
      +  .setHeader()
      +  .setSkipHeaderRecord(true)
      +  .build()
      +  .parse(in);
      +for (CSVRecord record : records) {
      +    String id = record.get("ID");
      +    String customerNo = record.get("CustomerNo");
      +    String name = record.get("Name");
      +}
      +        
      +      
      + This will use the values from the first record as header names and skip the first record when iterating. +
      +
      +
      +

      Printing with headers

      +

      To print a CSV file with headers, you specify the headers in the format:

      +
      +        
      +Appendable out = ...;
      +CSVPrinter printer = CSVFormat.DEFAULT.builder()
      +  .setHeader("H1", "H2")
      +  .build()
      +  .print(out);
      +        
      +      
      +

      To print a CSV file with JDBC column labels, you specify the ResultSet in the format:

      +
      +        
      +try (ResultSet resultSet = ...) {
      +    CSVPrinter printer = CSVFormat.DEFAULT.builder()
      +      .setHeader(resultSet)
      +      .build()
      +      .print(out);
      +}
      +        
      +      
      +
      +
      +

      Working with JDBC

      +
      +

      Exporting JDBC Result Sets

      +

      + To export row data from a JDBC + ResultSet + , use CSVPrinter.printRecords(ResultSet) : +

      +
      +        
      +        final StringWriter sw = new StringWriter();
      +        final CSVFormat csvFormat = CSVFormat.DEFAULT;
      +        try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:my_test;", "sa", "")) {
      +            try (Statement stmt = connection.createStatement();
      +                    CSVPrinter printer = new CSVPrinter(sw, csvFormat);
      +                    ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT, BIN_DATA from TEST")) {
      +                printer.printRecords(resultSet);
      +            }
      +        }
      +        final String csv = sw.toString();
      +        System.out.println(csv);
      +        
      +        
      +
      +
      +

      Limiting rows from JDBC Result Sets

      +

      SQL lets you limit how many rows a SELECT statement returns with the LIMIT clause.

      +

      + When you can't or don't want to change the SQL used to generate rows, JDBC lets you limit how many rows a JDBC Statement returns with the Statement.setMaxRows(int) method. +

      +

      + When you get a JDBC ResultSet from an API like + DatabaseMetaData.getProcedures(...), there is no SQL or JDBC Statement to use to set a limit, and the ResultSet class does not have an API to limit rows. +

      +

      + To simplify limiting ResultSet rows, Commons CSV offers the CSVFormat.Builder.setMaxRows(long) + method. For example: +

      +
      +        
      +        CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
      +            .setMaxRows(5_000)
      +            .get();
      +        try (ResultSet resultSet = ...) {
      +            csvFormat.printer().printRecords(resultSet);
      +        }
      +        
      +      
      +

      + Using the above, calling CSVPrinter.printRecords(ResultSet) will + limit the row count to the maximum number of rows specified in setMaxRows(). +

      +

      Note that setMaxRows() works with the other methods that print a sequence of records.

      +
      +
      + + diff --git a/src/media/commons-logo-component-100.xcf b/src/media/commons-logo-component-100.xcf new file mode 100644 index 0000000000..77d92f2779 Binary files /dev/null and b/src/media/commons-logo-component-100.xcf differ diff --git a/src/media/commons-logo-component.xcf b/src/media/commons-logo-component.xcf new file mode 100644 index 0000000000..3670221da7 Binary files /dev/null and b/src/media/commons-logo-component.xcf differ diff --git a/src/media/logo.png b/src/media/logo.png new file mode 100644 index 0000000000..93bb6c0148 Binary files /dev/null and b/src/media/logo.png differ diff --git a/src/site/resources/checkstyle/checkstyle-header.txt b/src/site/resources/checkstyle/checkstyle-header.txt deleted file mode 100644 index ae6f28c4a1..0000000000 --- a/src/site/resources/checkstyle/checkstyle-header.txt +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ diff --git a/src/site/resources/checkstyle/checkstyle.xml b/src/site/resources/checkstyle/checkstyle.xml deleted file mode 100644 index 59d5fc9df5..0000000000 --- a/src/site/resources/checkstyle/checkstyle.xml +++ /dev/null @@ -1,69 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/site/resources/images/logo.png b/src/site/resources/images/logo.png index 77e721d2c7..93bb6c0148 100644 Binary files a/src/site/resources/images/logo.png and b/src/site/resources/images/logo.png differ diff --git a/src/site/resources/images/logo.xcf b/src/site/resources/images/logo.xcf deleted file mode 100644 index 98ff21ec1d..0000000000 Binary files a/src/site/resources/images/logo.xcf and /dev/null differ diff --git a/src/site/resources/pmd/pmd-ruleset.xml b/src/site/resources/pmd/pmd-ruleset.xml index 17a8fab27a..74e41f991d 100644 --- a/src/site/resources/pmd/pmd-ruleset.xml +++ b/src/site/resources/pmd/pmd-ruleset.xml @@ -7,7 +7,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -18,7 +18,7 @@ + xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 https://pmd.sourceforge.net/ruleset_2_0_0.xsd"> This ruleset checks the code for discouraged programming constructs. @@ -26,13 +26,10 @@ - - - @@ -54,16 +51,7 @@ - - - - - - - - - @@ -71,12 +59,10 @@ - - diff --git a/src/site/resources/profile.jacoco b/src/site/resources/profile.jacoco index e69de29bb2..0314c63ff2 100644 --- a/src/site/resources/profile.jacoco +++ b/src/site/resources/profile.jacoco @@ -0,0 +1,16 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. diff --git a/src/site/resources/spotbugs/spotbugs-exclude-filter.xml b/src/site/resources/spotbugs/spotbugs-exclude-filter.xml index a7364d40db..79c57d3ae4 100644 --- a/src/site/resources/spotbugs/spotbugs-exclude-filter.xml +++ b/src/site/resources/spotbugs/spotbugs-exclude-filter.xml @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -54,5 +54,12 @@ + + + + + + + diff --git a/src/site/site.xml b/src/site/site.xml index 62d9bafd16..232c2056c5 100644 --- a/src/site/site.xml +++ b/src/site/site.xml @@ -7,7 +7,7 @@ (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -15,32 +15,29 @@ See the License for the specific language governing permissions and limitations under the License. --> - - - - Apache Commons CSV - /images/logo.png - /index.html - Apache Commons CSV&trade; logo + + + - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - + diff --git a/src/site/xdoc/download_csv.xml b/src/site/xdoc/download_csv.xml index 3e00f95c54..151c3f69ec 100644 --- a/src/site/xdoc/download_csv.xml +++ b/src/site/xdoc/download_csv.xml @@ -1,144 +1,146 @@ - - - - - - Download Apache Commons CSV - Apache Commons Documentation Team - - -
      - -

      - We recommend you use a mirror to download our release - builds, but you must verify the integrity of - the downloaded files using signatures downloaded from our main - distribution directories. Recent releases (48 hours) may not yet - be available from all the mirrors. -

      - -

      - You are currently using [preferred]. If you - encounter a problem with this mirror, please select another - mirror. If all mirrors are failing, there are backup - mirrors (at the end of the mirrors list) that should be - available. -

      - [if-any logo][end] -

      - -
      -

      - Other mirrors: - - -

      -
      - -

      - It is essential that you - verify the integrity - of downloaded files, preferably using the PGP signature (*.asc files); - failing that using the SHA512 hash (*.sha512 checksum files). -

      -

      - The KEYS - file contains the public PGP keys used by Apache Commons developers - to sign releases. -

      -
      -
      -
      + + + + + + Download Apache Commons CSV + Apache Commons Team + + +
      + +

      + We recommend you use a mirror to download our release + builds, but you must verify the integrity of + the downloaded files using signatures downloaded from our main + distribution directories. Recent releases (48 hours) may not yet + be available from all the mirrors. +

      + +

      + You are currently using [preferred]. If you + encounter a problem with this mirror, please select another + mirror. If all mirrors are failing, there are backup + mirrors (at the end of the mirrors list) that should be + available. +

      + [if-any logo]Logo[end] +

      + +
      +

      + Other mirrors: + + +

      +
      + +

      + It is essential that you + verify the integrity + of downloaded files, preferably using the PGP signature (*.asc files); + failing that using the SHA512 hash (*.sha512 checksum files). +

      +

      + The KEYS + file contains the public PGP keys used by Apache Commons developers + to sign releases. +

      +
      +
      +
      - - - + + + - - - + + +
      commons-csv-1.9.0-bin.tar.gzsha512pgpcommons-csv-1.14.1-bin.tar.gzsha512pgp
      commons-csv-1.9.0-bin.zipsha512pgpcommons-csv-1.14.1-bin.zipsha512pgp
      - - - + + + - - - + + +
      commons-csv-1.9.0-src.tar.gzsha512pgpcommons-csv-1.14.1-src.tar.gzsha512pgp
      commons-csv-1.9.0-src.zipsha512pgpcommons-csv-1.14.1-src.zipsha512pgp
      diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml index 9d69cf2246..ac5b8cfa9f 100644 --- a/src/site/xdoc/index.xml +++ b/src/site/xdoc/index.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -18,32 +18,20 @@ limitations under the License. Home - Commons Documentation Team + Apache Commons Team + + + -

      Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format.

      -

      The most common CSV formats are predefined in the CSVFormat class: -

      -

      -

      Custom formats can be created using a fluent style API.

      +

      Read the documentation starting with the Javadoc Overview.

      -

      An overview of the functionality is provided in the -user guide. +user guide. Various project reports are also available.

      @@ -58,46 +46,56 @@ The git repository can be browsed.

      -

      See the Download Page -for the latest releases.
      +for the latest releases.

      -Change reports are also available. +Release History is also available.

      For previous releases, see the Apache Archive

      - Alternatively, you can pull it from a Maven repository: -

      <dependency>
      -    <groupId>org.apache.commons</groupId>
      -    <artifactId>commons-csv</artifactId>
      -    <version>1.9.0</version>
      -</dependency>
      -

      -

      - For other dependency access methods, see Dependency Information + For dependency access methods, see Dependency Information

      -

      The latest code can be checked out from our git repository at https://gitbox.apache.org/repos/asf/commons-csv.git. You can build the component using Apache Maven using mvn clean package.

      - +
      +

      + Apache Commons CSV requires Java 8 or above. +

      + + + + + + + + + + + + + + + +
      Commons CSVJavaAndroid
      1.10.0+8Android 7.0 (API level 24)
      +

      The commons developer mailing list is the main channel of communication for contributors. Please remember that the lists are shared between all commons components, so prefix your email by [csv].

      -

      You can also visit the #apache-commons IRC channel on irc.freenode.net or peruse JIRA. Specific links of interest for JIRA are:

      +

      You can also peruse JIRA. Specific links of interest for JIRA are:

      • Ideas looking for code: Patch Needed
      • Issues with patches, looking for reviews: Review Patch
      • @@ -105,7 +103,6 @@ For previous releases, see the TagList report.

        If you'd like to offer up pull requests via GitHub rather than applying patches to JIRA, we have a GitHub mirror.

      -

      The commons mailing lists act as the main support forum. @@ -119,22 +116,18 @@ For previous releases, see the

      Commons CSV was started to unify a common and simple interface for reading and writing CSV files under an ASL license. It has been bootstrapped by a code donation from Netcetera in Switzerland. There are three pre-existing BSD compatible CSV parsers which this component will hopefully make redundant (authors willing):

      In addition to the code from Netcetera (org.apache.commons.csv), Martin van den Bemt has added an additional writer API.

      Other CSV implementations:

      - -
      diff --git a/src/site/xdoc/issue-tracking.xml b/src/site/xdoc/issue-tracking.xml index c7edd21b1b..3aa64b4042 100644 --- a/src/site/xdoc/issue-tracking.xml +++ b/src/site/xdoc/issue-tracking.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -41,10 +41,12 @@ limitations under the License. | | +======================================================================+ --> - + Apache Commons CSV Issue tracking - Apache Commons Documentation Team + Apache Commons Team @@ -64,6 +66,7 @@ limitations under the License.

      If you would like to report a bug, or raise an enhancement request with Apache Commons CSV please do the following: +

      1. Search existing open bugs. If you find your issue listed then please add a comment with your details.
      2. @@ -73,30 +76,29 @@ limitations under the License.
      3. Submit either a bug report or enhancement request.
      -

      Please also remember these points: +

      • the more information you provide, the better we can help you
      • test cases are vital, particularly for any proposed enhancements
      • the developers of Apache Commons CSV are all unpaid volunteers
      -

      - For more information on subversion and creating patches see the + For more information on creating patches see the Apache Contributors Guide.

      You may also find these links useful: +

      -

      diff --git a/src/site/xdoc/mail-lists.xml b/src/site/xdoc/mail-lists.xml index 111faf1212..345cef8996 100644 --- a/src/site/xdoc/mail-lists.xml +++ b/src/site/xdoc/mail-lists.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -39,10 +39,12 @@ limitations under the License. | | +======================================================================+ --> - + Apache Commons CSV Mailing Lists - Apache Commons Documentation Team + Apache Commons Team @@ -53,15 +55,15 @@ limitations under the License. To make it easier for people to only read messages related to components they are interested in, the convention in Commons is to prefix the subject line of messages with the component's name, for example: -
        -
      • [csv] Problem with the ...
      • -

      +
        +
      • [csv] Problem with the ...
      • +

      Questions related to the usage of Apache Commons CSV should be posted to the - User List. + User List.
      - The Developer List + The Developer List is for questions and discussion related to the development of Apache Commons CSV.
      Please do not cross-post; developers are also subscribed to the user list. @@ -70,8 +72,10 @@ limitations under the License. to subscribe.

      - Note: please don't send patches or attachments to any of the mailing lists. + Note: please don't send patches or attachments to any of the mailing lists; + most of the lists are set up to drop attachments. Patches are best handled via the Issue Tracking system. + If you have a GitHub account, most components also accept PRs (pull requests). Otherwise, please upload the file to a public server and include the URL in the mail.

      @@ -105,12 +109,11 @@ limitations under the License. Subscribe Unsubscribe Post - mail-archives.apache.org
      + lists.apache.org - markmail.org
      - www.mail-archive.com
      - news.gmane.org + + www.mail-archive.com @@ -125,12 +128,11 @@ limitations under the License. Subscribe Unsubscribe Post - mail-archives.apache.org
      + lists.apache.org - markmail.org
      - www.mail-archive.com
      - news.gmane.org + + www.mail-archive.com @@ -145,10 +147,10 @@ limitations under the License. Subscribe Unsubscribe read only - mail-archives.apache.org
      + lists.apache.org - markmail.org
      + www.mail-archive.com @@ -158,16 +160,16 @@ limitations under the License. Commons Commits List

      - Only for e-mails automatically generated by the source control sytem. + Only for e-mails automatically generated by the source control system.

      Subscribe Unsubscribe read only - mail-archives.apache.org
      + lists.apache.org - markmail.org
      + www.mail-archive.com @@ -199,13 +201,11 @@ limitations under the License. Subscribe Unsubscribe read only - mail-archives.apache.org
      + lists.apache.org - markmail.org
      - old.nabble.com
      - www.mail-archive.com
      - news.gmane.org + + www.mail-archive.com diff --git a/src/site/xdoc/security.xml b/src/site/xdoc/security.xml new file mode 100644 index 0000000000..47edf5d116 --- /dev/null +++ b/src/site/xdoc/security.xml @@ -0,0 +1,56 @@ + + + + + Apache Commons Security Reports + Apache Commons Team + + +
      +

      + For information about reporting or asking questions about security, please see + Apache Commons Security. +

      +

      This page lists all security vulnerabilities fixed in released versions of this component. +

      +

      Please note that binary patches are never provided. If you need to apply a source code patch, use the building instructions for the component version + that you are using. +

      +

      + If you need help on building this component or other help on following the instructions to mitigate the known vulnerabilities listed here, please send + your questions to the public + user mailing list. +

      +

      If you have encountered an unlisted security vulnerability or other unexpected behavior that has security impact, or if the descriptions here are + incomplete, please report them privately to the Apache Security Team. Thank you. +

      +
      +
      +

      None.

      +
      +
      +

      + For information about safe deserialization, please see Safe Deserialization. +

      +
      + +
      \ No newline at end of file diff --git a/src/site/xdoc/user-guide.xml b/src/site/xdoc/user-guide.xml index 254b461729..d5a1f26850 100644 --- a/src/site/xdoc/user-guide.xml +++ b/src/site/xdoc/user-guide.xml @@ -7,7 +7,7 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -21,158 +21,6 @@ limitations under the License. Apache Commons Documentation Team - - -

      Apache Commons CSV User Guide

      - - - - -
      - - Parsing files with Apache Commons CSV is relatively straight forward. - The CSVFormat class provides some commonly used CSV variants: - -
      -
      DEFAULT
      Standard Comma Separated Value format, as for RFC4180 but allowing empty lines.
      -
      EXCEL
      The Microsoft Excel CSV format.
      -
      INFORMIX_UNLOAD1.3
      Informix UNLOAD format used by the UNLOAD TO file_name operation.
      -
      INFORMIX_UNLOAD_CSV1.3
      Informix CSV UNLOAD format used by the UNLOAD TO file_name operation (escaping is disabled.)
      -
      MONGO_CSV1.7
      MongoDB CSV format used by the mongoexport operation.
      -
      MONGO_TSV1.7
      MongoDB TSV format used by the mongoexport operation.
      -
      MYSQL
      The MySQL CSV format.
      -
      ORACLE1.6
      Default Oracle format used by the SQL*Loader utility.
      -
      POSTGRESSQL_CSV1.5
      Default PostgreSQL CSV format used by the COPY operation.
      -
      POSTGRESSQL_TEXT1.5
      Default PostgreSQL text format used by the COPY operation.
      -
      RFC-4180
      The RFC-4180 format defined by RFC-4180.
      -
      TDF
      A tab delimited format.
      -
      - - -

      To parse an Excel CSV file, write:

      - Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.EXCEL.parse(in); -for (CSVRecord record : records) { - String lastName = record.get("Last Name"); - String firstName = record.get("First Name"); -} - -
      - -

      - To handle files that start with a Byte Order Mark (BOM) like some Excel CSV files, you need an extra step to - deal with these optional bytes. - You can use the - - BOMInputStream - - class from - Apache Commons IO - for example: -

      - final URL url = ...; -final Reader reader = new InputStreamReader(new BOMInputStream(url.openStream()), "UTF-8"); -final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader()); -try { - for (final CSVRecord record : parser) { - final String string = record.get("SomeColumn"); - ... - } -} finally { - parser.close(); - reader.close(); -} - -

      - You might find it handy to create something like this: -

      - /** -* Creates a reader capable of handling BOMs. -*/ -public InputStreamReader newReader(final InputStream inputStream) { - return new InputStreamReader(new BOMInputStream(inputStream), StandardCharsets.UTF_8); -} - -
      -
      - -
      - - Apache Commons CSV provides several ways to access record values. - The simplest way is to access values by their index in the record. - However, columns in CSV files often have a name, for example: ID, CustomerNo, Birthday, etc. - The CSVFormat class provides an API for specifing these header names and CSVRecord on - the other hand has methods to access values by their corresponding header name. - - - To access a record value by index, no special configuration of the CSVFormat is necessary: - Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.parse(in); -for (CSVRecord record : records) { - String columnOne = record.get(0); - String columnTwo = record.get(1); -} - - - - Indices may not be the most intuitive way to access record values. For this reason it is possible to - assign names to each column in the file: - Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.withHeader("ID", "CustomerNo", "Name").parse(in); -for (CSVRecord record : records) { - String id = record.get("ID"); - String customerNo = record.get("CustomerNo"); - String name = record.get("Name"); -} - - Note that column values can still be accessed using their index. - - - Using String values all over the code to reference columns can be error prone. For this reason, - it is possible to define an enum to specify header names. Note that the enum constant names are - used to access column values. 
This may lead to enums constant names which do not follow the Java - coding standard of defining constants in upper case with underscores: - public enum Headers { - ID, CustomerNo, Name -} -Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.withHeader(Headers.class).parse(in); -for (CSVRecord record : records) { - String id = record.get(Headers.ID); - String customerNo = record.get(Headers.CustomerNo); - String name = record.get(Headers.Name); -} - - Again it is possible to access values by their index and by using a String (for example "CustomerNo"). - - - Some CSV files define header names in their first record. If configured, Apache Commons CSV can parse - the header names from the first record: - Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(in); -for (CSVRecord record : records) { - String id = record.get("ID"); - String customerNo = record.get("CustomerNo"); - String name = record.get("Name"); -} - - This will use the values from the first record as header names and skip the first record when iterating. - - -

      - To print a CSV file with headers, you specify the headers in the format: -

      - final Appendable out = ...; - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader("H1", "H2").print(out) - -

      - To print a CSV file with JDBC column labels, you specify the ResultSet in the format: -

      - final ResultSet resultSet = ...; - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet).print(out) - -
      -
      - +

      The User Guide migrated to the Javadoc.

      diff --git a/src/test/java/org/apache/commons/csv/CSVBenchmark.java b/src/test/java/org/apache/commons/csv/CSVBenchmark.java index 64d3f4980a..b1be4ce095 100644 --- a/src/test/java/org/apache/commons/csv/CSVBenchmark.java +++ b/src/test/java/org/apache/commons/csv/CSVBenchmark.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv; @@ -28,10 +30,6 @@ import java.util.concurrent.TimeUnit; import java.util.zip.GZIPInputStream; -import com.generationjava.io.CsvReader; -import com.opencsv.CSVParserBuilder; -import com.opencsv.CSVReaderBuilder; - import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.openjdk.jmh.annotations.Benchmark; @@ -49,6 +47,10 @@ import org.supercsv.io.CsvListReader; import org.supercsv.prefs.CsvPreference; +import com.generationjava.io.CsvReader; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReaderBuilder; + @BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"}) @Threads(1) @@ -58,76 +60,39 @@ @State(Scope.Benchmark) public class CSVBenchmark { - private String data; + private static final class CountingReaderCallback implements org.skife.csv.ReaderCallback { + public int count; - /** - * Load the data in memory before running the benchmarks, this takes out IO from the results. 
- */ - @Setup - public void init() throws IOException { - InputStream in = this.getClass().getClassLoader().getResourceAsStream( - "org/apache/commons/csv/perf/worldcitiespop.txt.gz"); - try (final InputStream gzin = new GZIPInputStream(in, 8192)) { - this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1); + @Override + public void onRow(final String[] fields) { + count++; } } + private String data; + private Reader getReader() { return new StringReader(data); } - @Benchmark - public int read(final Blackhole bh) throws Exception { - int count = 0; - - try (BufferedReader reader = new BufferedReader(getReader())) { - while (reader.readLine() != null) { - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int scan(final Blackhole bh) throws Exception { - int count = 0; - - try (Scanner scanner = new Scanner(getReader())) { - while (scanner.hasNextLine()) { - scanner.nextLine(); - count++; - } + /** + * Load the data in memory before running the benchmarks, this takes out IO from the results. 
+ */ + @Setup + public void init() throws IOException { + try (InputStream in = this.getClass().getClassLoader().getResourceAsStream("org/apache/commons/csv/perf/worldcitiespop.txt.gz"); + InputStream gzin = new GZIPInputStream(in, 8192)) { + this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1); } - - bh.consume(count); - return count; - } - - @Benchmark - public int split(final Blackhole bh) throws Exception { - int count = 0; - - try (BufferedReader reader = new BufferedReader(getReader())) { - String line; - while ((line = reader.readLine()) != null) { - final String[] values = StringUtils.split(line, ','); - count += values.length; - } - } - - bh.consume(count); - return count; } @Benchmark public int parseCommonsCSV(final Blackhole bh) throws Exception { int count = 0; - try (final Reader in = getReader()) { + try (Reader in = getReader()) { final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build(); - Iterator iter = format.parse(in).iterator(); + final Iterator iter = format.parse(in).iterator(); while (iter.hasNext()) { count++; iter.next(); @@ -142,7 +107,7 @@ public int parseCommonsCSV(final Blackhole bh) throws Exception { public int parseGenJavaCSV(final Blackhole bh) throws Exception { int count = 0; - try (final Reader in = getReader()) { + try (Reader in = getReader()) { final CsvReader reader = new CsvReader(in); reader.setFieldDelimiter(','); while (reader.readLine() != null) { @@ -158,7 +123,7 @@ public int parseGenJavaCSV(final Blackhole bh) throws Exception { public int parseJavaCSV(final Blackhole bh) throws Exception { int count = 0; - try (final Reader in = getReader()) { + try (Reader in = getReader()) { final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ','); reader.setRecordDelimiter('\n'); while (reader.readRecord()) { @@ -177,7 +142,7 @@ public int parseOpenCSV(final Blackhole bh) throws Exception { final com.opencsv.CSVParser parser = new CSVParserBuilder() 
.withSeparator(',').withIgnoreQuotations(true).build(); - try (final Reader in = getReader()) { + try (Reader in = getReader()) { final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build(); while (reader.readNext() != null) { count++; @@ -194,7 +159,7 @@ public int parseSkifeCSV(final Blackhole bh) throws Exception { reader.setSeperator(','); final CountingReaderCallback callback = new CountingReaderCallback(); - try (final Reader in = getReader()) { + try (Reader in = getReader()) { reader.parse(in, callback); } @@ -202,26 +167,62 @@ public int parseSkifeCSV(final Blackhole bh) throws Exception { return callback.count; } - private static class CountingReaderCallback implements org.skife.csv.ReaderCallback { - public int count; + @Benchmark + public int parseSuperCSV(final Blackhole bh) throws Exception { + int count = 0; - @Override - public void onRow(final String[] fields) { - count++; + try (CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) { + while (reader.read() != null) { + count++; + } } + + bh.consume(count); + return count; } @Benchmark - public int parseSuperCSV(final Blackhole bh) throws Exception { + public int read(final Blackhole bh) throws Exception { int count = 0; - try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) { - while (reader.read() != null) { - count++; + try (BufferedReader reader = new BufferedReader(getReader())) { + while (reader.readLine() != null) { + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int scan(final Blackhole bh) throws Exception { + int count = 0; + + try (Scanner scanner = new Scanner(getReader())) { + while (scanner.hasNextLine()) { + scanner.nextLine(); + count++; } } bh.consume(count); return count; } + + @Benchmark + public int split(final Blackhole bh) throws Exception { + int count = 0; + + try (BufferedReader reader = new 
BufferedReader(getReader())) { + String line; + while ((line = reader.readLine()) != null) { + final String[] values = StringUtils.split(line, ','); + count += values.length; + } + } + + bh.consume(count); + return count; + } } diff --git a/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java b/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java new file mode 100644 index 0000000000..2f518a1206 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java @@ -0,0 +1,339 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Tests parsing of duplicate column names in a CSV header. 
+ * The test verifies that headers are consistently handled by CSVFormat and CSVParser. + */ +class CSVDuplicateHeaderTest { + + /** + * Return test cases for duplicate header data for use in CSVFormat. + *

      + * This filters the parsing test data to all cases where the allow missing column + * names flag is true and ignore header case is false: these flags are exclusively for parsing. + * CSVFormat validation applies to both parsing and writing and thus validation + * is less strict and behaves as if the allow missing column names constraint and + * the ignore header case behavior are absent. + * The filtered data is then returned with the parser flags set to both true and false + * for each test case. + *

      + * + * @return the stream of arguments + */ + static Stream duplicateHeaderAllowsMissingColumnsNamesData() { + return duplicateHeaderData() + .filter(arg -> Boolean.TRUE.equals(arg.get()[1]) && Boolean.FALSE.equals(arg.get()[2])) + .flatMap(arg -> { + // Return test case with flags as all true/false combinations + final Object[][] data = new Object[4][]; + final Boolean[] flags = {Boolean.TRUE, Boolean.FALSE}; + int i = 0; + for (final Boolean a : flags) { + for (final Boolean b : flags) { + data[i] = arg.get().clone(); + data[i][1] = a; + data[i][2] = b; + i++; + } + } + return Arrays.stream(data).map(Arguments::of); + }); + } + + /** + * Return test cases for duplicate header data for use in parsing (CSVParser). Uses the order: + *
      +     * DuplicateHeaderMode duplicateHeaderMode
      +     * boolean allowMissingColumnNames
      +     * boolean ignoreHeaderCase
      +     * String[] headers
      +     * boolean valid
      +     * 
      + * + * @return the stream of arguments + */ + static Stream duplicateHeaderData() { + return Stream.of( + // Any combination with a valid header + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "B"}, true), + + // Any combination with a valid header including empty + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", ""}, true), + + // Any combination with a valid header including blank (1 space) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", " "}, true), + + // Any combination with a valid header including null + 
Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", null}, true), + + // Duplicate non-empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A"}, true), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A"}, true), + + // Duplicate empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"", ""}, true), + + // Duplicate blank names (1 space) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, 
false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", " "}, true), + + // Duplicate blank names (3 spaces) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", " "}, true), + + // Duplicate null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {null, null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {null, null}, true), + + // Duplicate blank names (1+3 spaces) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", " 
"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", " "}, true), + + // Duplicate blank names and null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", null}, true), + + // Duplicate non-empty and empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A", "", ""}, true), + + // Non-duplicate non-empty and duplicate empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "B", "", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "B", 
"", ""}, true), + + // Duplicate non-empty and blank names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A", " ", " "}, true), + + // Duplicate non-empty and null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A", null, null}, true), + + // Duplicate blank names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "", ""}, true), + + // Duplicate null names + 
Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", null, null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", null, null}, true), + + // Duplicate blank names (1+3 spaces) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", " ", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", " ", " "}, true), + + // Duplicate names (case insensitive) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true , new String[] {"A", "a"}, true), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true , new String[] {"A", "a"}, true), + + // Duplicate non-empty (case insensitive) and empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true, new String[] {"A", "a", "", ""}, false), + 
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true, new String[] {"A", "a", "", ""}, true), + + // Duplicate non-empty (case insensitive) and blank names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true, new String[] {"A", "a", " ", " "}, true), + + // Duplicate non-empty (case insensitive) and null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true, new String[] {"A", "a", null, null}, true) + ); + } + + /** + * Tests duplicate headers with the CSVFormat. 
+ * + * @param duplicateHeaderMode the duplicate header mode + * @param allowMissingColumnNames the allow missing column names flag (only used for parsing) + * @param ignoreHeaderCase the ignore header case flag (only used for parsing) + * @param headers the headers + * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException + */ + @ParameterizedTest + @MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"}) + void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode, + final boolean allowMissingColumnNames, + final boolean ignoreHeaderCase, + final String[] headers, + final boolean valid) { + final CSVFormat.Builder builder = + CSVFormat.DEFAULT.builder() + .setDuplicateHeaderMode(duplicateHeaderMode) + .setAllowMissingColumnNames(allowMissingColumnNames) + .setIgnoreHeaderCase(ignoreHeaderCase) + .setHeader(headers); + if (valid) { + final CSVFormat format = builder.get(); + assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode"); + assertEquals(allowMissingColumnNames, format.getAllowMissingColumnNames(), "AllowMissingColumnNames"); + assertArrayEquals(headers, format.getHeader(), "Header"); + } else { + assertThrows(IllegalArgumentException.class, builder::get); + } + } + + /** + * Tests duplicate headers with the CSVParser. + * + * @param duplicateHeaderMode the duplicate header mode + * @param allowMissingColumnNames the allow missing column names flag (only used for parsing) + * @param ignoreHeaderCase the ignore header case flag (only used for parsing) + * @param headers the headers (joined with the CSVFormat delimiter to create a string input) + * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException + * @throws IOException Signals that an I/O exception has occurred. 
+ */ + @ParameterizedTest + @MethodSource(value = {"duplicateHeaderData"}) + void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode, + final boolean allowMissingColumnNames, + final boolean ignoreHeaderCase, + final String[] headers, + final boolean valid) throws IOException { + // @formatter:off + final CSVFormat format = CSVFormat.DEFAULT.builder() + .setDuplicateHeaderMode(duplicateHeaderMode) + .setAllowMissingColumnNames(allowMissingColumnNames) + .setIgnoreHeaderCase(ignoreHeaderCase) + .setNullString("NULL") + .setHeader() + .get(); + // @formatter:on + final String input = Arrays.stream(headers) + .map(s -> s == null ? format.getNullString() : s) + .collect(Collectors.joining(format.getDelimiterString())); + // @formatter:off + if (valid) { + try (CSVParser parser = CSVParser.parse(input, format)) { + // Parser ignores null headers + final List expected = Arrays.stream(headers).filter(s -> s != null).collect(Collectors.toList()); + assertEquals(expected, parser.getHeaderNames(), "HeaderNames"); + } + } else { + assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format)); + } + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java index 4d9b87118f..e74d0e6884 100644 --- a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv; @@ -38,7 +40,7 @@ /** * Parse tests using test files */ -public class CSVFileParserTest { +class CSVFileParserTest { private static final File BASE_DIR = new File("src/test/resources/org/apache/commons/csv/CSVFileParser"); @@ -57,9 +59,9 @@ private String readTestData(final BufferedReader reader) throws IOException { @ParameterizedTest @MethodSource("generateData") - public void testCSVFile(final File testFile) throws Exception { - try (FileReader fr = new FileReader(testFile); BufferedReader testData = new BufferedReader(fr)) { - String line = readTestData(testData); + void testCSVFile(final File testFile) throws Exception { + try (FileReader fr = new FileReader(testFile); BufferedReader testDataReader = new BufferedReader(fr)) { + String line = readTestData(testDataReader); assertNotNull("file must contain config line", line); final String[] split = line.split(" "); assertTrue(split.length >= 1, testFile.getName() + " require 1 param"); @@ -68,25 +70,25 @@ public void testCSVFile(final File testFile) throws Exception { boolean checkComments = false; for (int i = 1; i < split.length; i++) { final String option = split[i]; - final String[] option_parts = option.split("=", 2); - if ("IgnoreEmpty".equalsIgnoreCase(option_parts[0])) { - format = format.withIgnoreEmptyLines(Boolean.parseBoolean(option_parts[1])); - } else if ("IgnoreSpaces".equalsIgnoreCase(option_parts[0])) { - format = format.withIgnoreSurroundingSpaces(Boolean.parseBoolean(option_parts[1])); - } else if ("CommentStart".equalsIgnoreCase(option_parts[0])) { - format = format.withCommentMarker(option_parts[1].charAt(0)); - } else if ("CheckComments".equalsIgnoreCase(option_parts[0])) { + final String[] optionParts = option.split("=", 2); + if ("IgnoreEmpty".equalsIgnoreCase(optionParts[0])) { + format = format.withIgnoreEmptyLines(Boolean.parseBoolean(optionParts[1])); + } else if ("IgnoreSpaces".equalsIgnoreCase(optionParts[0])) { + format = 
format.withIgnoreSurroundingSpaces(Boolean.parseBoolean(optionParts[1])); + } else if ("CommentStart".equalsIgnoreCase(optionParts[0])) { + format = format.withCommentMarker(optionParts[1].charAt(0)); + } else if ("CheckComments".equalsIgnoreCase(optionParts[0])) { checkComments = true; } else { fail(testFile.getName() + " unexpected option: " + option); } } - line = readTestData(testData); // get string version of format + line = readTestData(testDataReader); // get string version of format assertEquals(line, format.toString(), testFile.getName() + " Expected format "); // Now parse the file and compare against the expected results // We use a buffered reader internally so no need to create one here. - try (final CSVParser parser = CSVParser.parse(new File(BASE_DIR, split[0]), Charset.defaultCharset(), format)) { + try (CSVParser parser = CSVParser.parse(new File(BASE_DIR, split[0]), Charset.defaultCharset(), format)) { for (final CSVRecord record : parser) { String parsed = Arrays.toString(record.values()); final String comment = record.getComment(); @@ -94,7 +96,7 @@ public void testCSVFile(final File testFile) throws Exception { parsed += "#" + comment.replace("\n", "\\n"); } final int count = record.size(); - assertEquals(readTestData(testData), count + ":" + parsed, testFile.getName()); + assertEquals(readTestData(testDataReader), count + ":" + parsed, testFile.getName()); } } } @@ -102,7 +104,7 @@ public void testCSVFile(final File testFile) throws Exception { @ParameterizedTest @MethodSource("generateData") - public void testCSVUrl(final File testFile) throws Exception { + void testCSVUrl(final File testFile) throws Exception { try (FileReader fr = new FileReader(testFile); BufferedReader testData = new BufferedReader(fr)) { String line = readTestData(testData); assertNotNull("file must contain config line", line); @@ -113,14 +115,14 @@ public void testCSVUrl(final File testFile) throws Exception { boolean checkComments = false; for (int i = 1; i < 
split.length; i++) { final String option = split[i]; - final String[] option_parts = option.split("=", 2); - if ("IgnoreEmpty".equalsIgnoreCase(option_parts[0])) { - format = format.withIgnoreEmptyLines(Boolean.parseBoolean(option_parts[1])); - } else if ("IgnoreSpaces".equalsIgnoreCase(option_parts[0])) { - format = format.withIgnoreSurroundingSpaces(Boolean.parseBoolean(option_parts[1])); - } else if ("CommentStart".equalsIgnoreCase(option_parts[0])) { - format = format.withCommentMarker(option_parts[1].charAt(0)); - } else if ("CheckComments".equalsIgnoreCase(option_parts[0])) { + final String[] optionParts = option.split("=", 2); + if ("IgnoreEmpty".equalsIgnoreCase(optionParts[0])) { + format = format.withIgnoreEmptyLines(Boolean.parseBoolean(optionParts[1])); + } else if ("IgnoreSpaces".equalsIgnoreCase(optionParts[0])) { + format = format.withIgnoreSurroundingSpaces(Boolean.parseBoolean(optionParts[1])); + } else if ("CommentStart".equalsIgnoreCase(optionParts[0])) { + format = format.withCommentMarker(optionParts[1].charAt(0)); + } else if ("CheckComments".equalsIgnoreCase(optionParts[0])) { checkComments = true; } else { fail(testFile.getName() + " unexpected option: " + option); @@ -131,7 +133,7 @@ public void testCSVUrl(final File testFile) throws Exception { // Now parse the file and compare against the expected results final URL resource = ClassLoader.getSystemResource("org/apache/commons/csv/CSVFileParser/" + split[0]); - try (final CSVParser parser = CSVParser.parse(resource, StandardCharsets.UTF_8, format)) { + try (CSVParser parser = CSVParser.parse(resource, StandardCharsets.UTF_8, format)) { for (final CSVRecord record : parser) { String parsed = Arrays.toString(record.values()); final String comment = record.getComment(); diff --git a/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java b/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java index d62f41f3cb..dad08cdb1d 100644 --- 
a/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; @@ -24,7 +26,7 @@ /** * Tests {@link CSVFormat.Predefined}. 
*/ -public class CSVFormatPredefinedTest { +class CSVFormatPredefinedTest { private void test(final CSVFormat format, final String enumName) { assertEquals(format, CSVFormat.Predefined.valueOf(enumName).getFormat()); @@ -32,52 +34,52 @@ private void test(final CSVFormat format, final String enumName) { } @Test - public void testDefault() { + void testDefault() { test(CSVFormat.DEFAULT, "Default"); } @Test - public void testExcel() { + void testExcel() { test(CSVFormat.EXCEL, "Excel"); } @Test - public void testMongoDbCsv() { + void testMongoDbCsv() { test(CSVFormat.MONGODB_CSV, "MongoDBCsv"); } @Test - public void testMongoDbTsv() { + void testMongoDbTsv() { test(CSVFormat.MONGODB_TSV, "MongoDBTsv"); } @Test - public void testMySQL() { + void testMySQL() { test(CSVFormat.MYSQL, "MySQL"); } @Test - public void testOracle() { + void testOracle() { test(CSVFormat.ORACLE, "Oracle"); } @Test - public void testPostgreSqlCsv() { + void testPostgreSqlCsv() { test(CSVFormat.POSTGRESQL_CSV, "PostgreSQLCsv"); } @Test - public void testPostgreSqlText() { + void testPostgreSqlText() { test(CSVFormat.POSTGRESQL_TEXT, "PostgreSQLText"); } @Test - public void testRFC4180() { + void testRFC4180() { test(CSVFormat.RFC4180, "RFC4180"); } @Test - public void testTDF() { + void testTDF() { test(CSVFormat.TDF, "TDF"); } } diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java index 0be9774507..ed20898de9 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java @@ -1,1572 +1,1607 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.CSVFormat.RFC4180; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.LF; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Reader; -import java.io.StringReader; -import java.lang.reflect.Method; -import java.lang.reflect.Modifier; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Objects; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -/** - * Tests {@link CSVFormat}. 
- */ -public class CSVFormatTest { - - public enum EmptyEnum { - // empty enum. - } - - public enum Header { - Name, Email, Phone - } - - private static void assertNotEquals(final Object right, final Object left) { - Assertions.assertNotEquals(right, left); - Assertions.assertNotEquals(left, right); - } - - private static CSVFormat copy(final CSVFormat format) { - return format.builder().setDelimiter(format.getDelimiter()).build(); - } - - private void assertNotEquals(final String name, final String type, final Object left, final Object right) { - if (left.equals(right) || right.equals(left)) { - fail("Objects must not compare equal for " + name + "(" + type + ")"); - } - if (left.hashCode() == right.hashCode()) { - fail("Hash code should not be equal for " + name + "(" + type + ")"); - } - } - - @SuppressWarnings("deprecation") - @Test - public void testDelimiterSameAsCommentStartThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withCommentMarker('!')); - } - - @Test - public void testDelimiterSameAsCommentStartThrowsException1() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testDelimiterSameAsEscapeThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withEscape('!')); - } - - @Test - public void testDelimiterSameAsEscapeThrowsException1() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setEscape('!').build()); - } - - @Test - public void testDelimiterSameAsRecordSeparatorThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat(CR)); - } - - @Test - public void testDuplicateHeaderElements() { - final String[] header = { "A", "A" }; - final CSVFormat format = 
CSVFormat.DEFAULT.builder().setHeader(header).build(); - assertEquals(2, format.getHeader().length); - assertArrayEquals(header, format.getHeader()); - } - - @SuppressWarnings("deprecation") - @Test - public void testDuplicateHeaderElements_Deprecated() { - final String[] header = { "A", "A" }; - final CSVFormat format = CSVFormat.DEFAULT.withHeader(header); - assertEquals(2, format.getHeader().length); - assertArrayEquals(header, format.getHeader()); - } - - @Test - public void testDuplicateHeaderElementsFalse() { - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "A").build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testDuplicateHeaderElementsFalse_Deprecated() { - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(false).withHeader("A", "A")); - } - - public void testDuplicateHeaderElementsTrue() { - CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("A", "A").build(); - } - - @SuppressWarnings("deprecation") - public void testDuplicateHeaderElementsTrue_Deprecated() { - CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(true).withHeader("A", "A"); - } - - @Test - public void testEquals() { - final CSVFormat right = CSVFormat.DEFAULT; - final CSVFormat left = copy(right); - - Assertions.assertNotEquals(null, right); - Assertions.assertNotEquals("A String Instance", right); - - assertEquals(right, right); - assertEquals(right, left); - assertEquals(left, right); - - assertEquals(right.hashCode(), right.hashCode()); - assertEquals(right.hashCode(), left.hashCode()); - } - - @Test - public void testEqualsCommentStart() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setQuote('"') - .setCommentMarker('#') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setCommentMarker('!') - .build(); - - assertNotEquals(right, left); - } - - 
@SuppressWarnings("deprecation") - @Test - public void testEqualsCommentStart_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withQuote('"') - .withCommentMarker('#') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withCommentMarker('!'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsDelimiter() { - final CSVFormat right = CSVFormat.newFormat('!'); - final CSVFormat left = CSVFormat.newFormat('?'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsEscape() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setQuote('"') - .setCommentMarker('#') - .setEscape('+') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setEscape('!') - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsEscape_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withQuote('"') - .withCommentMarker('#') - .withEscape('+') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withEscape('!'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsHash() throws Exception { - final Method[] methods = CSVFormat.class.getDeclaredMethods(); - for (final Method method : methods) { - if (Modifier.isPublic(method.getModifiers())) { - final String name = method.getName(); - if (name.startsWith("with")) { - for (final Class cls : method.getParameterTypes()) { - final String type = cls.getCanonicalName(); - if ("boolean".equals(type)) { - final Object defTrue = method.invoke(CSVFormat.DEFAULT, Boolean.TRUE); - final Object defFalse = method.invoke(CSVFormat.DEFAULT, Boolean.FALSE); - assertNotEquals(name, type ,defTrue, defFalse); - } else if ("char".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, 'a'); - final Object b = method.invoke(CSVFormat.DEFAULT, 'b'); - assertNotEquals(name, type, a, b); - } else if 
("java.lang.Character".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {null}); - final Object b = method.invoke(CSVFormat.DEFAULT, Character.valueOf('d')); - assertNotEquals(name, type, a, b); - } else if ("java.lang.String".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {null}); - final Object b = method.invoke(CSVFormat.DEFAULT, "e"); - assertNotEquals(name, type, a, b); - } else if ("java.lang.String[]".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new String[] {null, null}}); - final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new String[] {"f", "g"}}); - assertNotEquals(name, type, a, b); - } else if ("org.apache.commons.csv.QuoteMode".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL); - final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL); - assertNotEquals(name, type, a, b); - } else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) { - final Object a = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.ALLOW_ALL); - final Object b = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.DISALLOW); - assertNotEquals(name, type, a, b); - } else if ("java.lang.Object[]".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}}); - final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}}); - assertNotEquals(name, type, a, b); - } else if ("withHeader".equals(name)){ // covered above by String[] - // ignored - } else { - fail("Unhandled method: "+name + "(" + type + ")"); - } - } - } - } - } - } - - @Test - public void testEqualsHeader() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setHeader("One", "Two", "Three") - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - 
.setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setHeader("Three", "Two", "One") - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsHeader_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withHeader("One", "Two", "Three") - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withHeader("Three", "Two", "One"); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsIgnoreEmptyLines() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setIgnoreEmptyLines(false) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsIgnoreEmptyLines_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withIgnoreEmptyLines(false); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsIgnoreSurroundingSpaces() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setCommentMarker('#') - .setEscape('+') - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setIgnoreSurroundingSpaces(false) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsIgnoreSurroundingSpaces_Deprecated() { - final CSVFormat right = 
CSVFormat.newFormat('\'') - .withCommentMarker('#') - .withEscape('+') - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withIgnoreSurroundingSpaces(false); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsLeftNoQuoteRightQuote() { - final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).build(); - final CSVFormat right = left.builder().setQuote('#').build(); - - assertNotEquals(left, right); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsLeftNoQuoteRightQuote_Deprecated() { - final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); - final CSVFormat right = left.withQuote('#'); - - assertNotEquals(left, right); - } - - @Test - public void testEqualsNoQuotes() { - final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).build(); - final CSVFormat right = left.builder().setQuote(null).build(); - - assertEquals(left, right); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsNoQuotes_Deprecated() { - final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); - final CSVFormat right = left.withQuote(null); - - assertEquals(left, right); - } - - @Test - public void testEqualsNullString() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .setNullString("null") - .build(); - final CSVFormat left = right.builder() - .setNullString("---") - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsNullString_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - 
.withQuoteMode(QuoteMode.ALL) - .withNullString("null"); - final CSVFormat left = right - .withNullString("---"); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsOne() { - - final CSVFormat csvFormatOne = CSVFormat.INFORMIX_UNLOAD; - final CSVFormat csvFormatTwo = CSVFormat.MYSQL; - - - assertEquals('\\', (char)csvFormatOne.getEscapeCharacter()); - assertNull(csvFormatOne.getQuoteMode()); - - assertTrue(csvFormatOne.getIgnoreEmptyLines()); - assertFalse(csvFormatOne.getSkipHeaderRecord()); - - assertFalse(csvFormatOne.getIgnoreHeaderCase()); - assertNull(csvFormatOne.getCommentMarker()); - - assertFalse(csvFormatOne.isCommentMarkerSet()); - assertTrue(csvFormatOne.isQuoteCharacterSet()); - - assertEquals('|', csvFormatOne.getDelimiter()); - assertFalse(csvFormatOne.getAllowMissingColumnNames()); - - assertTrue(csvFormatOne.isEscapeCharacterSet()); - assertEquals("\n", csvFormatOne.getRecordSeparator()); - - assertEquals('\"', (char)csvFormatOne.getQuoteCharacter()); - assertFalse(csvFormatOne.getTrailingDelimiter()); - - assertFalse(csvFormatOne.getTrim()); - assertFalse(csvFormatOne.isNullStringSet()); - - assertNull(csvFormatOne.getNullString()); - assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); - - - assertTrue(csvFormatTwo.isEscapeCharacterSet()); - assertNull(csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); - - assertEquals('\t', csvFormatTwo.getDelimiter()); - assertEquals("\n", csvFormatTwo.getRecordSeparator()); - - assertFalse(csvFormatTwo.isQuoteCharacterSet()); - assertTrue(csvFormatTwo.isNullStringSet()); - - assertEquals('\\', (char)csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertFalse(csvFormatTwo.getIgnoreEmptyLines()); - - assertEquals("\\N", csvFormatTwo.getNullString()); - 
assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormatTwo.getTrailingDelimiter()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - assertNull(csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormatTwo, csvFormatOne); - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); - - assertEquals('\\', (char)csvFormatOne.getEscapeCharacter()); - assertNull(csvFormatOne.getQuoteMode()); - - assertTrue(csvFormatOne.getIgnoreEmptyLines()); - assertFalse(csvFormatOne.getSkipHeaderRecord()); - - assertFalse(csvFormatOne.getIgnoreHeaderCase()); - assertNull(csvFormatOne.getCommentMarker()); - - assertFalse(csvFormatOne.isCommentMarkerSet()); - assertTrue(csvFormatOne.isQuoteCharacterSet()); - - assertEquals('|', csvFormatOne.getDelimiter()); - assertFalse(csvFormatOne.getAllowMissingColumnNames()); - - assertTrue(csvFormatOne.isEscapeCharacterSet()); - assertEquals("\n", csvFormatOne.getRecordSeparator()); - - assertEquals('\"', (char)csvFormatOne.getQuoteCharacter()); - assertFalse(csvFormatOne.getTrailingDelimiter()); - - assertFalse(csvFormatOne.getTrim()); - assertFalse(csvFormatOne.isNullStringSet()); - - assertNull(csvFormatOne.getNullString()); - assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); - - assertTrue(csvFormatTwo.isEscapeCharacterSet()); - assertNull(csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); - - assertEquals('\t', csvFormatTwo.getDelimiter()); - assertEquals("\n", csvFormatTwo.getRecordSeparator()); - - assertFalse(csvFormatTwo.isQuoteCharacterSet()); - assertTrue(csvFormatTwo.isNullStringSet()); - - assertEquals('\\', (char)csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertFalse(csvFormatTwo.getIgnoreEmptyLines()); - - assertEquals("\\N", 
csvFormatTwo.getNullString()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormatTwo.getTrailingDelimiter()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - assertNull(csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormatOne, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormatOne); - - Assertions.assertNotEquals(csvFormatOne, csvFormatTwo); - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); - - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); - - } - - @Test - public void testEqualsQuoteChar() { - final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').build(); - final CSVFormat left = right.builder().setQuote('!').build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsQuoteChar_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"'); - final CSVFormat left = right.withQuote('!'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsQuotePolicy() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setQuoteMode(QuoteMode.MINIMAL) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsQuotePolicy_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withQuoteMode(QuoteMode.MINIMAL); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsRecordSeparator() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = 
right.builder() - .setRecordSeparator(LF) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsRecordSeparator_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withRecordSeparator(LF); - - assertNotEquals(right, left); - } - - public void testEqualsSkipHeaderRecord() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .setNullString("null") - .setSkipHeaderRecord(true) - .build(); - final CSVFormat left = right.builder() - .setSkipHeaderRecord(false) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsSkipHeaderRecord_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL) - .withNullString("null") - .withSkipHeaderRecord(); - final CSVFormat left = right - .withSkipHeaderRecord(false); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsWithNull() { - - final CSVFormat csvFormat = CSVFormat.POSTGRESQL_TEXT; - - assertEquals('\\', (char)csvFormat.getEscapeCharacter()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertFalse(csvFormat.getTrim()); - - assertTrue(csvFormat.isQuoteCharacterSet()); - assertEquals("\\N", csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertTrue(csvFormat.isEscapeCharacterSet()); - - 
assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, csvFormat.getQuoteMode()); - - assertEquals('\t', csvFormat.getDelimiter()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertEquals("\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertTrue(csvFormat.isNullStringSet()); - - assertEquals('\\', (char)csvFormat.getEscapeCharacter()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertFalse(csvFormat.getTrim()); - - assertTrue(csvFormat.isQuoteCharacterSet()); - assertEquals("\\N", csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertTrue(csvFormat.isEscapeCharacterSet()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, csvFormat.getQuoteMode()); - - assertEquals('\t', csvFormat.getDelimiter()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertEquals("\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertTrue(csvFormat.isNullStringSet()); - - Assertions.assertNotEquals(null, csvFormat); - - } - - @Test - public void testEscapeSameAsCommentStartThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setEscape('!').setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testEscapeSameAsCommentStartThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape('!').withCommentMarker('!')); - } - - @Test - public void 
testEscapeSameAsCommentStartThrowsExceptionForWrapperType() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.builder().setEscape(Character.valueOf('!')).setCommentMarker(Character.valueOf('!')).build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.withEscape(Character.valueOf('!')).withCommentMarker(Character.valueOf('!'))); - } - - @Test - public void testFormat() { - final CSVFormat format = CSVFormat.DEFAULT; - - assertEquals("", format.format()); - assertEquals("a,b,c", format.format("a", "b", "c")); - assertEquals("\"x,y\",z", format.format("x,y", "z")); - } - - @Test //I assume this to be a defect. - public void testFormatThrowsNullPointerException() { - - final CSVFormat csvFormat = CSVFormat.MYSQL; - - final NullPointerException e = assertThrows(NullPointerException.class, () -> csvFormat.format((Object[]) null)); - assertEquals(Objects.class.getName(), e.getStackTrace()[0].getClassName()); - } - - @Test - public void testFormatToString() { - final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',') - .withQuoteMode(QuoteMode.MINIMAL).withRecordSeparator(CRLF).withQuote('"') - .withNullString("").withIgnoreHeaderCase(true) - .withHeaderComments("This is HeaderComments").withHeader("col1","col2","col3"); - assertEquals("Delimiter=<,> Escape= QuoteChar=<\"> QuoteMode= NullString=<> RecordSeparator=<" +CRLF+ - "> IgnoreHeaderCase:ignored SkipHeaderRecord:false HeaderComments:[This is HeaderComments] Header:[col1, col2, col3]", format.toString()); - } - - @Test - public void testGetHeader() { - final String[] header = {"one", "two", "three"}; - final CSVFormat formatWithHeader = 
CSVFormat.DEFAULT.withHeader(header); - // getHeader() makes a copy of the header array. - final String[] headerCopy = formatWithHeader.getHeader(); - headerCopy[0] = "A"; - headerCopy[1] = "B"; - headerCopy[2] = "C"; - assertFalse(Arrays.equals(formatWithHeader.getHeader(), headerCopy)); - assertNotSame(formatWithHeader.getHeader(), headerCopy); - } - - @Test - public void testHashCodeAndWithIgnoreHeaderCase() { - - final CSVFormat csvFormat = CSVFormat.INFORMIX_UNLOAD_CSV; - final CSVFormat csvFormatTwo = csvFormat.withIgnoreHeaderCase(); - csvFormatTwo.hashCode(); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertTrue(csvFormatTwo.getIgnoreHeaderCase()); // now different - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - - assertFalse(csvFormatTwo.getTrim()); - - } - - @Test - public void testJiraCsv236() { - CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("CC","VV","VV").build(); - } - - @SuppressWarnings("deprecation") - @Test - public void testJiraCsv236__Deprecated() { - CSVFormat.DEFAULT.withAllowDuplicateHeaderNames().withHeader("CC","VV","VV"); - } - - @Test - public void testNewFormat() { - - final CSVFormat csvFormat = CSVFormat.newFormat('X'); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertFalse(csvFormat.isEscapeCharacterSet()); - - assertNull(csvFormat.getRecordSeparator()); - assertNull(csvFormat.getQuoteMode()); - - assertNull(csvFormat.getCommentMarker()); - assertFalse(csvFormat.getIgnoreHeaderCase()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertEquals('X', csvFormat.getDelimiter()); - 
assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.isQuoteCharacterSet()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertNull(csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertFalse(csvFormat.isEscapeCharacterSet()); - - assertNull(csvFormat.getRecordSeparator()); - assertNull(csvFormat.getQuoteMode()); - - assertNull(csvFormat.getCommentMarker()); - assertFalse(csvFormat.getIgnoreHeaderCase()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertEquals('X', csvFormat.getDelimiter()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.isQuoteCharacterSet()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertNull(csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - } - - @Test - public void testNullRecordSeparatorCsv106() { - final CSVFormat format = CSVFormat.newFormat(';').builder().setSkipHeaderRecord(true).setHeader("H1", "H2").build(); - final String formatStr = format.format("A", "B"); - assertNotNull(formatStr); - assertFalse(formatStr.endsWith("null")); - } - - @SuppressWarnings("deprecation") - @Test - public void testNullRecordSeparatorCsv106__Deprecated() { - final CSVFormat format = CSVFormat.newFormat(';').withSkipHeaderRecord().withHeader("H1", "H2"); - final String formatStr = format.format("A", "B"); - assertNotNull(formatStr); - assertFalse(formatStr.endsWith("null")); - } - - @Test - public void testPrintWithEscapesEndWithCRLF() throws IOException { - final Reader in = new StringReader("x,y,x\r\na,?b,c\r\n"); - final Appendable out = new StringBuilder(); - final CSVFormat format = 
CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); - format.print(in,out,true); - assertEquals("x?,y?,x?r?na?,??b?,c?r?n", out.toString()); - } - - @Test - public void testPrintWithEscapesEndWithoutCRLF() throws IOException { - final Reader in = new StringReader("x,y,x"); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); - format.print(in,out,true); - assertEquals("x?,y?,x", out.toString()); - } - - @Test - public void testPrintWithoutQuotes() throws IOException { - final Reader in = new StringReader(""); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); - format.print(in, out, true); - assertEquals("\"\"", out.toString()); - } - - @Test - public void testPrintWithQuoteModeIsNONE() throws IOException { - final Reader in = new StringReader("a,b,c"); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NONE); - format.print(in, out, true); - assertEquals("a?,b?,c", out.toString()); - } - - @Test - public void testPrintWithQuotes() throws IOException { - final Reader in = new StringReader("\"a,b,c\r\nx,y,z"); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); - format.print(in, out, true); - assertEquals("\"\"\"a,b,c\r\nx,y,z\"", out.toString()); - } - - @Test - public void testQuoteCharSameAsCommentStartThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void 
testQuoteCharSameAsCommentStartThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withCommentMarker('!')); - } - - @Test - public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.builder().setQuote(Character.valueOf('!')).setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.withQuote(Character.valueOf('!')).withCommentMarker('!')); - } - - @Test - public void testQuoteCharSameAsDelimiterThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setDelimiter('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testQuoteCharSameAsDelimiterThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withDelimiter('!')); - } - - @Test - public void testQuotePolicyNoneWithoutEscapeThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').builder().setQuoteMode(QuoteMode.NONE).build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testQuotePolicyNoneWithoutEscapeThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').withQuoteMode(QuoteMode.NONE)); - } - - @Test - public void testRFC4180() { - assertNull(RFC4180.getCommentMarker()); - assertEquals(',', RFC4180.getDelimiter()); - assertNull(RFC4180.getEscapeCharacter()); - assertFalse(RFC4180.getIgnoreEmptyLines()); - assertEquals(Character.valueOf('"'), RFC4180.getQuoteCharacter()); - 
assertNull(RFC4180.getQuoteMode()); - assertEquals("\r\n", RFC4180.getRecordSeparator()); - } - - @SuppressWarnings("boxing") // no need to worry about boxing here - @Test - public void testSerialization() throws Exception { - final ByteArrayOutputStream out = new ByteArrayOutputStream(); - - try (final ObjectOutputStream oos = new ObjectOutputStream(out)) { - oos.writeObject(CSVFormat.DEFAULT); - oos.flush(); - } - - final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(out.toByteArray())); - final CSVFormat format = (CSVFormat) in.readObject(); - - assertNotNull(format); - assertEquals(CSVFormat.DEFAULT.getDelimiter(), format.getDelimiter(), "delimiter"); - assertEquals(CSVFormat.DEFAULT.getQuoteCharacter(), format.getQuoteCharacter(), "encapsulator"); - assertEquals(CSVFormat.DEFAULT.getCommentMarker(), format.getCommentMarker(), "comment start"); - assertEquals(CSVFormat.DEFAULT.getRecordSeparator(), format.getRecordSeparator(), "record separator"); - assertEquals(CSVFormat.DEFAULT.getEscapeCharacter(), format.getEscapeCharacter(), "escape"); - assertEquals(CSVFormat.DEFAULT.getIgnoreSurroundingSpaces(), format.getIgnoreSurroundingSpaces(), "trim"); - assertEquals(CSVFormat.DEFAULT.getIgnoreEmptyLines(), format.getIgnoreEmptyLines(), "empty lines"); - } - - @Test - public void testToString() { - - final String string = CSVFormat.INFORMIX_UNLOAD.toString(); - - assertEquals("Delimiter=<|> Escape=<\\> QuoteChar=<\"> RecordSeparator=<\n> EmptyLines:ignored SkipHeaderRecord:false", string); - - } - - @Test - public void testToStringAndWithCommentMarkerTakingCharacter() { - - final CSVFormat.Predefined csvFormat_Predefined = CSVFormat.Predefined.Default; - final CSVFormat csvFormat = csvFormat_Predefined.getFormat(); - - assertNull(csvFormat.getEscapeCharacter()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getTrim()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - 
assertFalse(csvFormat.getTrailingDelimiter()); - assertEquals(',', csvFormat.getDelimiter()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertEquals("\r\n", csvFormat.getRecordSeparator()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.isNullStringSet()); - assertFalse(csvFormat.getAllowMissingColumnNames()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertNull(csvFormat.getNullString()); - assertNull(csvFormat.getQuoteMode()); - - assertTrue(csvFormat.getIgnoreEmptyLines()); - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - - final Character character = Character.valueOf('n'); - - final CSVFormat csvFormatTwo = csvFormat.withCommentMarker(character); - - assertNull(csvFormat.getEscapeCharacter()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getTrim()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertEquals(',', csvFormat.getDelimiter()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertEquals("\r\n", csvFormat.getRecordSeparator()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.isNullStringSet()); - assertFalse(csvFormat.getAllowMissingColumnNames()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertNull(csvFormat.getNullString()); - assertNull(csvFormat.getQuoteMode()); - - assertTrue(csvFormat.getIgnoreEmptyLines()); - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - assertNull(csvFormatTwo.getNullString()); - - assertEquals(',', csvFormatTwo.getDelimiter()); - 
assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertTrue(csvFormatTwo.isCommentMarkerSet()); - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertNull(csvFormatTwo.getEscapeCharacter()); - - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertEquals('n', (char)csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); - - assertNull(csvFormat.getEscapeCharacter()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getTrim()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertEquals(',', csvFormat.getDelimiter()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertEquals("\r\n", csvFormat.getRecordSeparator()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.isNullStringSet()); - assertFalse(csvFormat.getAllowMissingColumnNames()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertNull(csvFormat.getNullString()); - assertNull(csvFormat.getQuoteMode()); - - assertTrue(csvFormat.getIgnoreEmptyLines()); - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - assertNull(csvFormatTwo.getNullString()); - - assertEquals(',', csvFormatTwo.getDelimiter()); - 
assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertTrue(csvFormatTwo.isCommentMarkerSet()); - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertNull(csvFormatTwo.getEscapeCharacter()); - - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertEquals('n', (char)csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - Assertions.assertNotEquals(csvFormat, csvFormatTwo); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); - assertEquals("Delimiter=<,> QuoteChar=<\"> CommentStart= " + - "RecordSeparator=<\r\n> EmptyLines:ignored SkipHeaderRecord:false" - , csvFormatTwo.toString()); - - } - - @Test - public void testTrim() throws IOException { - final CSVFormat formatWithTrim = CSVFormat.DEFAULT.withDelimiter(',').withTrim().withQuote(null).withRecordSeparator(CRLF); - - CharSequence in = "a,b,c"; - final StringBuilder out = new StringBuilder(); - formatWithTrim.print(in, out, true); - assertEquals("a,b,c", out.toString()); - - in = new StringBuilder(" x,y,z"); - out.setLength(0); - formatWithTrim.print(in, out, true); - assertEquals("x,y,z", out.toString()); - - in = new StringBuilder(""); - out.setLength(0); - formatWithTrim.print(in, out, true); - assertEquals("", out.toString()); - - in = new StringBuilder("header\r\n"); - out.setLength(0); - formatWithTrim.print(in, out, true); - assertEquals("header", out.toString()); - } - - @Test - public void testWithCommentStart() { - final CSVFormat formatWithCommentStart = CSVFormat.DEFAULT.withCommentMarker('#'); - assertEquals( Character.valueOf('#'), 
formatWithCommentStart.getCommentMarker()); - } - - - @Test - public void testWithCommentStartCRThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withCommentMarker(CR)); - } - - - @Test - public void testWithDelimiter() { - final CSVFormat formatWithDelimiter = CSVFormat.DEFAULT.withDelimiter('!'); - assertEquals('!', formatWithDelimiter.getDelimiter()); - } - - - @Test - public void testWithDelimiterLFThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(LF)); - } - - - @Test - public void testWithEmptyDuplicates() { - final CSVFormat formatWithEmptyDuplicates = - CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build(); - - assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode()); - assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames()); - } - - - @Test - public void testWithEmptyEnum() { - final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(EmptyEnum.class); - assertEquals(0, formatWithHeader.getHeader().length); - } - - - @Test - public void testWithEscape() { - final CSVFormat formatWithEscape = CSVFormat.DEFAULT.withEscape('&'); - assertEquals(Character.valueOf('&'), formatWithEscape.getEscapeCharacter()); - } - - @Test - public void testWithEscapeCRThrowsExceptions() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR)); - } - - - @Test - public void testWithFirstRecordAsHeader() { - final CSVFormat formatWithFirstRecordAsHeader = CSVFormat.DEFAULT.withFirstRecordAsHeader(); - assertTrue(formatWithFirstRecordAsHeader.getSkipHeaderRecord()); - assertEquals(0, formatWithFirstRecordAsHeader.getHeader().length); - } - - @Test - public void testWithHeader() { - final String[] header = {"one", "two", "three"}; - // withHeader() makes a copy of the header array. 
- final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); - assertArrayEquals(header, formatWithHeader.getHeader()); - assertNotSame(header, formatWithHeader.getHeader()); - } - - @Test - public void testWithHeaderComments() { - - final CSVFormat csvFormat = CSVFormat.DEFAULT; - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertNull(csvFormat.getQuoteMode()); - - assertEquals(',', csvFormat.getDelimiter()); - assertTrue(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertNull(csvFormat.getCommentMarker()); - - assertEquals("\r\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertNull(csvFormat.getEscapeCharacter()); - - final Object[] objectArray = new Object[8]; - final CSVFormat csvFormatTwo = csvFormat.withHeaderComments(objectArray); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertNull(csvFormat.getQuoteMode()); - - assertEquals(',', csvFormat.getDelimiter()); - assertTrue(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertNull(csvFormat.getCommentMarker()); - - assertEquals("\r\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - 
assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertNull(csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getTrim()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertNull(csvFormatTwo.getNullString()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertEquals(',', csvFormatTwo.getDelimiter()); - - assertNull(csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal - - final String string = csvFormatTwo.format(objectArray); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertNull(csvFormat.getQuoteMode()); - - assertEquals(',', csvFormat.getDelimiter()); - assertTrue(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertNull(csvFormat.getCommentMarker()); - - assertEquals("\r\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getTrailingDelimiter()); - - 
assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertNull(csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getTrim()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertNull(csvFormatTwo.getNullString()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertEquals(',', csvFormatTwo.getDelimiter()); - - assertNull(csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - assertNotNull(string); - Assertions.assertNotEquals(csvFormat, csvFormatTwo); // CSV-244 - should not be equal - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal - assertEquals(",,,,,,,", string); - - } - - @Test - public void testWithHeaderEnum() { - final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(Header.class); - assertArrayEquals(new String[]{ "Name", "Email", "Phone" }, formatWithHeader.getHeader()); - } - - @Test - public void testWithHeaderEnumNull() { - final CSVFormat format = CSVFormat.DEFAULT; - final Class> simpleName = null; - format.withHeader(simpleName); - } - - @Test - public void testWithHeaderResultSetNull() throws 
SQLException { - final CSVFormat format = CSVFormat.DEFAULT; - final ResultSet resultSet = null; - format.withHeader(resultSet); - } - - @Test - public void testWithIgnoreEmptyLines() { - assertFalse(CSVFormat.DEFAULT.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); - assertTrue(CSVFormat.DEFAULT.withIgnoreEmptyLines().getIgnoreEmptyLines()); - } - - @Test - public void testWithIgnoreSurround() { - assertFalse(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); - assertTrue(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces().getIgnoreSurroundingSpaces()); - } - - @Test - public void testWithNullString() { - final CSVFormat formatWithNullString = CSVFormat.DEFAULT.withNullString("null"); - assertEquals("null", formatWithNullString.getNullString()); - } - - @Test - public void testWithQuoteChar() { - final CSVFormat formatWithQuoteChar = CSVFormat.DEFAULT.withQuote('"'); - assertEquals(Character.valueOf('"'), formatWithQuoteChar.getQuoteCharacter()); - } - - @Test - public void testWithQuoteLFThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(LF)); - } - - @Test - public void testWithQuotePolicy() { - final CSVFormat formatWithQuotePolicy = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL); - assertEquals(QuoteMode.ALL, formatWithQuotePolicy.getQuoteMode()); - } - - @Test - public void testWithRecordSeparatorCR() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CR); - assertEquals(String.valueOf(CR), formatWithRecordSeparator.getRecordSeparator()); - } - - @Test - public void testWithRecordSeparatorCRLF() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CRLF); - assertEquals(CRLF, formatWithRecordSeparator.getRecordSeparator()); - } - - @Test - public void testWithRecordSeparatorLF() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(LF); - assertEquals(String.valueOf(LF), 
formatWithRecordSeparator.getRecordSeparator()); - } - - @Test - public void testWithSystemRecordSeparator() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withSystemRecordSeparator(); - assertEquals(System.lineSeparator(), formatWithRecordSeparator.getRecordSeparator()); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.CSVFormat.RFC4180; +import static org.apache.commons.csv.Constants.CR; +import static org.apache.commons.csv.Constants.CRLF; +import static org.apache.commons.csv.Constants.LF; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Reader; +import java.io.StringReader; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Objects; + +import org.apache.commons.csv.CSVFormat.Builder; +import org.junit.jupiter.api.Test; + +/** + * Tests {@link CSVFormat}. + */ +class CSVFormatTest { + + public enum EmptyEnum { + // empty enum. 
+ } + + public enum Header { + Name, Email, Phone + } + + private static void assertNotEqualsFlip(final Object right, final Object left) { + assertNotEquals(right, left); + assertNotEquals(left, right); + } + + private static CSVFormat copy(final CSVFormat format) { + return format.builder().setDelimiter(format.getDelimiter()).get(); + } + + private void assertNotEqualsHash(final String name, final String type, final Object left, final Object right) { + if (left.equals(right) || right.equals(left)) { + fail("Objects must not compare equal for " + name + "(" + type + ")"); + } + if (left.hashCode() == right.hashCode()) { + fail("Hash code should not be equal for " + name + "(" + type + ")"); + } + } + + @Test + void testBuildVsGet() { + final Builder builder = CSVFormat.DEFAULT.builder(); + assertNotSame(builder.get(), builder.build()); + } + + @Test + void testDelimiterCharLineBreakCrThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter(Constants.CR).get()); + } + + @Test + void testDelimiterCharLineBreakLfThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter(Constants.LF).get()); + } + + @Test + void testDelimiterEmptyStringThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter("").get()); + } + + @SuppressWarnings("deprecation") + @Test + void testDelimiterSameAsCommentStartThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withCommentMarker('!')); + } + + @Test + void testDelimiterSameAsCommentStartThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setCommentMarker('!').get()); + } + + @SuppressWarnings("deprecation") + @Test + void testDelimiterSameAsEscapeThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> 
CSVFormat.DEFAULT.withDelimiter('!').withEscape('!')); + } + + @Test + void testDelimiterSameAsEscapeThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setEscape('!').get()); + } + + @Test + void testDelimiterSameAsRecordSeparatorThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat(CR)); + } + + @Test + void testDelimiterStringLineBreakCrThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter(String.valueOf(Constants.CR)).get()); + } + + @Test + void testDelimiterStringLineBreakLfThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter(String.valueOf(Constants.LF)).get()); + } + + @Test + void testDuplicateHeaderElements() { + final String[] header = { "A", "A" }; + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader(header).get(); + assertEquals(2, format.getHeader().length); + assertArrayEquals(header, format.getHeader()); + } + + @SuppressWarnings("deprecation") + @Test + void testDuplicateHeaderElements_Deprecated() { + final String[] header = { "A", "A" }; + final CSVFormat format = CSVFormat.DEFAULT.withHeader(header); + assertEquals(2, format.getHeader().length); + assertArrayEquals(header, format.getHeader()); + } + + @Test + void testDuplicateHeaderElementsFalse() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "A").get()); + } + + @SuppressWarnings("deprecation") + @Test + void testDuplicateHeaderElementsFalse_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(false).withHeader("A", "A")); + } + + @Test + void testDuplicateHeaderElementsTrue() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("A", "A").get(); + } + + 
@SuppressWarnings("deprecation") + @Test + void testDuplicateHeaderElementsTrue_Deprecated() { + CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(true).withHeader("A", "A"); + } + + @Test + void testDuplicateHeaderElementsTrueContainsEmpty1() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "", "B", "").get(); + } + + @Test + void testDuplicateHeaderElementsTrueContainsEmpty2() { + CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).setHeader("A", "", "B", "").get(); + } + + @Test + void testDuplicateHeaderElementsTrueContainsEmpty3() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setAllowMissingColumnNames(true).setHeader("A", "", "B", "").get(); + } + + @Test + void testEquals() { + final CSVFormat right = CSVFormat.DEFAULT; + final CSVFormat left = copy(right); + assertNotEquals(null, right); + assertNotEquals("A String Instance", right); + assertEquals(right, right); + assertEquals(right, left); + assertEquals(left, right); + assertEquals(right.hashCode(), right.hashCode()); + assertEquals(right.hashCode(), left.hashCode()); + } + + @Test + void testEqualsCommentStart() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').setCommentMarker('#').setQuoteMode(QuoteMode.ALL).get(); + final CSVFormat left = right.builder().setCommentMarker('!').get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsCommentStart_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withCommentMarker('#').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withCommentMarker('!'); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsDelimiter() { + final CSVFormat right = CSVFormat.newFormat('!'); + final CSVFormat left = CSVFormat.newFormat('?'); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsEscape() { + final CSVFormat right = 
CSVFormat.newFormat('\'').builder().setQuote('"').setCommentMarker('#').setEscape('+').setQuoteMode(QuoteMode.ALL).get(); + final CSVFormat left = right.builder().setEscape('!').get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsEscape_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withCommentMarker('#').withEscape('+').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withEscape('!'); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsHash() throws Exception { + final Method[] methods = CSVFormat.class.getDeclaredMethods(); + for (final Method method : methods) { + if (Modifier.isPublic(method.getModifiers())) { + final String name = method.getName(); + if (name.startsWith("with")) { + for (final Class cls : method.getParameterTypes()) { + final String type = cls.getCanonicalName(); + switch (type) { + case "boolean": { + final Object defTrue = method.invoke(CSVFormat.DEFAULT, Boolean.TRUE); + final Object defFalse = method.invoke(CSVFormat.DEFAULT, Boolean.FALSE); + assertNotEqualsHash(name, type, defTrue, defFalse); + break; + } + case "char": { + final Object a = method.invoke(CSVFormat.DEFAULT, 'a'); + final Object b = method.invoke(CSVFormat.DEFAULT, 'b'); + assertNotEqualsHash(name, type, a, b); + break; + } + case "java.lang.Character": { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { null }); + final Object b = method.invoke(CSVFormat.DEFAULT, Character.valueOf('d')); + assertNotEqualsHash(name, type, a, b); + break; + } + case "java.lang.String": { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { null }); + final Object b = method.invoke(CSVFormat.DEFAULT, "e"); + assertNotEqualsHash(name, type, a, b); + break; + } + case "java.lang.String[]": { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { new String[] { null, null } }); + final Object b = method.invoke(CSVFormat.DEFAULT, 
new Object[] { new String[] { "f", "g" } }); + assertNotEqualsHash(name, type, a, b); + break; + } + case "org.apache.commons.csv.QuoteMode": { + final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL); + final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL); + assertNotEqualsHash(name, type, a, b); + break; + } + case "org.apache.commons.csv.DuplicateHeaderMode": { + final Object a = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.ALLOW_ALL); + final Object b = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.DISALLOW); + assertNotEqualsHash(name, type, a, b); + break; + } + case "java.lang.Object[]": { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { new Object[] { null, null } }); + final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] { new Object[] { new Object(), new Object() } }); + assertNotEqualsHash(name, type, a, b); + break; + } + default: + if ("withHeader".equals(name)) { // covered above by String[] + // ignored + } else { + fail("Unhandled method: " + name + "(" + type + ")"); + } + break; + } + } + } + } + } + } + + @Test + void testEqualsHeader() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setHeader("One", "Two", "Three") + .setIgnoreEmptyLines(true).setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).get(); + final CSVFormat left = right.builder().setHeader("Three", "Two", "One").get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsHeader_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withHeader("One", "Two", "Three") + .withIgnoreEmptyLines().withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withHeader("Three", "Two", "One"); + + assertNotEqualsFlip(right, left); + } + + @Test + void 
testEqualsIgnoreEmptyLines() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).get(); + final CSVFormat left = right.builder().setIgnoreEmptyLines(false).get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsIgnoreEmptyLines_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withCommentMarker('#').withEscape('+').withIgnoreEmptyLines().withIgnoreSurroundingSpaces() + .withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withIgnoreEmptyLines(false); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsIgnoreSurroundingSpaces() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setCommentMarker('#').setEscape('+').setIgnoreSurroundingSpaces(true).setQuote('"') + .setQuoteMode(QuoteMode.ALL).get(); + final CSVFormat left = right.builder().setIgnoreSurroundingSpaces(false).get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsIgnoreSurroundingSpaces_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withCommentMarker('#').withEscape('+').withIgnoreSurroundingSpaces().withQuote('"') + .withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withIgnoreSurroundingSpaces(false); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsLeftNoQuoteRightQuote() { + final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).get(); + final CSVFormat right = left.builder().setQuote('#').get(); + + assertNotEqualsFlip(left, right); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsLeftNoQuoteRightQuote_Deprecated() { + final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); + final CSVFormat right = left.withQuote('#'); + + assertNotEqualsFlip(left, right); + } + + @Test + void 
testEqualsMaxRows() { + final CSVFormat right = CSVFormat.DEFAULT.builder().setMaxRows(10).get(); + final CSVFormat left = CSVFormat.DEFAULT.builder().setMaxRows(1000).get(); + assertNotEqualsFlip(right, left); + assertNotEquals(right.hashCode(), left.hashCode()); + } + + @Test + void testEqualsNoQuotes() { + final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).get(); + final CSVFormat right = left.builder().setQuote(null).get(); + + assertEquals(left, right); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsNoQuotes_Deprecated() { + final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); + final CSVFormat right = left.withQuote(null); + + assertEquals(left, right); + } + + @Test + void testEqualsNullString() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).setNullString("null").get(); + final CSVFormat left = right.builder().setNullString("---").get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsNullString_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() + .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL).withNullString("null"); + final CSVFormat left = right.withNullString("---"); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsOne() { + + final CSVFormat csvFormatOne = CSVFormat.INFORMIX_UNLOAD; + final CSVFormat csvFormatTwo = CSVFormat.MYSQL; + + assertEquals('\\', (char) csvFormatOne.getEscapeCharacter()); + assertEquals('\\', csvFormatOne.getEscapeChar()); + assertNull(csvFormatOne.getQuoteMode()); + + assertTrue(csvFormatOne.getIgnoreEmptyLines()); + assertFalse(csvFormatOne.getSkipHeaderRecord()); + + 
assertFalse(csvFormatOne.getIgnoreHeaderCase()); + assertNull(csvFormatOne.getCommentMarker()); + + assertFalse(csvFormatOne.isCommentMarkerSet()); + assertTrue(csvFormatOne.isQuoteCharacterSet()); + + assertEquals('|', csvFormatOne.getDelimiter()); + assertFalse(csvFormatOne.getAllowMissingColumnNames()); + + assertTrue(csvFormatOne.isEscapeCharacterSet()); + assertEquals("\n", csvFormatOne.getRecordSeparator()); + + assertEquals('\"', (char) csvFormatOne.getQuoteCharacter()); + assertFalse(csvFormatOne.getTrailingDelimiter()); + + assertFalse(csvFormatOne.getTrim()); + assertFalse(csvFormatOne.isNullStringSet()); + + assertNull(csvFormatOne.getNullString()); + assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); + + assertTrue(csvFormatTwo.isEscapeCharacterSet()); + assertNull(csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); + + assertEquals('\t', csvFormatTwo.getDelimiter()); + assertArrayEquals(new char[] { '\t' }, csvFormatTwo.getDelimiterCharArray()); + assertEquals("\t", csvFormatTwo.getDelimiterString()); + assertEquals("\n", csvFormatTwo.getRecordSeparator()); + + assertFalse(csvFormatTwo.isQuoteCharacterSet()); + assertTrue(csvFormatTwo.isNullStringSet()); + + assertEquals('\\', (char) csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertFalse(csvFormatTwo.getIgnoreEmptyLines()); + + assertEquals("\\N", csvFormatTwo.getNullString()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormatTwo.getTrailingDelimiter()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormatTwo, csvFormatOne); + assertNotEquals(csvFormatTwo, csvFormatOne); + + assertEquals('\\', (char) csvFormatOne.getEscapeCharacter()); + 
assertNull(csvFormatOne.getQuoteMode()); + + assertTrue(csvFormatOne.getIgnoreEmptyLines()); + assertFalse(csvFormatOne.getSkipHeaderRecord()); + + assertFalse(csvFormatOne.getIgnoreHeaderCase()); + assertNull(csvFormatOne.getCommentMarker()); + + assertFalse(csvFormatOne.isCommentMarkerSet()); + assertTrue(csvFormatOne.isQuoteCharacterSet()); + + assertEquals('|', csvFormatOne.getDelimiter()); + assertFalse(csvFormatOne.getAllowMissingColumnNames()); + + assertTrue(csvFormatOne.isEscapeCharacterSet()); + assertEquals("\n", csvFormatOne.getRecordSeparator()); + + assertEquals('\"', (char) csvFormatOne.getQuoteCharacter()); + assertFalse(csvFormatOne.getTrailingDelimiter()); + + assertFalse(csvFormatOne.getTrim()); + assertFalse(csvFormatOne.isNullStringSet()); + + assertNull(csvFormatOne.getNullString()); + assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); + + assertTrue(csvFormatTwo.isEscapeCharacterSet()); + assertNull(csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); + + assertEquals('\t', csvFormatTwo.getDelimiter()); + assertEquals("\n", csvFormatTwo.getRecordSeparator()); + + assertFalse(csvFormatTwo.isQuoteCharacterSet()); + assertTrue(csvFormatTwo.isNullStringSet()); + + assertEquals('\\', (char) csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertFalse(csvFormatTwo.getIgnoreEmptyLines()); + + assertEquals("\\N", csvFormatTwo.getNullString()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormatTwo.getTrailingDelimiter()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormatOne, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormatOne); + + assertNotEquals(csvFormatOne, csvFormatTwo); + 
assertNotEquals(csvFormatTwo, csvFormatOne); + + assertNotEquals(csvFormatTwo, csvFormatOne); + + } + + @Test + void testEqualsQuoteChar() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').get(); + final CSVFormat left = right.builder().setQuote('!').get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsQuoteChar_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"'); + final CSVFormat left = right.withQuote('!'); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsQuotePolicy() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').setQuoteMode(QuoteMode.ALL).get(); + final CSVFormat left = right.builder().setQuoteMode(QuoteMode.MINIMAL).get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsQuotePolicy_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withQuoteMode(QuoteMode.MINIMAL); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsRecordSeparator() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).get(); + final CSVFormat left = right.builder().setRecordSeparator(LF).get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsRecordSeparator_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() + .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withRecordSeparator(LF); + + assertNotEqualsFlip(right, left); + } + + void testEqualsSkipHeaderRecord() { + final CSVFormat 
right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).setNullString("null").setSkipHeaderRecord(true).get(); + final CSVFormat left = right.builder().setSkipHeaderRecord(false).get(); + + assertNotEqualsFlip(right, left); + } + + @SuppressWarnings("deprecation") + @Test + void testEqualsSkipHeaderRecord_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() + .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL).withNullString("null").withSkipHeaderRecord(); + final CSVFormat left = right.withSkipHeaderRecord(false); + + assertNotEqualsFlip(right, left); + } + + @Test + void testEqualsWithNull() { + + final CSVFormat csvFormat = CSVFormat.POSTGRESQL_TEXT; + + assertEquals('\\', (char) csvFormat.getEscapeCharacter()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertEquals("\\N", csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertTrue(csvFormat.isEscapeCharacterSet()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormat.getQuoteMode()); + + assertEquals('\t', csvFormat.getDelimiter()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertEquals("\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + assertNull(csvFormat.getQuoteCharacter()); + assertTrue(csvFormat.isNullStringSet()); + + assertEquals('\\', (char) csvFormat.getEscapeCharacter()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + 
assertFalse(csvFormat.getTrailingDelimiter()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertEquals("\\N", csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertTrue(csvFormat.isEscapeCharacterSet()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormat.getQuoteMode()); + + assertEquals('\t', csvFormat.getDelimiter()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertEquals("\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + assertNull(csvFormat.getQuoteCharacter()); + assertTrue(csvFormat.isNullStringSet()); + + assertNotEquals(null, csvFormat); + + } + + @Test + void testEscapeSameAsCommentStartThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setEscape('!').setCommentMarker('!').get()); + } + + @SuppressWarnings("deprecation") + @Test + void testEscapeSameAsCommentStartThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape('!').withCommentMarker('!')); + } + + @Test + void testEscapeSameAsCommentStartThrowsExceptionForWrapperType() { + // Cannot assume that callers won't use different Character objects + assertThrows(IllegalArgumentException.class, + () -> CSVFormat.DEFAULT.builder().setEscape(Character.valueOf('!')).setCommentMarker(Character.valueOf('!')).get()); + } + + @SuppressWarnings("deprecation") + @Test + void testEscapeSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { + // Cannot assume that callers won't use different Character objects + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(Character.valueOf('!')).withCommentMarker(Character.valueOf('!'))); + } + + @Test + void testFormat() { + final CSVFormat format = 
CSVFormat.DEFAULT; + + assertEquals("", format.format()); + assertEquals("a,b,c", format.format("a", "b", "c")); + assertEquals("\"x,y\",z", format.format("x,y", "z")); + } + + @Test // I assume this to be a defect. + void testFormatThrowsNullPointerException() { + + final CSVFormat csvFormat = CSVFormat.MYSQL; + + final NullPointerException e = assertThrows(NullPointerException.class, () -> csvFormat.format((Object[]) null)); + assertEquals(Objects.class.getName(), e.getStackTrace()[0].getClassName()); + } + + @Test + void testFormatToString() { + // @formatter:off + final CSVFormat format = CSVFormat.RFC4180 + .withEscape('?') + .withDelimiter(',') + .withQuoteMode(QuoteMode.MINIMAL) + .withRecordSeparator(CRLF) + .withQuote('"') + .withNullString("") + .withIgnoreHeaderCase(true) + .withHeaderComments("This is HeaderComments") + .withHeader("col1", "col2", "col3"); + // @formatter:on + assertEquals( + "Delimiter=<,> Escape= QuoteChar=<\"> QuoteMode= NullString=<> RecordSeparator=<" + CRLF + + "> IgnoreHeaderCase:ignored SkipHeaderRecord:false HeaderComments:[This is HeaderComments] Header:[col1, col2, col3]", + format.toString()); + } + + @Test + void testGetAllowDuplicateHeaderNames() { + final Builder builder = CSVFormat.DEFAULT.builder(); + assertTrue(builder.get().getAllowDuplicateHeaderNames()); + assertTrue(builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).get().getAllowDuplicateHeaderNames()); + assertFalse(builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).get().getAllowDuplicateHeaderNames()); + assertFalse(builder.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW).get().getAllowDuplicateHeaderNames()); + } + + @Test + void testGetDuplicateHeaderMode() { + final Builder builder = CSVFormat.DEFAULT.builder(); + + assertEquals(DuplicateHeaderMode.ALLOW_ALL, builder.get().getDuplicateHeaderMode()); + assertEquals(DuplicateHeaderMode.ALLOW_ALL, 
builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).get().getDuplicateHeaderMode()); + assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).get().getDuplicateHeaderMode()); + assertEquals(DuplicateHeaderMode.DISALLOW, builder.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW).get().getDuplicateHeaderMode()); + } + + @Test + void testGetHeader() { + final String[] header = { "one", "two", "three" }; + final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); + // getHeader() makes a copy of the header array. + final String[] headerCopy = formatWithHeader.getHeader(); + headerCopy[0] = "A"; + headerCopy[1] = "B"; + headerCopy[2] = "C"; + assertFalse(Arrays.equals(formatWithHeader.getHeader(), headerCopy)); + assertNotSame(formatWithHeader.getHeader(), headerCopy); + } + + @Test + void testHashCodeAndWithIgnoreHeaderCase() { + + final CSVFormat csvFormat = CSVFormat.INFORMIX_UNLOAD_CSV; + final CSVFormat csvFormatTwo = csvFormat.withIgnoreHeaderCase(); + csvFormatTwo.hashCode(); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertTrue(csvFormatTwo.getIgnoreHeaderCase()); // now different + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + + assertFalse(csvFormatTwo.getTrim()); + + } + + @Test + void testJiraCsv236() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("CC", "VV", "VV").get(); + } + + @SuppressWarnings("deprecation") + @Test + void testJiraCsv236__Deprecated() { + CSVFormat.DEFAULT.withAllowDuplicateHeaderNames().withHeader("CC", "VV", "VV"); + } + + @Test + void testNewFormat() { + + final CSVFormat csvFormat = CSVFormat.newFormat('X'); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertFalse(csvFormat.isEscapeCharacterSet()); + + assertNull(csvFormat.getRecordSeparator()); + 
assertNull(csvFormat.getQuoteMode()); + + assertNull(csvFormat.getCommentMarker()); + assertFalse(csvFormat.getIgnoreHeaderCase()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertEquals('X', csvFormat.getDelimiter()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertNull(csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertFalse(csvFormat.isEscapeCharacterSet()); + + assertNull(csvFormat.getRecordSeparator()); + assertNull(csvFormat.getQuoteMode()); + + assertNull(csvFormat.getCommentMarker()); + assertFalse(csvFormat.getIgnoreHeaderCase()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertEquals('X', csvFormat.getDelimiter()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertNull(csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + } + + @Test + void testNullRecordSeparatorCsv106() { + final CSVFormat format = CSVFormat.newFormat(';').builder().setSkipHeaderRecord(true).setHeader("H1", "H2").get(); + final String formatStr = format.format("A", "B"); + assertNotNull(formatStr); + assertFalse(formatStr.endsWith("null")); + } + + @SuppressWarnings("deprecation") + @Test + void testNullRecordSeparatorCsv106__Deprecated() { + final CSVFormat format = 
CSVFormat.newFormat(';').withSkipHeaderRecord().withHeader("H1", "H2"); + final String formatStr = format.format("A", "B"); + assertNotNull(formatStr); + assertFalse(formatStr.endsWith("null")); + } + + @Test + void testPrintRecord() throws IOException { + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180; + format.printRecord(out, "a", "b", "c"); + assertEquals("a,b,c" + format.getRecordSeparator(), out.toString()); + } + + @Test + void testPrintRecordEmpty() throws IOException { + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180; + format.printRecord(out); + assertEquals(format.getRecordSeparator(), out.toString()); + } + + @Test + void testPrintWithEscapesEndWithCRLF() throws IOException { + final Reader in = new StringReader("x,y,x\r\na,?b,c\r\n"); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); + format.print(in, out, true); + assertEquals("x?,y?,x?r?na?,??b?,c?r?n", out.toString()); + } + + @Test + void testPrintWithEscapesEndWithoutCRLF() throws IOException { + final Reader in = new StringReader("x,y,x"); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); + format.print(in, out, true); + assertEquals("x?,y?,x", out.toString()); + } + + @Test + void testPrintWithoutQuotes() throws IOException { + final Reader in = new StringReader(""); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); + format.print(in, out, true); + assertEquals("\"\"", out.toString()); + } + + @Test + void testPrintWithQuoteModeIsNONE() throws IOException { + final Reader in = new StringReader("a,b,c"); + final Appendable out = new StringBuilder(); + 
final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NONE); + format.print(in, out, true); + assertEquals("a?,b?,c", out.toString()); + } + + @Test + void testPrintWithQuotes() throws IOException { + final Reader in = new StringReader("\"a,b,c\r\nx,y,z"); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); + format.print(in, out, true); + assertEquals("\"\"\"a,b,c\r\nx,y,z\"", out.toString()); + } + + /** + * Tests CSV-326. + */ + @Test + void testPrintWithQuotesEscapeBeforeQuote() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder() + .setEscape('\\') + .setQuote('"') + .get(); + final String value = "\\\""; + final Appendable out = new StringBuilder(); + format.print(new StringReader(value), out, true); + try (CSVParser parser = CSVParser.parse(out.toString(), format)) { + assertEquals(value, parser.getRecords().get(0).get(0)); + } + } + + @Test + void testQuoteCharSameAsCommentStartThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setCommentMarker('!').get()); + } + + @SuppressWarnings("deprecation") + @Test + void testQuoteCharSameAsCommentStartThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withCommentMarker('!')); + } + + @Test + void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType() { + // Cannot assume that callers won't use different Character objects + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote(Character.valueOf('!')).setCommentMarker('!').get()); + } + + @SuppressWarnings("deprecation") + @Test + void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { + // Cannot assume that callers won't use different Character objects + 
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(Character.valueOf('!')).withCommentMarker('!')); + } + + @Test + void testQuoteCharSameAsDelimiterThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setDelimiter('!').get()); + } + + @SuppressWarnings("deprecation") + @Test + void testQuoteCharSameAsDelimiterThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withDelimiter('!')); + } + + @Test + void testQuotedNullStringTracksQuoteCharacter() throws IOException { + final StringBuilder out = new StringBuilder(); + // @formatter:off + final Builder builder = CSVFormat.DEFAULT.builder(); + final CSVFormat format = builder + .setQuoteMode(QuoteMode.ALL) + .setNullString("NULL") + .get(); + // @formatter:on + format.print(null, out, true); + assertEquals("\"NULL\"", out.toString()); + // set + out.setLength(0); + builder.setQuote('\''); + builder.get().print(null, out, true); + assertEquals("'NULL'", out.toString()); + // reset + out.setLength(0); + builder.setQuote((Character) null); + builder.get().print(null, out, true); + assertEquals("\"NULL\"", out.toString()); + // reset, reverse setter order + out.setLength(0); + builder.setNullString(null).setQuote((Character) null).setNullString("NULL"); + builder.get().print(null, out, true); + assertEquals("\"NULL\"", out.toString()); + } + + @Test + void testQuoteModeNoneShouldReturnMeaningfulExceptionMessage() { + final Exception exception = assertThrows(IllegalArgumentException.class, () -> + // @formatter:off + CSVFormat.DEFAULT.builder() + .setHeader("Col1", "Col2", "Col3", "Col4") + .setQuoteMode(QuoteMode.NONE) + .get() + // @formatter:on + ); + final String actualMessage = exception.getMessage(); + final String expectedMessage = "Quote mode set to NONE but no escape character is set"; + assertEquals(expectedMessage, actualMessage); + } + + @Test + void 
testQuotePolicyNoneWithoutEscapeThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').builder().setQuoteMode(QuoteMode.NONE).get()); + } + + @SuppressWarnings("deprecation") + @Test + void testQuotePolicyNoneWithoutEscapeThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').withQuoteMode(QuoteMode.NONE)); + } + + @Test + void testRFC4180() { + assertNull(RFC4180.getCommentMarker()); + assertEquals(',', RFC4180.getDelimiter()); + assertNull(RFC4180.getEscapeCharacter()); + assertFalse(RFC4180.getIgnoreEmptyLines()); + assertEquals(Character.valueOf('"'), RFC4180.getQuoteCharacter()); + assertNull(RFC4180.getQuoteMode()); + assertEquals("\r\n", RFC4180.getRecordSeparator()); + } + + @SuppressWarnings("boxing") // no need to worry about boxing here + @Test + void testSerialization() throws Exception { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + + try (ObjectOutputStream oos = new ObjectOutputStream(out)) { + oos.writeObject(CSVFormat.DEFAULT); + oos.flush(); + } + + final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(out.toByteArray())); + final CSVFormat format = (CSVFormat) in.readObject(); + + assertNotNull(format); + assertEquals(CSVFormat.DEFAULT.getDelimiter(), format.getDelimiter(), "delimiter"); + assertEquals(CSVFormat.DEFAULT.getQuoteCharacter(), format.getQuoteCharacter(), "encapsulator"); + assertEquals(CSVFormat.DEFAULT.getCommentMarker(), format.getCommentMarker(), "comment start"); + assertEquals(CSVFormat.DEFAULT.getRecordSeparator(), format.getRecordSeparator(), "record separator"); + assertEquals(CSVFormat.DEFAULT.getEscapeCharacter(), format.getEscapeCharacter(), "escape"); + assertEquals(CSVFormat.DEFAULT.getIgnoreSurroundingSpaces(), format.getIgnoreSurroundingSpaces(), "trim"); + assertEquals(CSVFormat.DEFAULT.getIgnoreEmptyLines(), format.getIgnoreEmptyLines(), "empty lines"); + } + + @Test + void 
testToString() { + + final String string = CSVFormat.INFORMIX_UNLOAD.toString(); + + assertEquals("Delimiter=<|> Escape=<\\> QuoteChar=<\"> RecordSeparator=<\n> EmptyLines:ignored SkipHeaderRecord:false", string); + + } + + @Test + void testToStringAndWithCommentMarkerTakingCharacter() { + + final CSVFormat.Predefined csvFormatPredefined = CSVFormat.Predefined.Default; + final CSVFormat csvFormat = csvFormatPredefined.getFormat(); + + assertNull(csvFormat.getEscapeCharacter()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getTrim()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertEquals(',', csvFormat.getDelimiter()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertEquals("\r\n", csvFormat.getRecordSeparator()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.isNullStringSet()); + assertFalse(csvFormat.getAllowMissingColumnNames()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertNull(csvFormat.getNullString()); + assertNull(csvFormat.getQuoteMode()); + + assertTrue(csvFormat.getIgnoreEmptyLines()); + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + + final Character character = Character.valueOf('n'); + + final CSVFormat csvFormatTwo = csvFormat.withCommentMarker(character); + + assertNull(csvFormat.getEscapeCharacter()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getTrim()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertEquals(',', csvFormat.getDelimiter()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertEquals("\r\n", csvFormat.getRecordSeparator()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.isNullStringSet()); + 
assertFalse(csvFormat.getAllowMissingColumnNames()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertNull(csvFormat.getNullString()); + assertNull(csvFormat.getQuoteMode()); + + assertTrue(csvFormat.getIgnoreEmptyLines()); + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + assertNull(csvFormatTwo.getNullString()); + + assertEquals(',', csvFormatTwo.getDelimiter()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertTrue(csvFormatTwo.isCommentMarkerSet()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertNull(csvFormatTwo.getEscapeCharacter()); + + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertEquals('n', (char) csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + assertNotEquals(csvFormatTwo, csvFormat); + + assertNull(csvFormat.getEscapeCharacter()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getTrim()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertEquals(',', csvFormat.getDelimiter()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertEquals("\r\n", csvFormat.getRecordSeparator()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.isNullStringSet()); + 
assertFalse(csvFormat.getAllowMissingColumnNames()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertNull(csvFormat.getNullString()); + assertNull(csvFormat.getQuoteMode()); + + assertTrue(csvFormat.getIgnoreEmptyLines()); + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + assertNull(csvFormatTwo.getNullString()); + + assertEquals(',', csvFormatTwo.getDelimiter()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertTrue(csvFormatTwo.isCommentMarkerSet()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertNull(csvFormatTwo.getEscapeCharacter()); + + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertEquals('n', (char) csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + assertNotEquals(csvFormat, csvFormatTwo); + + assertNotEquals(csvFormatTwo, csvFormat); + assertEquals("Delimiter=<,> QuoteChar=<\"> CommentStart= RecordSeparator=<\r\n> EmptyLines:ignored SkipHeaderRecord:false", + csvFormatTwo.toString()); + + } + + @Test + void testTrim() throws IOException { + final CSVFormat formatWithTrim = CSVFormat.DEFAULT.withDelimiter(',').withTrim().withQuote(null).withRecordSeparator(CRLF); + + CharSequence in = "a,b,c"; + final StringBuilder out = new StringBuilder(); + formatWithTrim.print(in, out, true); + assertEquals("a,b,c", out.toString()); + + in = new StringBuilder(" x,y,z"); 
+ out.setLength(0); + formatWithTrim.print(in, out, true); + assertEquals("x,y,z", out.toString()); + + in = new StringBuilder(""); + out.setLength(0); + formatWithTrim.print(in, out, true); + assertEquals("", out.toString()); + + in = new StringBuilder("header\r\n"); + out.setLength(0); + formatWithTrim.print(in, out, true); + assertEquals("header", out.toString()); + } + + @Test + void testWithCommentStart() { + final CSVFormat formatWithCommentStart = CSVFormat.DEFAULT.withCommentMarker('#'); + assertEquals(Character.valueOf('#'), formatWithCommentStart.getCommentMarker()); + } + + @Test + void testWithCommentStartCRThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withCommentMarker(CR)); + } + + @Test + void testWithDelimiter() { + final CSVFormat formatWithDelimiter = CSVFormat.DEFAULT.withDelimiter('!'); + assertEquals('!', formatWithDelimiter.getDelimiter()); + } + + @Test + void testWithDelimiterLFThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(LF)); + } + + @Test + void testWithEmptyDuplicates() { + final CSVFormat formatWithEmptyDuplicates = CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).get(); + + assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode()); + assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames()); + } + + @Test + void testWithEmptyEnum() { + final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(EmptyEnum.class); + assertEquals(0, formatWithHeader.getHeader().length); + } + + @Test + void testWithEscape() { + final CSVFormat formatWithEscape = CSVFormat.DEFAULT.withEscape('&'); + assertEquals(Character.valueOf('&'), formatWithEscape.getEscapeCharacter()); + } + + @Test + void testWithEscapeCRThrowsExceptions() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR)); + } + + @Test + void 
testWithFirstRecordAsHeader() { + final CSVFormat formatWithFirstRecordAsHeader = CSVFormat.DEFAULT.withFirstRecordAsHeader(); + assertTrue(formatWithFirstRecordAsHeader.getSkipHeaderRecord()); + assertEquals(0, formatWithFirstRecordAsHeader.getHeader().length); + } + + @Test + void testWithHeader() { + final String[] header = { "one", "two", "three" }; + // withHeader() makes a copy of the header array. + final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); + assertArrayEquals(header, formatWithHeader.getHeader()); + assertNotSame(header, formatWithHeader.getHeader()); + } + + @Test + void testWithHeaderComments() { + + final CSVFormat csvFormat = CSVFormat.DEFAULT; + + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertNull(csvFormat.getQuoteMode()); + + assertEquals(',', csvFormat.getDelimiter()); + assertTrue(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertNull(csvFormat.getCommentMarker()); + + assertEquals("\r\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertNull(csvFormat.getEscapeCharacter()); + + final Object[] objectArray = new Object[8]; + final CSVFormat csvFormatTwo = csvFormat.withHeaderComments(objectArray); + + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertNull(csvFormat.getQuoteMode()); + + 
assertEquals(',', csvFormat.getDelimiter()); + assertTrue(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertNull(csvFormat.getCommentMarker()); + + assertEquals("\r\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertNull(csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getTrim()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertNull(csvFormatTwo.getNullString()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertEquals(',', csvFormatTwo.getDelimiter()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + + final String string = csvFormatTwo.format(objectArray); + + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + 
assertNull(csvFormat.getQuoteMode()); + + assertEquals(',', csvFormat.getDelimiter()); + assertTrue(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertNull(csvFormat.getCommentMarker()); + + assertEquals("\r\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertNull(csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getTrim()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertNull(csvFormatTwo.getNullString()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertEquals(',', csvFormatTwo.getDelimiter()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + assertNotNull(string); + assertNotEquals(csvFormat, csvFormatTwo); // CSV-244 - should not be equal + + assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + assertEquals(",,,,,,,", string); + + } + + @Test + void testWithHeaderEnum() { + final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(Header.class); + 
assertArrayEquals(new String[] { "Name", "Email", "Phone" }, formatWithHeader.getHeader()); + } + + @Test + void testWithHeaderEnumNull() { + final CSVFormat format = CSVFormat.DEFAULT; + final Class> simpleName = null; + format.withHeader(simpleName); + } + + @Test + void testWithHeaderResultSetNull() throws SQLException { + final CSVFormat format = CSVFormat.DEFAULT; + final ResultSet resultSet = null; + format.withHeader(resultSet); + } + + @Test + void testWithIgnoreEmptyLines() { + assertFalse(CSVFormat.DEFAULT.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); + assertTrue(CSVFormat.DEFAULT.withIgnoreEmptyLines().getIgnoreEmptyLines()); + } + + @Test + void testWithIgnoreSurround() { + assertFalse(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); + assertTrue(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces().getIgnoreSurroundingSpaces()); + } + + @Test + void testWithNullString() { + final CSVFormat formatWithNullString = CSVFormat.DEFAULT.withNullString("null"); + assertEquals("null", formatWithNullString.getNullString()); + } + + @Test + void testWithQuoteChar() { + final CSVFormat formatWithQuoteChar = CSVFormat.DEFAULT.withQuote('"'); + assertEquals(Character.valueOf('"'), formatWithQuoteChar.getQuoteCharacter()); + } + + @Test + void testWithQuoteLFThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(LF)); + } + + @Test + void testWithQuotePolicy() { + final CSVFormat formatWithQuotePolicy = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL); + assertEquals(QuoteMode.ALL, formatWithQuotePolicy.getQuoteMode()); + } + + @Test + void testWithRecordSeparatorCR() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CR); + assertEquals(String.valueOf(CR), formatWithRecordSeparator.getRecordSeparator()); + } + + @Test + void testWithRecordSeparatorCRLF() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CRLF); + 
assertEquals(CRLF, formatWithRecordSeparator.getRecordSeparator()); + } + + @Test + void testWithRecordSeparatorLF() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(LF); + assertEquals(String.valueOf(LF), formatWithRecordSeparator.getRecordSeparator()); + } + + @Test + void testWithSystemRecordSeparator() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withSystemRecordSeparator(); + assertEquals(System.lineSeparator(), formatWithRecordSeparator.getRecordSeparator()); + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index ce26ca7b82..6d9bdd9e80 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -1,1503 +1,2116 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.LF; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PipedReader; -import java.io.PipedWriter; -import java.io.Reader; -import java.io.StringReader; -import java.io.StringWriter; -import java.io.UncheckedIOException; -import java.net.URL; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.stream.Collectors; - -import org.apache.commons.io.input.BOMInputStream; -import org.apache.commons.io.input.BrokenInputStream; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * CSVParserTest - * - * The test are organized in three different sections: The 'setter/getter' section, the lexer section and finally the - * parser section. In case a test fails, you should follow a top-down approach for fixing a potential bug (its likely - * that the parser itself fails if the lexer has problems...). 
- */ -public class CSVParserTest { - - private static final Charset UTF_8 = StandardCharsets.UTF_8; - - private static final String UTF_8_NAME = UTF_8.name(); - - private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" - // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; - + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping - - private static final String CSV_INPUT_1 = "a,b,c,d"; - - private static final String CSV_INPUT_2 = "a,b,1 2"; - - private static final String[][] RESULT = {{"a", "b", "c", "d"}, {"a", "b", "1 2"}, {"foo baar", "b", ""}, {"foo\n,,\n\",,\n\"", "d", "e"}}; - - // CSV with no header comments - static private final String CSV_INPUT_NO_COMMENT = "A,B"+CRLF+"1,2"+CRLF; - - // CSV with a header comment - static private final String CSV_INPUT_HEADER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF; - - // CSV with a single line header and trailer comment - static private final String CSV_INPUT_HEADER_TRAILER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment"; - - // CSV with a multi-line header and trailer comment - static private final String CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# multi-line" + CRLF + "# comment"; - - // Format with auto-detected header - static private final CSVFormat FORMAT_AUTO_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().build(); - - // Format with explicit header - static private final CSVFormat FORMAT_EXPLICIT_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT) - .setSkipHeaderRecord(true) - .setCommentMarker('#') - .setHeader("A", "B") - .build(); - - // Format with explicit header that does not skip the header line - CSVFormat FORMAT_EXPLICIT_HEADER_NOSKIP = CSVFormat.Builder.create(CSVFormat.DEFAULT) - .setCommentMarker('#') - .setHeader("A", "B") - .build(); - - @SuppressWarnings("resource") 
// caller releases - private BOMInputStream createBOMInputStream(final String resource) throws IOException { - return new BOMInputStream(ClassLoader.getSystemClassLoader().getResource(resource).openStream()); - } - - private void parseFully(final CSVParser parser) { - parser.forEach(record -> assertNotNull(record)); - } - - @Test - public void testBackslashEscaping() throws IOException { - - // To avoid confusion over the need for escaping chars in java code, - // We will test with a forward slash as the escape char, and a single - // quote as the encapsulator. - - final String code = "one,two,three\n" // 0 - + "'',''\n" // 1) empty encapsulators - + "/',/'\n" // 2) single encapsulators - + "'/'','/''\n" // 3) single encapsulators encapsulated via escape - + "'''',''''\n" // 4) single encapsulators encapsulated via doubling - + "/,,/,\n" // 5) separator escaped - + "//,//\n" // 6) escape escaped - + "'//','//'\n" // 7) escape escaped in encapsulation - + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces - + "9, /\n \n" // escaped newline - + ""; - final String[][] res = {{"one", "two", "three"}, // 0 - {"", ""}, // 1 - {"'", "'"}, // 2 - {"'", "'"}, // 3 - {"'", "'"}, // 4 - {",", ","}, // 5 - {"/", "/"}, // 6 - {"/", "/"}, // 7 - {" 8 ", " \"quoted \"\" /\" / string\" "}, {"9", " \n "},}; - - final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(); - - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - - Utils.compare("Records do not match expected result", res, records); - } - } - - @Test - public void testBackslashEscaping2() throws IOException { - - // To avoid confusion over the need for escaping chars in java code, - // We will test with a forward slash as the escape char, and a single - // quote as the encapsulator. 
- - final String code = "" + " , , \n" // 1) - + " \t , , \n" // 2) - + " // , /, , /,\n" // 3) - + ""; - final String[][] res = {{" ", " ", " "}, // 1 - {" \t ", " ", " "}, // 2 - {" / ", " , ", " ,"}, // 3 - }; - - final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(); - - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - - Utils.compare("", res, records); - } - } - - @Test - @Disabled - public void testBackslashEscapingOld() throws IOException { - final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" + "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" - + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\""; - final String[][] res = {{"one", "two", "three"}, {"on\\\"e", "two"}, {"on\"e", "two"}, {"one", "tw\"o"}, {"one", "t\\,wo"}, // backslash in quotes only - // escapes a delimiter (",") - {"one", "two", "th,ree"}, {"a\\\\"}, // backslash in quotes only escapes a delimiter (",") - {"a\\", "b"}, // a backslash must be returned - {"a\\\\,b"} // backslash in quotes only escapes a delimiter (",") - }; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - - @Test - @Disabled("CSV-107") - public void testBOM() throws IOException { - final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv"); - try (final CSVParser parser = CSVParser.parse(url, Charset.forName(UTF_8_NAME), CSVFormat.EXCEL.withHeader())) { - parser.forEach(record -> assertNotNull(record.get("Date"))); - } - } - - @Test - public void testBOMInputStream_ParserWithInputStream() throws IOException { - try (final BOMInputStream 
inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"); - final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) { - parser.forEach(record -> assertNotNull(record.get("Date"))); - } - } - - @Test - public void testBOMInputStream_ParserWithReader() throws IOException { - try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); - final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) { - parser.forEach(record -> assertNotNull(record.get("Date"))); - } - } - - @Test - public void testBOMInputStream_parseWithReader() throws IOException { - try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); - final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) { - parser.forEach(record -> assertNotNull(record.get("Date"))); - } - } - - @Test - public void testCarriageReturnEndings() throws IOException { - final String code = "foo\rbaar,\rhello,world\r,kanu"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - } - } - - @Test - public void testCarriageReturnLineFeedEndings() throws IOException { - final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - } - } - - @Test - public void testClose() throws Exception { - final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); - final Iterator records; - try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { - records = parser.iterator(); - assertTrue(records.hasNext()); - } - assertFalse(records.hasNext()); - assertThrows(NoSuchElementException.class, 
records::next); - } - - @Test - public void testCSV235() throws IOException { - final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\""; // "aaa","b""bb","ccc" - try (final CSVParser parser = CSVFormat.RFC4180.parse(new StringReader(dqString))) { - final Iterator records = parser.iterator(); - final CSVRecord record = records.next(); - assertFalse(records.hasNext()); - assertEquals(3, record.size()); - assertEquals("aaa", record.get(0)); - assertEquals("b\"bb", record.get(1)); - assertEquals("ccc", record.get(2)); - } - } - - @Test - public void testCSV57() throws Exception { - try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { - final List list = parser.getRecords(); - assertNotNull(list); - assertEquals(0, list.size()); - } - } - - @Test - public void testDefaultFormat() throws IOException { - final String code = "" + "a,b#\n" // 1) - + "\"\n\",\" \",#\n" // 2) - + "#,\"\"\n" // 3) - + "# Final comment\n"// 4) - ; - final String[][] res = {{"a", "b#"}, {"\n", " ", "#"}, {"#", ""}, {"# Final comment"}}; - - CSVFormat format = CSVFormat.DEFAULT; - assertFalse(format.isCommentMarkerSet()); - final String[][] res_comments = {{"a", "b#"}, {"\n", " ", "#"},}; - - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - - Utils.compare("Failed to parse without comments", res, records); - - format = CSVFormat.DEFAULT.withCommentMarker('#'); - } - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - - Utils.compare("Failed to parse with comments", res_comments, records); - } - } - - @Test - public void testDuplicateHeadersAllowedByDefault() throws Exception { - try (CSVParser parser = CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader())) { - // noop - } - } - - @Test - public void testDuplicateHeadersNotAllowed() { - assertThrows(IllegalArgumentException.class, - () -> 
CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false))); - } - - @Test - public void testEmptyFile() throws Exception { - try (final CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8, - CSVFormat.DEFAULT)) { - assertNull(parser.nextRecord()); - } - } - - @Test - public void testEmptyFileHeaderParsing() throws Exception { - try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT.withFirstRecordAsHeader())) { - assertNull(parser.nextRecord()); - assertTrue(parser.getHeaderNames().isEmpty()); - } - } - - @Test - public void testEmptyLineBehaviorCSV() throws Exception { - final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; - final String[][] res = {{"hello", ""} // CSV format ignores empty lines - }; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testEmptyLineBehaviorExcel() throws Exception { - final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; - final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines - {""}}; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testEmptyString() throws Exception { - try (final CSVParser parser = CSVParser.parse("", 
CSVFormat.DEFAULT)) { - assertNull(parser.nextRecord()); - } - } - - @Test - public void testEndOfFileBehaviorCSV() throws Exception { - final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", - "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\""}; - final String[][] res = {{"hello", ""}, // CSV format ignores empty lines - {"world", ""}}; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testEndOfFileBehaviorExcel() throws Exception { - final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", - "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\""}; - final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines - {"world", ""}}; - - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testExcelFormat1() throws IOException { - final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; - final String[][] res = {{"value1", "value2", "value3", "value4"}, {"a", "b", "c", "d"}, {" x", "", "", ""}, {""}, - {"\"hello\"", " \"world\"", "abc\ndef", ""}}; - try (final CSVParser parser = 
CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - - @Test - public void testExcelFormat2() throws Exception { - final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; - final String[][] res = {{"foo", "baar"}, {""}, {"hello", ""}, {""}, {"world", ""}}; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - - /** - * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers - */ - @Test - public void testExcelHeaderCountLessThanData() throws Exception { - final String code = "A,B,C,,\r\na,b,c,d,e\r\n"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) { - parser.getRecords().forEach(record -> { - assertEquals("a", record.get("A")); - assertEquals("b", record.get("B")); - assertEquals("c", record.get("C")); - }); - } - } - - @Test - public void testFirstEndOfLineCr() throws IOException { - final String data = "foo\rbaar,\rhello,world\r,kanu"; - try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - assertEquals("\r", parser.getFirstEndOfLine()); - } - } - - @Test - public void testFirstEndOfLineCrLf() throws IOException { - final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; - try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - assertEquals("\r\n", parser.getFirstEndOfLine()); - } - } - - @Test - public void 
testFirstEndOfLineLf() throws IOException { - final String data = "foo\nbaar,\nhello,world\n,kanu"; - try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - assertEquals("\n", parser.getFirstEndOfLine()); - } - } - - @Test - public void testForEach() throws Exception { - try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); final CSVParser parser = CSVFormat.DEFAULT.parse(in)) { - final List records = new ArrayList<>(); - for (final CSVRecord record : parser) { - records.add(record); - } - assertEquals(3, records.size()); - assertArrayEquals(new String[] {"a", "b", "c"}, records.get(0).values()); - assertArrayEquals(new String[] {"1", "2", "3"}, records.get(1).values()); - assertArrayEquals(new String[] {"x", "y", "z"}, records.get(2).values()); - } - } - - @Test - public void testGetHeaderComment_HeaderComment1() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { - parser.getRecords(); - // Expect a header comment - assertTrue(parser.hasHeaderComment()); - assertEquals("header comment", parser.getHeaderComment()); - } - } - - @Test - public void testGetHeaderComment_HeaderComment2() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { - parser.getRecords(); - // Expect a header comment - assertTrue(parser.hasHeaderComment()); - assertEquals("header comment", parser.getHeaderComment()); - } - } - - @Test - public void testGetHeaderComment_HeaderComment3() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { - parser.getRecords(); - // Expect no header comment - the text "comment" is attached to the first record - assertFalse(parser.hasHeaderComment()); - assertNull(parser.getHeaderComment()); - } - } - - @Test - public void testGetHeaderComment_HeaderTrailerComment() 
throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { - parser.getRecords(); - // Expect a header comment - assertTrue(parser.hasHeaderComment()); - assertEquals("multi-line"+LF+"header comment", parser.getHeaderComment()); - } - } - - @Test - public void testGetHeaderComment_NoComment1() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) { - parser.getRecords(); - // Expect no header comment - assertFalse(parser.hasHeaderComment()); - assertNull(parser.getHeaderComment()); - } - } - - @Test - public void testGetHeaderComment_NoComment2() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) { - parser.getRecords(); - // Expect no header comment - assertFalse(parser.hasHeaderComment()); - assertNull(parser.getHeaderComment()); - } - } - - @Test - public void testGetHeaderComment_NoComment3() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { - parser.getRecords(); - // Expect no header comment - assertFalse(parser.hasHeaderComment()); - assertNull(parser.getHeaderComment()); - } - } - - @Test - public void testGetHeaderMap() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { - final Map headerMap = parser.getHeaderMap(); - final Iterator columnNames = headerMap.keySet().iterator(); - // Headers are iterated in column order. - assertEquals("A", columnNames.next()); - assertEquals("B", columnNames.next()); - assertEquals("C", columnNames.next()); - final Iterator records = parser.iterator(); - - // Parse to make sure getHeaderMap did not have a side-effect. 
- for (int i = 0; i < 3; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertEquals(record.get(0), record.get("A")); - assertEquals(record.get(1), record.get("B")); - assertEquals(record.get(2), record.get("C")); - } - - assertFalse(records.hasNext()); - } - } - - @Test - public void testGetHeaderNames() throws IOException { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { - final Map nameIndexMap = parser.getHeaderMap(); - final List headerNames = parser.getHeaderNames(); - assertNotNull(headerNames); - assertEquals(nameIndexMap.size(), headerNames.size()); - for (int i = 0; i < headerNames.size(); i++) { - final String name = headerNames.get(i); - assertEquals(i, nameIndexMap.get(name).intValue()); - } - } - } - - @Test - public void testGetHeaderNamesReadOnly() throws IOException { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { - final List headerNames = parser.getHeaderNames(); - assertNotNull(headerNames); - assertThrows(UnsupportedOperationException.class, () -> headerNames.add("This is a read-only list.")); - } - } - - @Test - public void testGetLine() throws IOException { - try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - for (final String[] re : RESULT) { - assertArrayEquals(re, parser.nextRecord().values()); - } - - assertNull(parser.nextRecord()); - } - } - - @Test - public void testGetLineNumberWithCR() throws Exception { - this.validateLineNumbers(String.valueOf(CR)); - } - - @Test - public void testGetLineNumberWithCRLF() throws Exception { - this.validateLineNumbers(CRLF); - } - - @Test - public void testGetLineNumberWithLF() throws Exception { - this.validateLineNumbers(String.valueOf(LF)); - } - - @Test - public void testGetOneLine() throws IOException { - try (final CSVParser parser = 
CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) { - final CSVRecord record = parser.getRecords().get(0); - assertArrayEquals(RESULT[0], record.values()); - } - } - - /** - * Tests reusing a parser to process new string records one at a time as they are being discovered. See [CSV-110]. - * - * @throws IOException when an I/O error occurs. - */ - @Test - public void testGetOneLineOneParser() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT; - try (final PipedWriter writer = new PipedWriter(); final CSVParser parser = new CSVParser(new PipedReader(writer), format)) { - writer.append(CSV_INPUT_1); - writer.append(format.getRecordSeparator()); - final CSVRecord record1 = parser.nextRecord(); - assertArrayEquals(RESULT[0], record1.values()); - writer.append(CSV_INPUT_2); - writer.append(format.getRecordSeparator()); - final CSVRecord record2 = parser.nextRecord(); - assertArrayEquals(RESULT[1], record2.values()); - } - } - - @Test - public void testGetRecordNumberWithCR() throws Exception { - this.validateRecordNumbers(String.valueOf(CR)); - } - - @Test - public void testGetRecordNumberWithCRLF() throws Exception { - this.validateRecordNumbers(CRLF); - } - - @Test - public void testGetRecordNumberWithLF() throws Exception { - this.validateRecordNumbers(String.valueOf(LF)); - } - - @Test - public void testGetRecordPositionWithCRLF() throws Exception { - this.validateRecordPosition(CRLF); - } - - @Test - public void testGetRecordPositionWithLF() throws Exception { - this.validateRecordPosition(String.valueOf(LF)); - } - - @Test - public void testGetRecords() throws IOException { - try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - final List records = parser.getRecords(); - assertEquals(RESULT.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < RESULT.length; i++) { - assertArrayEquals(RESULT[i], records.get(i).values()); - } - } - } - - @Test - public void 
testGetRecordsFromBrokenInputStream() throws IOException { - @SuppressWarnings("resource") // We also get an exception on close, which is OK but can't assert in a try. - final CSVParser parser = CSVParser.parse(new BrokenInputStream(), UTF_8, CSVFormat.DEFAULT); - assertThrows(UncheckedIOException.class, parser::getRecords); - - } - - @Test - public void testGetRecordWithMultiLineValues() throws Exception { - try (final CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"", - CSVFormat.DEFAULT.withRecordSeparator(CRLF))) { - CSVRecord record; - assertEquals(0, parser.getRecordNumber()); - assertEquals(0, parser.getCurrentLineNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(3, parser.getCurrentLineNumber()); - assertEquals(1, record.getRecordNumber()); - assertEquals(1, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(6, parser.getCurrentLineNumber()); - assertEquals(2, record.getRecordNumber()); - assertEquals(2, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(9, parser.getCurrentLineNumber()); - assertEquals(3, record.getRecordNumber()); - assertEquals(3, parser.getRecordNumber()); - assertNull(record = parser.nextRecord()); - assertEquals(9, parser.getCurrentLineNumber()); - assertEquals(3, parser.getRecordNumber()); - } - } - - @Test - public void testGetTrailerComment_HeaderComment1() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { - parser.getRecords(); - assertFalse(parser.hasTrailerComment()); - assertNull(parser.getTrailerComment()); - } - } - - @Test - public void testGetTrailerComment_HeaderComment2() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { - parser.getRecords(); - assertFalse(parser.hasTrailerComment()); - 
assertNull(parser.getTrailerComment()); - } - } - - @Test - public void testGetTrailerComment_HeaderComment3() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { - parser.getRecords(); - assertFalse(parser.hasTrailerComment()); - assertNull(parser.getTrailerComment()); - } - } - - @Test - public void testGetTrailerComment_HeaderTrailerComment1() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { - parser.getRecords(); - assertTrue(parser.hasTrailerComment()); - assertEquals("comment", parser.getTrailerComment()); - } - } - - @Test - public void testGetTrailerComment_HeaderTrailerComment2() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) { - parser.getRecords(); - assertTrue(parser.hasTrailerComment()); - assertEquals("comment", parser.getTrailerComment()); - } - } - - @Test - public void testGetTrailerComment_HeaderTrailerComment3() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { - parser.getRecords(); - assertTrue(parser.hasTrailerComment()); - assertEquals("comment", parser.getTrailerComment()); - } - } - - @Test - public void testGetTrailerComment_MultilineComment() throws IOException { - try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { - parser.getRecords(); - assertTrue(parser.hasTrailerComment()); - assertEquals("multi-line"+LF+"comment", parser.getTrailerComment()); - } - } - - @Test - public void testHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - - try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { - final Iterator records = parser.iterator(); - - for (int i = 0; i < 2; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = 
records.next(); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(1), record.get("b")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - } - - @Test - public void testHeaderComment() throws Exception { - final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); - - try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { - final Iterator records = parser.iterator(); - - for (int i = 0; i < 2; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(1), record.get("b")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - } - - @Test - public void testHeaderMissing() throws Exception { - final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z"); - - try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { - final Iterator records = parser.iterator(); - - for (int i = 0; i < 2; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - } - - @Test - public void testHeaderMissingWithNull() throws Exception { - final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); - try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in)) { - parser.iterator(); - } - } - - @Test - public void testHeadersMissing() throws Exception { - try (final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); - final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { - parser.iterator(); - } - } - - @Test - public void testHeadersMissingException() { - final Reader in = new 
StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); - } - - @Test - public void testHeadersMissingOneColumnException() { - final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); - } - - @Test - public void testHeadersWithNullColumnName() throws IOException { - final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6"); - final Iterator records = CSVFormat.DEFAULT.withHeader().withNullString("null").withAllowMissingColumnNames().parse(in).iterator(); - final CSVRecord record = records.next(); - // Expect the null header to be missing - assertEquals(Arrays.asList("header1", "header3"), record.getParser().getHeaderNames()); - assertEquals(2, record.getParser().getHeaderMap().size()); - } - - @Test - public void testIgnoreCaseHeaderMapping() throws Exception { - final Reader reader = new StringReader("1,2,3"); - final Iterator records = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase().parse(reader).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("one")); - assertEquals("2", record.get("two")); - assertEquals("3", record.get("THREE")); - } - - @Test - public void testIgnoreEmptyLines() throws IOException { - final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; - // String code = "world\r\n\n"; - // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(3, records.size()); - } - } - - @Test - public void testInvalidFormat() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); - } - - @Test - public void testIterator() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); 
- - final Iterator iterator = CSVFormat.DEFAULT.parse(in).iterator(); - - assertTrue(iterator.hasNext()); - assertThrows(UnsupportedOperationException.class, iterator::remove); - assertArrayEquals(new String[] {"a", "b", "c"}, iterator.next().values()); - assertArrayEquals(new String[] {"1", "2", "3"}, iterator.next().values()); - assertTrue(iterator.hasNext()); - assertTrue(iterator.hasNext()); - assertTrue(iterator.hasNext()); - assertArrayEquals(new String[] {"x", "y", "z"}, iterator.next().values()); - assertFalse(iterator.hasNext()); - - assertThrows(NoSuchElementException.class, iterator::next); - } - - @Test - public void testIteratorSequenceBreaking() throws IOException { - final String fiveRows = "1\n2\n3\n4\n5\n"; - - // Iterator hasNext() shouldn't break sequence - try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { - - final Iterator iter = parser.iterator(); - int recordNumber = 0; - while (iter.hasNext()) { - final CSVRecord record = iter.next(); - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - if (recordNumber >= 2) { - break; - } - } - iter.hasNext(); - while (iter.hasNext()) { - final CSVRecord record = iter.next(); - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - } - } - - // Consecutive enhanced for loops shouldn't break sequence - try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { - int recordNumber = 0; - for (final CSVRecord record : parser) { - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - if (recordNumber >= 2) { - break; - } - } - for (final CSVRecord record : parser) { - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - } - } - - // Consecutive enhanced for loops with hasNext() peeking shouldn't break sequence - try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { - int recordNumber = 0; - for (final CSVRecord record : parser) { - 
recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - if (recordNumber >= 2) { - break; - } - } - parser.iterator().hasNext(); - for (final CSVRecord record : parser) { - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - } - } - } - - @Test - public void testLineFeedEndings() throws IOException { - final String code = "foo\nbaar,\nhello,world\n,kanu"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - } - } - - @Test - public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in).iterator(); - CSVRecord record; - - // 1st record - record = records.next(); - assertTrue(record.isMapped("A")); - assertTrue(record.isMapped("B")); - assertTrue(record.isMapped("C")); - assertTrue(record.isSet("A")); - assertTrue(record.isSet("B")); - assertFalse(record.isSet("C")); - assertEquals("1", record.get("A")); - assertEquals("2", record.get("B")); - assertFalse(record.isConsistent()); - - // 2nd record - record = records.next(); - assertTrue(record.isMapped("A")); - assertTrue(record.isMapped("B")); - assertTrue(record.isMapped("C")); - assertTrue(record.isSet("A")); - assertTrue(record.isSet("B")); - assertTrue(record.isSet("C")); - assertEquals("x", record.get("A")); - assertEquals("y", record.get("B")); - assertEquals("z", record.get("C")); - assertTrue(record.isConsistent()); - - assertFalse(records.hasNext()); - } - - @Test - @Disabled - public void testMongoDbCsv() throws Exception { - try (final CSVParser parser = CSVParser.parse("\"a a\",b,c" + LF + "d,e,f", CSVFormat.MONGODB_CSV)) { - final Iterator itr1 = parser.iterator(); - final Iterator itr2 = parser.iterator(); - - final CSVRecord first = itr1.next(); - assertEquals("a a", 
first.get(0)); - assertEquals("b", first.get(1)); - assertEquals("c", first.get(2)); - - final CSVRecord second = itr2.next(); - assertEquals("d", second.get(0)); - assertEquals("e", second.get(1)); - assertEquals("f", second.get(2)); - } - } - - @Test - // TODO this may lead to strange behavior, throw an exception if iterator() has already been called? - public void testMultipleIterators() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b,c" + CRLF + "d,e,f", CSVFormat.DEFAULT)) { - final Iterator itr1 = parser.iterator(); - - final CSVRecord first = itr1.next(); - assertEquals("a", first.get(0)); - assertEquals("b", first.get(1)); - assertEquals("c", first.get(2)); - - final CSVRecord second = itr1.next(); - assertEquals("d", second.get(0)); - assertEquals("e", second.get(1)); - assertEquals("f", second.get(2)); - } - } - - @Test - public void testNewCSVParserNullReaderFormat() { - assertThrows(NullPointerException.class, () -> new CSVParser(null, CSVFormat.DEFAULT)); - } - - @Test - public void testNewCSVParserReaderNullFormat() { - assertThrows(NullPointerException.class, () -> new CSVParser(new StringReader(""), null)); - } - - @Test - public void testNoHeaderMap() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) { - assertNull(parser.getHeaderMap()); - } - } - - @Test - public void testNotValueCSV() throws IOException { - final String source = "#"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#'); - final CSVParser csvParser = csvFormat.parse(new StringReader(source)); - final CSVRecord csvRecord = csvParser.nextRecord(); - assertNull(csvRecord); - } - - @Test - public void testParse() throws Exception { - final ClassLoader loader = ClassLoader.getSystemClassLoader(); - final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv"); - final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D"); - final Charset charset = 
StandardCharsets.UTF_8; - - try (final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(new File(url.toURI()), charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(url, charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) { - parseFully(parser); - } - try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, /* characterOffset= */0, /* recordNumber= */1)) { - parseFully(parser); - } - } - - @Test - public void testParseFileNullFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse(new File("CSVFileParser/test.csv"), Charset.defaultCharset(), null)); - } - - @Test - public void testParseNullFileFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); - } - - @Test - public void testParseNullPathFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse((Path) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); - } - - @Test - public void testParseNullStringFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse((String) null, CSVFormat.DEFAULT)); - } - - @Test - public void testParseNullUrlCharsetFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); - } - - @Test - public void 
testParserUrlNullCharsetFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), null, CSVFormat.DEFAULT)); - } - - @Test - public void testParseStringNullFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse("csv data", (CSVFormat) null)); - } - - @Test - public void testParseUrlCharsetNullFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), Charset.defaultCharset(), null)); - } - - @Test - public void testParseWithDelimiterStringWithEscape() throws IOException { - final String source = "a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').build(); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a[|]b![|]c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - csvRecord = csvParser.nextRecord(); - assertEquals("abc[abc]", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - @Test - public void testParseWithDelimiterStringWithQuote() throws IOException { - final String source = "'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build(); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a[|]b[|]c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - csvRecord = csvParser.nextRecord(); - assertEquals("abc[abc]", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - @Test - public void testParseWithDelimiterWithEscape() throws IOException { - final String source = "a!,b!,c,xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withEscape('!'); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - final CSVRecord csvRecord = 
csvParser.nextRecord(); - assertEquals("a,b,c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - @Test - public void testParseWithDelimiterWithQuote() throws IOException { - final String source = "'a,b,c',xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - final CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a,b,c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - @Test - public void testParseWithQuoteThrowsException() { - final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); - assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c','")).nextRecord()); - assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c'abc,xyz")).nextRecord()); - assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'abc'a,b,c',xyz")).nextRecord()); - } - @Test - public void testParseWithQuoteWithEscape() throws IOException { - final String source = "'a?,b?,c?d',xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'').withEscape('?'); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - final CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a,b,c?d", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - @Test - public void testProvidedHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - - final Iterator records = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in).iterator(); - - for (int i = 0; i < 3; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertTrue(record.isMapped("A")); - assertTrue(record.isMapped("B")); - assertTrue(record.isMapped("C")); - assertFalse(record.isMapped("NOT MAPPED")); - assertEquals(record.get(0), record.get("A")); - assertEquals(record.get(1), record.get("B")); - assertEquals(record.get(2), 
record.get("C")); - } - - assertFalse(records.hasNext()); - } - - @Test - public void testProvidedHeaderAuto() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - - final Iterator records = CSVFormat.DEFAULT.withHeader().parse(in).iterator(); - - for (int i = 0; i < 2; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertTrue(record.isMapped("a")); - assertTrue(record.isMapped("b")); - assertTrue(record.isMapped("c")); - assertFalse(record.isMapped("NOT MAPPED")); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(1), record.get("b")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - - @Test - public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException { - final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6"); - final Iterator records = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals(Arrays.asList("header1", "header2", "header1"), record.getParser().getHeaderNames()); - } - - @Test - public void testRoundtrip() throws Exception { - final StringWriter out = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) { - final String input = "a,b,c\r\n1,2,3\r\nx,y,z\r\n"; - for (final CSVRecord record : CSVParser.parse(input, CSVFormat.DEFAULT)) { - printer.printRecord(record); - } - assertEquals(input, out.toString()); - } - } - - @Test - public void testSkipAutoHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("a")); - assertEquals("2", record.get("b")); - assertEquals("3", record.get("c")); - } - - @Test - public void testSkipHeaderOverrideDuplicateHeaders() throws Exception 
{ - final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - assertEquals("3", record.get("Z")); - } - - @Test - public void testSkipSetAltHeaders() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - assertEquals("3", record.get("Z")); - } - - @Test - public void testSkipSetHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("a")); - assertEquals("2", record.get("b")); - assertEquals("3", record.get("c")); - } - - @Test - @Disabled - public void testStartWithEmptyLinesThenHeaders() throws Exception { - final String[] codes = {"\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; - final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines - {""}}; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testStream() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final List list = 
CSVFormat.DEFAULT.parse(in).stream().collect(Collectors.toList()); - assertFalse(list.isEmpty()); - assertArrayEquals(new String[] {"a", "b", "c"}, list.get(0).values()); - assertArrayEquals(new String[] {"1", "2", "3"}, list.get(1).values()); - assertArrayEquals(new String[] {"x", "y", "z"}, list.get(2).values()); - } - - @Test - public void testTrailingDelimiter() throws Exception { - final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrailingDelimiter().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - assertEquals("3", record.get("Z")); - assertEquals(3, record.size()); - } - - @Test - public void testTrim() throws Exception { - final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrim().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - assertEquals("3", record.get("Z")); - assertEquals(3, record.size()); - } - - private void validateLineNumbers(final String lineSeparator) throws IOException { - try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { - assertEquals(0, parser.getCurrentLineNumber()); - assertNotNull(parser.nextRecord()); - assertEquals(1, parser.getCurrentLineNumber()); - assertNotNull(parser.nextRecord()); - assertEquals(2, parser.getCurrentLineNumber()); - assertNotNull(parser.nextRecord()); - // Read EOF without EOL should 3 - assertEquals(3, parser.getCurrentLineNumber()); - assertNull(parser.nextRecord()); - // Read EOF without EOL should 3 - assertEquals(3, parser.getCurrentLineNumber()); - } - } - - private void 
validateRecordNumbers(final String lineSeparator) throws IOException { - try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { - CSVRecord record; - assertEquals(0, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(1, record.getRecordNumber()); - assertEquals(1, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(2, record.getRecordNumber()); - assertEquals(2, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(3, record.getRecordNumber()); - assertEquals(3, parser.getRecordNumber()); - assertNull(record = parser.nextRecord()); - assertEquals(3, parser.getRecordNumber()); - } - } - - private void validateRecordPosition(final String lineSeparator) throws IOException { - final String nl = lineSeparator; // used as linebreak in values for better distinction - - final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator + - // to see if recordPosition correctly points to the enclosing quote - "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator + - // unicode test... not very relevant while operating on strings instead of bytes, but for - // completeness... 
- "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF"; - - final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator); - CSVParser parser = CSVParser.parse(code, format); - - CSVRecord record; - assertEquals(0, parser.getRecordNumber()); - - assertNotNull(record = parser.nextRecord()); - assertEquals(1, record.getRecordNumber()); - assertEquals(code.indexOf('a'), record.getCharacterPosition()); - - assertNotNull(record = parser.nextRecord()); - assertEquals(2, record.getRecordNumber()); - assertEquals(code.indexOf('1'), record.getCharacterPosition()); - - assertNotNull(record = parser.nextRecord()); - final long positionRecord3 = record.getCharacterPosition(); - assertEquals(3, record.getRecordNumber()); - assertEquals(code.indexOf("'A"), record.getCharacterPosition()); - assertEquals("A" + lineSeparator + "A", record.get(0)); - assertEquals("B" + lineSeparator + "B", record.get(1)); - assertEquals("CC", record.get(2)); - - assertNotNull(record = parser.nextRecord()); - assertEquals(4, record.getRecordNumber()); - assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); - - assertNotNull(record = parser.nextRecord()); - assertEquals(5, record.getRecordNumber()); - assertEquals(code.indexOf("EOF"), record.getCharacterPosition()); - - parser.close(); - - // now try to read starting at record 3 - parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3); - - assertNotNull(record = parser.nextRecord()); - assertEquals(3, record.getRecordNumber()); - assertEquals(code.indexOf("'A"), record.getCharacterPosition()); - assertEquals("A" + lineSeparator + "A", record.get(0)); - assertEquals("B" + lineSeparator + "B", record.get(1)); - assertEquals("CC", record.get(2)); - - assertNotNull(record = parser.nextRecord()); - assertEquals(4, record.getRecordNumber()); - assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); - assertEquals("\u00c4", 
record.get(0)); - - parser.close(); - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.Constants.CR; +import static org.apache.commons.csv.Constants.CRLF; +import static org.apache.commons.csv.Constants.LF; +import static org.apache.commons.csv.CsvAssertions.assertValuesEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.PipedReader; +import java.io.PipedWriter; +import java.io.Reader; +import java.io.StringReader; +import java.io.StringWriter; +import java.io.UncheckedIOException; +import java.net.URL; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import 
java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.commons.io.input.BOMInputStream; +import org.apache.commons.io.input.BrokenInputStream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.ValueSource; + +/** + * Tests {@link CSVParser}. + * + * The tests are organized in three different sections: The 'setter/getter' section, the lexer section and finally the parser section. In case a test fails, you + * should follow a top-down approach for fixing a potential bug (it's likely that the parser itself fails if the lexer has problems...).
+ */ +class CSVParserTest { + + private static final CSVFormat EXCEL_WITH_HEADER = CSVFormat.EXCEL.withHeader(); + + private static final Charset UTF_8 = StandardCharsets.UTF_8; + + private static final String UTF_8_NAME = UTF_8.name(); + + // @formatter:off + private static final String CSV_INPUT = "a,b,c,d\n" + + " a , b , 1 2 \n" + + "\"foo baar\", b,\n" + + // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping + // @formatter:on + + private static final String CSV_INPUT_1 = "a,b,c,d"; + + private static final String CSV_INPUT_2 = "a,b,1 2"; + + private static final String[][] RESULT = { { "a", "b", "c", "d" }, { "a", "b", "1 2" }, { "foo baar", "b", "" }, { "foo\n,,\n\",,\n\"", "d", "e" } }; + + // CSV with no header comments + private static final String CSV_INPUT_NO_COMMENT = "A,B" + CRLF + "1,2" + CRLF; + + // CSV with a header comment + private static final String CSV_INPUT_HEADER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF; + + // CSV with a single line header and trailer comment + private static final String CSV_INPUT_HEADER_TRAILER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment"; + + // CSV with a multi-line header and trailer comment + private static final String CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + + "# multi-line" + CRLF + "# comment"; + + // Format with auto-detected header + private static final CSVFormat FORMAT_AUTO_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().get(); + + // Format with explicit header + // @formatter:off + private static final CSVFormat FORMAT_EXPLICIT_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT) + .setSkipHeaderRecord(true) + .setCommentMarker('#') + .setHeader("A", "B") + .get(); + // @formatter:on + + // Format with explicit header that does not skip the header line + // 
@formatter:off + CSVFormat FORMAT_EXPLICIT_HEADER_NOSKIP = CSVFormat.Builder.create(CSVFormat.DEFAULT) + .setCommentMarker('#') + .setHeader("A", "B") + .get(); + // @formatter:on + + @SuppressWarnings("resource") // caller releases + private BOMInputStream createBOMInputStream(final String resource) throws IOException { + return new BOMInputStream(ClassLoader.getSystemClassLoader().getResource(resource).openStream()); + } + + CSVRecord parse(final CSVParser parser, final int failParseRecordNo) throws IOException { + if (parser.getRecordNumber() + 1 == failParseRecordNo) { + assertThrows(IOException.class, () -> parser.nextRecord()); + return null; + } + return parser.nextRecord(); + } + + private void parseFully(final CSVParser parser) { + parser.forEach(Assertions::assertNotNull); + } + + @Test + void testBackslashEscaping() throws IOException { + // To avoid confusion over the need for escaping chars in java code, + // We will test with a forward slash as the escape char, and a single + // quote as the encapsulator. 
+ + // @formatter:off + final String code = "one,two,three\n" + // 0 + "'',''\n" + // 1) empty encapsulators + "/',/'\n" + // 2) single encapsulators + "'/'','/''\n" + // 3) single encapsulators encapsulated via escape + "'''',''''\n" + // 4) single encapsulators encapsulated via doubling + "/,,/,\n" + // 5) separator escaped + "//,//\n" + // 6) escape escaped + "'//','//'\n" + // 7) escape escaped in encapsulation + " 8 , \"quoted \"\" /\" // string\" \n" + // don't eat spaces + "9, /\n \n" + // escaped newline + ""; + final String[][] res = {{"one", "two", "three"}, // 0 + {"", ""}, // 1 + {"'", "'"}, // 2 + {"'", "'"}, // 3 + {"'", "'"}, // 4 + {",", ","}, // 5 + {"/", "/"}, // 6 + {"/", "/"}, // 7 + {" 8 ", " \"quoted \"\" /\" / string\" "}, {"9", " \n "} }; + // @formatter:on + final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(); + try (CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Records do not match expected result", res, records, -1); + } + } + + @Test + void testBackslashEscaping2() throws IOException { + // To avoid confusion over the need for escaping chars in java code, + // We will test with a forward slash as the escape char, and a single + // quote as the encapsulator. 
+ // @formatter:off + final String code = " , , \n" + // 1) + " \t , , \n" + // 2) + " // , /, , /,\n" + // 3) + ""; + final String[][] res = {{" ", " ", " "}, // 1 + {" \t ", " ", " "}, // 2 + {" / ", " , ", " ,"}, // 3 + }; + // @formatter:on + final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(); + try (CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("", res, records, -1); + } + } + + @Test + @Disabled + void testBackslashEscapingOld() throws IOException { + // @formatter:off + final String code = "one,two,three\n" + + "on\\\"e,two\n" + + "on\"e,two\n" + + "one,\"tw\\\"o\"\n" + + "one,\"t\\,wo\"\n" + + "one,two,\"th,ree\"\n" + + "\"a\\\\\"\n" + + "a\\,b\n" + + "\"a\\\\,b\""; + // @formatter:on + final String[][] res = { { "one", "two", "three" }, { "on\\\"e", "two" }, { "on\"e", "two" }, { "one", "tw\"o" }, { "one", "t\\,wo" }, // backslash in + // quotes only + // escapes a + // delimiter + // (",") + { "one", "two", "th,ree" }, { "a\\\\" }, // backslash in quotes only escapes a delimiter (",") + { "a\\", "b" }, // a backslash must be returned + { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",") + }; + try (CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + + @Test + @Disabled("CSV-107") + void testBOM() throws IOException { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv"); + try (CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, EXCEL_WITH_HEADER)) { + parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + void testBOMInputStreamParserWithInputStream() 
throws IOException { + try (BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"); + CSVParser parser = CSVParser.parse(inputStream, UTF_8, EXCEL_WITH_HEADER)) { + parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + void testBOMInputStreamParserWithReader() throws IOException { + try (Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); + CSVParser parser = CSVParser.builder() + .setReader(reader) + .setFormat(EXCEL_WITH_HEADER) + .get()) { + parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + void testBOMInputStreamParseWithReader() throws IOException { + try (Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); + CSVParser parser = CSVParser.builder() + .setReader(reader) + .setFormat(EXCEL_WITH_HEADER) + .get()) { + parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + void testCarriageReturnEndings() throws IOException { + final String string = "foo\rbaar,\rhello,world\r,kanu"; + try (CSVParser parser = CSVParser.builder().setCharSequence(string).get()) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + } + } + + @Test + void testCarriageReturnLineFeedEndings() throws IOException { + final String string = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; + try (CSVParser parser = CSVParser.builder().setCharSequence(string).get()) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + } + } + + @Test + void testClose() throws Exception { + final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); + final Iterator records; + try (CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { + records = parser.iterator(); + assertTrue(records.hasNext()); + } + assertFalse(records.hasNext()); + 
assertThrows(NoSuchElementException.class, records::next); + } + + @Test + void testCSV141CSVFormat_DEFAULT() throws Exception { + testCSV141Failure(CSVFormat.DEFAULT, 3); + } + + @Test + void testCSV141CSVFormat_INFORMIX_UNLOAD() throws Exception { + testCSV141Failure(CSVFormat.INFORMIX_UNLOAD, 1); + } + + @Test + void testCSV141CSVFormat_INFORMIX_UNLOAD_CSV() throws Exception { + testCSV141Failure(CSVFormat.INFORMIX_UNLOAD_CSV, 3); + } + + @Test + void testCSV141CSVFormat_ORACLE() throws Exception { + testCSV141Failure(CSVFormat.ORACLE, 2); + } + + @Test + void testCSV141CSVFormat_POSTGRESQL_CSV() throws Exception { + testCSV141Failure(CSVFormat.POSTGRESQL_CSV, 3); + } + + @Test + void testCSV141Excel() throws Exception { + testCSV141Ok(CSVFormat.EXCEL); + } + + private void testCSV141Failure(final CSVFormat format, final int failParseRecordNo) throws IOException { + final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv"); + try (CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) { + // row 1 + CSVRecord record = parse(parser, failParseRecordNo); + if (record == null) { + return; // expected failure + } + assertEquals("1414770317901", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84*|*", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1 _8", record.get(4)); + assertEquals(5, record.size()); + // row 2 + record = parse(parser, failParseRecordNo); + if (record == null) { + return; // expected failure + } + assertEquals("1414770318470", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84:|", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1 _84:\\", record.get(4)); + assertEquals(5, record.size()); + // row 3: Fail for certain + assertThrows(IOException.class, () -> parser.nextRecord()); + } + } + + private void testCSV141Ok(final CSVFormat 
format) throws IOException { + final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv"); + try (CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) { + // row 1 + CSVRecord record = parser.nextRecord(); + assertEquals("1414770317901", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84*|*", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1 _8", record.get(4)); + assertEquals(5, record.size()); + // row 2 + record = parser.nextRecord(); + assertEquals("1414770318470", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84:|", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1 _84:\\", record.get(4)); + assertEquals(5, record.size()); + // row 3 + record = parser.nextRecord(); + assertEquals("1414770318327", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1\n1414770318628\"", record.get(2)); + assertEquals("android.widget.EditText", record.get(3)); + assertEquals("pass sem1 _84*|*", record.get(4)); + assertEquals("0", record.get(5)); + assertEquals("pass sem1\n", record.get(6)); + assertEquals(7, record.size()); + // EOF + record = parser.nextRecord(); + assertNull(record); + } + } + + @Test + void testCSV141RFC4180() throws Exception { + testCSV141Failure(CSVFormat.RFC4180, 3); + } + + @Test + void testCSV235() throws IOException { + final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\""; // "aaa","b""bb","ccc" + try (CSVParser parser = CSVFormat.RFC4180.parse(new StringReader(dqString))) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertFalse(records.hasNext()); + assertEquals(3, record.size()); + assertEquals("aaa", record.get(0)); + assertEquals("b\"bb", record.get(1)); + assertEquals("ccc", record.get(2)); + } + } + + @Test + void testCSV57() throws 
Exception { + try (CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { + final List list = parser.getRecords(); + assertNotNull(list); + assertEquals(0, list.size()); + } + } + + @Test + void testDefaultFormat() throws IOException { + // @formatter:off + final String code = "a,b#\n" + // 1) + "\"\n\",\" \",#\n" + // 2) + "#,\"\"\n" + // 3) + "# Final comment\n" // 4) + ; + // @formatter:on + final String[][] res = { { "a", "b#" }, { "\n", " ", "#" }, { "#", "" }, { "# Final comment" } }; + CSVFormat format = CSVFormat.DEFAULT; + assertFalse(format.isCommentMarkerSet()); + final String[][] resComments = { { "a", "b#" }, { "\n", " ", "#" } }; + try (CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Failed to parse without comments", res, records, -1); + format = CSVFormat.DEFAULT.withCommentMarker('#'); + } + try (CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + Utils.compare("Failed to parse with comments", resComments, records, -1); + } + } + + @Test + void testDuplicateHeadersAllowedByDefault() throws Exception { + try (CSVParser parser = CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader())) { + // noop + } + } + + @Test + void testDuplicateHeadersNotAllowed() { + assertThrows(IllegalArgumentException.class, + () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false))); + } + + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping + * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value. 
+ */ + @Test + void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (CSVParser parser = CSVParser.parse(" |a", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "", "a" }, records.get(0)); + } + try (CSVParser parser = CSVParser.parse("a | |b", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); + } + try (CSVParser parser = CSVParser.parse("a | |b |", format)) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b", "" }, records.get(0)); + } + } + + @Test + void testEmptyFile() throws Exception { + try (CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8, + CSVFormat.DEFAULT)) { + assertNull(parser.nextRecord()); + } + } + + @Test + void testEmptyFileHeaderParsing() throws Exception { + try (CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT.withFirstRecordAsHeader())) { + assertNull(parser.nextRecord()); + assertTrue(parser.getHeaderNames().isEmpty()); + } + } + + @Test + void testEmptyLineBehaviorCSV() throws Exception { + final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; + final String[][] res = { { "hello", "" } // CSV format ignores empty lines + }; + for (final String code : codes) { + try (CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + } + + @Test + void 
testEmptyLineBehaviorExcel() throws Exception { + final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; + final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines + { "" } }; + for (final String code : codes) { + try (CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + } + + @Test + void testEmptyString() throws Exception { + try (CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { + assertNull(parser.nextRecord()); + } + } + + @Test + void testEndOfFileBehaviorCSV() throws Exception { + final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", + "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" }; + final String[][] res = { { "hello", "" }, // CSV format ignores empty lines + { "world", "" } }; + for (final String code : codes) { + try (CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + } + + @Test + void testEndOfFileBehaviorExcel() throws Exception { + final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", + "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" }; + final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines + { "world", "" } }; + + for (final String code : codes) { + try (CSVParser 
parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + } + + @Test + void testExcelFormat1() throws IOException { + final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; + final String[][] res = { { "value1", "value2", "value3", "value4" }, { "a", "b", "c", "d" }, { " x", "", "", "" }, { "" }, + { "\"hello\"", " \"world\"", "abc\ndef", "" } }; + try (CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + + @Test + void testExcelFormat2() throws Exception { + final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; + final String[][] res = { { "foo", "baar" }, { "" }, { "hello", "" }, { "" }, { "world", "" } }; + try (CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + + /** + * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers + */ + @Test + void testExcelHeaderCountLessThanData() throws Exception { + final String code = "A,B,C,,\r\na,b,c,d,e\r\n"; + try (CSVParser parser = CSVParser.parse(code, EXCEL_WITH_HEADER)) { + parser.getRecords().forEach(record -> { + assertEquals("a", record.get("A")); + assertEquals("b", record.get("B")); + assertEquals("c", record.get("C")); + }); + } + } + + @Test + void testFirstEndOfLineCr() throws IOException { + final String data 
= "foo\rbaar,\rhello,world\r,kanu"; + try (CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\r", parser.getFirstEndOfLine()); + } + } + + @Test + void testFirstEndOfLineCrLf() throws IOException { + final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; + try (CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\r\n", parser.getFirstEndOfLine()); + } + } + + @Test + void testFirstEndOfLineLf() throws IOException { + final String data = "foo\nbaar,\nhello,world\n,kanu"; + try (CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\n", parser.getFirstEndOfLine()); + } + } + + @Test + void testForEach() throws Exception { + try (Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + CSVParser parser = CSVFormat.DEFAULT.parse(in)) { + final List records = new ArrayList<>(); + for (final CSVRecord record : parser) { + records.add(record); + } + assertEquals(3, records.size()); + assertValuesEquals(new String[] { "a", "b", "c" }, records.get(0)); + assertValuesEquals(new String[] { "1", "2", "3" }, records.get(1)); + assertValuesEquals(new String[] { "x", "y", "z" }, records.get(2)); + } + } + + @Test + void testGetBytePositionMultiCharacterDelimiter() throws IOException { + final String code = "aa[|]bb\ncc[|]dd\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code)) + .setFormat(format) + .setCharset(StandardCharsets.UTF_8) + .setTrackBytes(true) + .get()) { + final Iterator it = parser.iterator(); + final CSVRecord first = it.next(); + final CSVRecord second = it.next(); + assertEquals(0, first.getBytePosition()); + assertEquals(8, 
second.getBytePosition()); + } + } + + /** + * Tests CSV-329: byte positions must stay correct when a multi-character delimiter contains a supplementary (surrogate-pair) character. + */ + @Test + void testGetBytePositionMultiCharacterDelimiterWithSupplementaryCharacter() throws IOException { + final String delimiter = "x😀"; + final String code = "ax😀b\ncx😀d\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(delimiter).get(); + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code)) + .setFormat(format) + .setCharset(UTF_8) + .setTrackBytes(true) + .get()) { + final CSVRecord first = parser.nextRecord(); + final CSVRecord second = parser.nextRecord(); + assertNotNull(first); + assertNotNull(second); + assertValuesEquals(new String[] { "a", "b" }, first); + assertValuesEquals(new String[] { "c", "d" }, second); + assertEquals(0, first.getBytePosition()); + assertEquals("ax😀b\n".getBytes(UTF_8).length, second.getBytePosition()); + } + } + + @Test + void testGetBytePositionWithCharacterOffsetAndMultiBytePrefix() throws Exception { + final String row0 = "é,x\n"; + final Charset charset = UTF_8; + // row0 char count is 4 + assertEquals(4, row0.length()); + // row0 byte count is 5 + final int record1ByteOffset = row0.getBytes(charset).length; + assertEquals(5, record1ByteOffset); + final String row1 = "b,c\n"; + final String rows = row0 + row1; + final long record1CharOffset = row0.length(); + final long expectedByteOffset = row0.getBytes(charset).length; + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(row1)) + .setFormat(CSVFormat.DEFAULT) + .setCharset(charset) + .setTrackBytes(true) + .setByteOffset(record1ByteOffset) + .setCharacterOffset(record1CharOffset) + .setRecordNumber(2) // not relevant but a better use case example. 
+ .get()) { + final CSVRecord record = parser.nextRecord(); + assertNotNull(record); + assertEquals(4, record.getCharacterPosition()); + assertEquals(record1CharOffset, record.getCharacterPosition()); + assertEquals(expectedByteOffset, record.getBytePosition()); + } + } + + @Test + void testGetBytePositionWithSingleByteCharset() throws IOException { + // A single-byte charset cannot encode U+FFFF, the char value of the EOF sentinel. + // Byte counting must skip the EOF read so a valid file parses without throwing. + final String code = "a,b\nc,d\n"; + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code)) + .setFormat(CSVFormat.DEFAULT) + .setCharset(StandardCharsets.ISO_8859_1) + .setTrackBytes(true) + .get()) { + final CSVRecord first = parser.nextRecord(); + final CSVRecord second = parser.nextRecord(); + assertNotNull(first); + assertNotNull(second); + assertNull(parser.nextRecord()); + assertEquals(0, first.getBytePosition()); + assertEquals(4, second.getBytePosition()); + } + } + + @Test + void testGetHeaderComment_HeaderComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("header comment", parser.getHeaderComment()); + } + } + + @Test + void testGetHeaderComment_HeaderComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("header comment", parser.getHeaderComment()); + } + } + + @Test + void testGetHeaderComment_HeaderComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + // Expect no header comment - the text "comment" is attached to the first record + 
assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + void testGetHeaderComment_HeaderTrailerComment() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("multi-line" + LF + "header comment", parser.getHeaderComment()); + } + } + + @Test + void testGetHeaderComment_NoComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + void testGetHeaderComment_NoComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + void testGetHeaderComment_NoComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + void testGetHeaderMap() throws Exception { + try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + final Map headerMap = parser.getHeaderMap(); + final Iterator columnNames = headerMap.keySet().iterator(); + // Headers are iterated in column order. + assertEquals("A", columnNames.next()); + assertEquals("B", columnNames.next()); + assertEquals("C", columnNames.next()); + final Iterator records = parser.iterator(); + + // Parse to make sure getHeaderMap did not have a side-effect. 
+ for (int i = 0; i < 3; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("A")); + assertEquals(record.get(1), record.get("B")); + assertEquals(record.get(2), record.get("C")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + void testGetHeaderNames() throws IOException { + try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + final Map nameIndexMap = parser.getHeaderMap(); + final List headerNames = parser.getHeaderNames(); + assertNotNull(headerNames); + assertEquals(nameIndexMap.size(), headerNames.size()); + for (int i = 0; i < headerNames.size(); i++) { + final String name = headerNames.get(i); + assertEquals(i, nameIndexMap.get(name).intValue()); + } + } + } + + @Test + void testGetHeaderNamesReadOnly() throws IOException { + try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + final List headerNames = parser.getHeaderNames(); + assertNotNull(headerNames); + assertThrows(UnsupportedOperationException.class, () -> headerNames.add("This is a read-only list.")); + } + } + + @Test + void testGetLine() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + for (final String[] re : RESULT) { + assertValuesEquals(re, parser.nextRecord()); + } + + assertNull(parser.nextRecord()); + } + } + + @Test + void testGetLineNumberWithCR() throws Exception { + validateLineNumbers(String.valueOf(CR)); + } + + @Test + void testGetLineNumberWithCRLF() throws Exception { + validateLineNumbers(CRLF); + } + + @Test + void testGetLineNumberWithLF() throws Exception { + validateLineNumbers(String.valueOf(LF)); + } + + @Test + void testGetOneLine() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) { + final CSVRecord record = parser.getRecords().get(0); + 
assertValuesEquals(RESULT[0], record); + } + } + + /** + * Tests reusing a parser to process new string records one at a time as they are being discovered. See [CSV-110]. + * + * @throws IOException when an I/O error occurs. + */ + @Test + void testGetOneLineOneParser() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT; + try (PipedWriter writer = new PipedWriter(); + PipedReader origin = new PipedReader(writer); + CSVParser parser = CSVParser.builder() + .setReader(origin) + .setFormat(format) + .get()) { + writer.append(CSV_INPUT_1); + writer.append(format.getRecordSeparator()); + final CSVRecord record1 = parser.nextRecord(); + assertValuesEquals(RESULT[0], record1); + writer.append(CSV_INPUT_2); + writer.append(format.getRecordSeparator()); + final CSVRecord record2 = parser.nextRecord(); + assertValuesEquals(RESULT[1], record2); + } + } + + @Test + void testGetRecordFourBytesRead() throws Exception { + final String code = "id,a,b,c\n" + + "1,๐Ÿ˜Š,๐Ÿค”,๐Ÿ˜‚\n" + + "2,๐Ÿ˜Š,๐Ÿค”,๐Ÿ˜‚\n" + + "3,๐Ÿ˜Š,๐Ÿค”,๐Ÿ˜‚\n"; + final CSVFormat format = CSVFormat.Builder.create() + .setDelimiter(',') + .setQuote('\'') + .get(); + try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setTrackBytes(true).get()) { + CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L); + + assertEquals(0, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(1, record.getRecordNumber()); + assertEquals(code.indexOf('i'), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); + + assertNotNull(record = parser.nextRecord()); + assertEquals(2, record.getRecordNumber()); + assertEquals(code.indexOf('1'), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); + assertNotNull(record = parser.nextRecord()); + assertEquals(3, record.getRecordNumber()); + assertEquals(code.indexOf('2'), 
record.getCharacterPosition()); + assertEquals(record.getBytePosition(), 26); + assertNotNull(record = parser.nextRecord()); + assertEquals(4, record.getRecordNumber()); + assertEquals(code.indexOf('3'), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), 43); + } + } + + @Test + void testGetRecordNumberWithCR() throws Exception { + validateRecordNumbers(String.valueOf(CR)); + } + + @Test + void testGetRecordNumberWithCRLF() throws Exception { + validateRecordNumbers(CRLF); + } + + @Test + void testGetRecordNumberWithLF() throws Exception { + validateRecordNumbers(String.valueOf(LF)); + } + + @Test + void testGetRecordPositionWithCRLF() throws Exception { + validateRecordPosition(CRLF); + } + + @Test + void testGetRecordPositionWithLF() throws Exception { + validateRecordPosition(String.valueOf(LF)); + } + + @Test + void testGetRecords() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + final List records = parser.getRecords(); + assertEquals(RESULT.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < RESULT.length; i++) { + assertValuesEquals(RESULT[i], records.get(i)); + } + } + } + + @Test + void testGetRecordsFromBrokenInputStream() throws IOException { + @SuppressWarnings("resource") // We also get an exception on close, which is OK but can't assert in a try. + final CSVParser parser = CSVParser.parse(new BrokenInputStream(), UTF_8, CSVFormat.DEFAULT); + assertThrows(UncheckedIOException.class, parser::getRecords); + + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, Long.MAX_VALUE }) + void testGetRecordsMaxRows(final long maxRows) throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.builder().setIgnoreSurroundingSpaces(true).setMaxRows(maxRows).get())) { + final List records = parser.getRecords(); + final long expectedLength = maxRows <= 0 || maxRows > RESULT.length ? 
RESULT.length : maxRows; + assertEquals(expectedLength, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < expectedLength; i++) { + assertValuesEquals(RESULT[i], records.get(i)); + } + } + } + + /** + * Tests CSV-327. + */ + @Test + void testGetRecordsMaxRowsWithRecordNumberOffset() throws IOException { + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader("a,b\nc,d\n")) + .setFormat(CSVFormat.DEFAULT.builder().setMaxRows(1).get()) + .setRecordNumber(2) + .get()) { + final List<CSVRecord> records = parser.getRecords(); + assertEquals(1, records.size()); + assertEquals(2, records.get(0).getRecordNumber()); + assertValuesEquals(new String[] { "a", "b" }, records.get(0)); + } + } + + @Test + void testGetRecordThreeBytesRead() throws Exception { + final String code = "id,date,val5,val4\n" + + "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" + + "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" + + "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n"; // every Japanese character here encodes to 3 bytes in UTF-8 + final CSVFormat format = CSVFormat.Builder.create() + .setDelimiter(',') + .setQuote('\'') + .get(); + try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setTrackBytes(true).get()) { + CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L); + + assertEquals(0, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(1, record.getRecordNumber()); + assertEquals(code.indexOf('i'), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); + + assertNotNull(record = parser.nextRecord()); + assertEquals(2, record.getRecordNumber()); + assertEquals(code.indexOf('1'), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); + + assertNotNull(record = parser.nextRecord()); + assertEquals(3, record.getRecordNumber()); + 
assertEquals(code.indexOf('2'), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), 95); + + assertNotNull(record = parser.nextRecord()); + assertEquals(4, record.getRecordNumber()); + assertEquals(code.indexOf('3'), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), 154); + } + } + + @Test + void testGetRecordWithMultiLineValues() throws Exception { + try (CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"", + CSVFormat.DEFAULT.withRecordSeparator(CRLF))) { + CSVRecord record; + assertEquals(0, parser.getRecordNumber()); + assertEquals(0, parser.getCurrentLineNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(3, parser.getCurrentLineNumber()); + assertEquals(1, record.getRecordNumber()); + assertEquals(1, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(6, parser.getCurrentLineNumber()); + assertEquals(2, record.getRecordNumber()); + assertEquals(2, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(9, parser.getCurrentLineNumber()); + assertEquals(3, record.getRecordNumber()); + assertEquals(3, parser.getRecordNumber()); + assertNull(record = parser.nextRecord()); + assertEquals(9, parser.getCurrentLineNumber()); + assertEquals(3, parser.getRecordNumber()); + } + } + + @Test + void testGetTrailerComment_HeaderComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + void testGetTrailerComment_HeaderComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + void 
testGetTrailerComment_HeaderComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + void testGetTrailerComment_HeaderTrailerComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + void testGetTrailerComment_HeaderTrailerComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + void testGetTrailerComment_HeaderTrailerComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + void testGetTrailerComment_MultilineComment() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("multi-line" + LF + "comment", parser.getTrailerComment()); + } + } + + @Test + void testHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + + try (CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { + final Iterator records = parser.iterator(); + + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(1), record.get("b")); + 
assertEquals(record.get(2), record.get("c")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + void testHeaderComment() throws Exception { + final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { + final Iterator records = parser.iterator(); + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(1), record.get("b")); + assertEquals(record.get(2), record.get("c")); + } + assertFalse(records.hasNext()); + } + } + + @Test + void testHeaderMissing() throws Exception { + final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { + final Iterator records = parser.iterator(); + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(2), record.get("c")); + } + assertFalse(records.hasNext()); + } + } + + @Test + void testHeaderMissingWithNull() throws Exception { + final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in)) { + parser.iterator(); + } + } + + @Test + void testHeadersMissing() throws Exception { + try (Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); + CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { + parser.iterator(); + } + } + + @Test + void testHeadersMissingException() { + final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); + } + + @Test + void testHeadersMissingOneColumnException() { + 
final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); + } + + @Test + void testHeadersWithNullColumnName() throws IOException { + final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("null").withAllowMissingColumnNames().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + // Expect the null header to be missing + @SuppressWarnings("resource") + final CSVParser recordParser = record.getParser(); + assertEquals(Arrays.asList("header1", "header3"), recordParser.getHeaderNames()); + assertEquals(2, recordParser.getHeaderMap().size()); + } + } + + @Test + void testIgnoreCaseHeaderMapping() throws Exception { + final Reader reader = new StringReader("1,2,3"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase().parse(reader)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("one")); + assertEquals("2", record.get("two")); + assertEquals("3", record.get("THREE")); + } + } + + @Test + void testIgnoreEmptyLines() throws IOException { + final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; + // String code = "world\r\n\n"; + // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; + try (CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(3, records.size()); + } + } + + @Test + void testInvalidFormat() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); + } + + @Test + void testIterator() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.parse(in)) { + final Iterator iterator = parser.iterator(); + 
assertTrue(iterator.hasNext()); + assertThrows(UnsupportedOperationException.class, iterator::remove); + assertValuesEquals(new String[] { "a", "b", "c" }, iterator.next()); + assertValuesEquals(new String[] { "1", "2", "3" }, iterator.next()); + assertTrue(iterator.hasNext()); + assertTrue(iterator.hasNext()); + assertTrue(iterator.hasNext()); + assertValuesEquals(new String[] { "x", "y", "z" }, iterator.next()); + assertFalse(iterator.hasNext()); + assertThrows(NoSuchElementException.class, iterator::next); + } + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, 5, Long.MAX_VALUE }) + void testIteratorMaxRows(final long maxRows) throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get().parse(in)) { + final Iterator iterator = parser.iterator(); + assertTrue(iterator.hasNext()); + assertThrows(UnsupportedOperationException.class, iterator::remove); + assertValuesEquals(new String[] { "a", "b", "c" }, iterator.next()); + final boolean noLimit = maxRows <= 0; + final int fixtureLen = 3; + final long expectedLen = noLimit ? 
fixtureLen : Math.min(fixtureLen, maxRows); + if (expectedLen > 1) { + assertTrue(iterator.hasNext()); + assertValuesEquals(new String[] { "1", "2", "3" }, iterator.next()); + } + assertEquals(expectedLen > 2, iterator.hasNext()); + // again + assertEquals(expectedLen > 2, iterator.hasNext()); + if (expectedLen == fixtureLen) { + assertTrue(iterator.hasNext()); + assertValuesEquals(new String[] { "x", "y", "z" }, iterator.next()); + } + assertFalse(iterator.hasNext()); + assertThrows(NoSuchElementException.class, iterator::next); + } + } + + @Test + void testIteratorSequenceBreaking() throws IOException { + final String fiveRows = "1\n2\n3\n4\n5\n"; + // Iterator hasNext() shouldn't break sequence + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { + final Iterator iter = parser.iterator(); + int recordNumber = 0; + while (iter.hasNext()) { + final CSVRecord record = iter.next(); + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + iter.hasNext(); + while (iter.hasNext()) { + final CSVRecord record = iter.next(); + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + } + // Consecutive enhanced for loops shouldn't break sequence + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { + int recordNumber = 0; + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + } + // Consecutive enhanced for loops with hasNext() peeking shouldn't break sequence + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { + int recordNumber = 0; + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if 
(recordNumber >= 2) { + break; + } + } + parser.iterator().hasNext(); + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + } + } + + @Test + void testLineFeedEndings() throws IOException { + final String code = "foo\nbaar,\nhello,world\n,kanu"; + try (CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + } + } + + @Test + void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + CSVRecord record; + // 1st record + record = records.next(); + assertTrue(record.isMapped("A")); + assertTrue(record.isMapped("B")); + assertTrue(record.isMapped("C")); + assertTrue(record.isSet("A")); + assertTrue(record.isSet("B")); + assertFalse(record.isSet("C")); + assertEquals("1", record.get("A")); + assertEquals("2", record.get("B")); + assertFalse(record.isConsistent()); + // 2nd record + record = records.next(); + assertTrue(record.isMapped("A")); + assertTrue(record.isMapped("B")); + assertTrue(record.isMapped("C")); + assertTrue(record.isSet("A")); + assertTrue(record.isSet("B")); + assertTrue(record.isSet("C")); + assertEquals("x", record.get("A")); + assertEquals("y", record.get("B")); + assertEquals("z", record.get("C")); + assertTrue(record.isConsistent()); + // end + assertFalse(records.hasNext()); + } + } + + @Test + @Disabled + void testMongoDbCsv() throws Exception { + try (CSVParser parser = CSVParser.parse("\"a a\",b,c" + LF + "d,e,f", CSVFormat.MONGODB_CSV)) { + final Iterator itr1 = parser.iterator(); + final Iterator itr2 = parser.iterator(); + + final CSVRecord first = itr1.next(); + assertEquals("a a", first.get(0)); + assertEquals("b", first.get(1)); + assertEquals("c", 
first.get(2)); + + final CSVRecord second = itr2.next(); + assertEquals("d", second.get(0)); + assertEquals("e", second.get(1)); + assertEquals("f", second.get(2)); + } + } + + @Test + // TODO this may lead to strange behavior, throw an exception if iterator() has already been called? + void testMultipleIterators() throws Exception { + try (CSVParser parser = CSVParser.parse("a,b,c" + CRLF + "d,e,f", CSVFormat.DEFAULT)) { + final Iterator itr1 = parser.iterator(); + + final CSVRecord first = itr1.next(); + assertEquals("a", first.get(0)); + assertEquals("b", first.get(1)); + assertEquals("c", first.get(2)); + + final CSVRecord second = itr1.next(); + assertEquals("d", second.get(0)); + assertEquals("e", second.get(1)); + assertEquals("f", second.get(2)); + } + } + + @Test + void testNewCSVParserNullReaderFormat() { + assertThrows(NullPointerException.class, () -> new CSVParser(null, CSVFormat.DEFAULT)); + } + + @Test + void testNewCSVParserReaderNullFormat() { + assertThrows(NullPointerException.class, () -> new CSVParser(new StringReader(""), null)); + } + + @Test + void testNoHeaderMap() throws Exception { + try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) { + assertNull(parser.getHeaderMap()); + } + } + + @Test + void testNotValueCSV() throws IOException { + final String source = "#"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#'); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertNull(csvRecord); + } + } + + @Test + void testParse() throws Exception { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/test.csv"); + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader("A", "B", "C", "D").get(); + final Charset charset = StandardCharsets.UTF_8; + // Reader + try (CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) { + 
parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).get()) { + parseFully(parser); + } + // String + final Path path = Paths.get(url.toURI()); + final String string = new String(Files.readAllBytes(path), charset); + try (CSVParser parser = CSVParser.parse(string, format)) { + parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setCharSequence(string).setFormat(format).get()) { + parseFully(parser); + } + // File + final File file = new File(url.toURI()); + try (CSVParser parser = CSVParser.parse(file, charset, format)) { + parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setFile(file).setCharset(charset).setFormat(format).get()) { + parseFully(parser); + } + // InputStream + try (CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) { + parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setInputStream(url.openStream()).setCharset(charset).setFormat(format).get()) { + parseFully(parser); + } + // Path + try (CSVParser parser = CSVParser.parse(path, charset, format)) { + parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setPath(path).setCharset(charset).setFormat(format).get()) { + parseFully(parser); + } + // URL + try (CSVParser parser = CSVParser.parse(url, charset, format)) { + parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setURI(url.toURI()).setCharset(charset).setFormat(format).get()) { + parseFully(parser); + } + // InputStreamReader + try (CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) { + parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).get()) { + parseFully(parser); + } + // InputStreamReader with longs + try (CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, /* 
characterOffset= */0, /* recordNumber= */1)) { + parseFully(parser); + } + try (CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(url.openStream(), charset)).setFormat(format).setCharacterOffset(0) + .setRecordNumber(0).get()) { + parseFully(parser); + } + } + + @Test + void testParseFileCharsetNullFormat() throws IOException { + final File file = new File("src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv"); + try (CSVParser parser = CSVParser.parse(file, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. + parseFully(parser); + } + } + + @Test + void testParseInputStreamCharsetNullFormat() throws IOException { + try (InputStream in = Files.newInputStream(Paths.get("src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv")); + CSVParser parser = CSVParser.parse(in, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. + parseFully(parser); + } + } + + @Test + void testParseNullFileFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); + } + + @Test + void testParseNullPathFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((Path) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); + } + + @Test + void testParseNullStringFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((String) null, CSVFormat.DEFAULT)); + } + + @Test + void testParseNullUrlCharsetFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); + } + + @Test + void testParsePathCharsetNullFormat() throws IOException { + final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv"); + try (CSVParser parser = CSVParser.parse(path, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. 
+ parseFully(parser); + } + } + + @Test + void testParserUrlNullCharsetFormat() throws IOException { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/test.csv"); + try (CSVParser parser = CSVParser.parse(url, null, CSVFormat.DEFAULT)) { + // The null argument here is the Charset, not the format; parsing is still expected to succeed. + // NOTE(review): presumably a default Charset is substituted for null - confirm. + parseFully(parser); + } + } + + @Test + void testParseStringNullFormat() throws IOException { + try (CSVParser parser = CSVParser.parse("1,2,3", null)) { + // null maps to DEFAULT. + final List records = parser.getRecords(); + assertEquals(1, records.size()); + final CSVRecord record = records.get(0); + assertEquals(3, record.size()); + assertEquals("1", record.get(0)); + assertEquals("2", record.get(1)); + assertEquals("3", record.get(2)); + } + } + + @Test + void testParseUrlCharsetNullFormat() throws IOException { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/test.csv"); + try (CSVParser parser = CSVParser.parse(url, Charset.defaultCharset(), null)) { + // null maps to DEFAULT. 
+ parseFully(parser); + } + } + + @Test + void testParseWithDelimiterStringWithEscape() throws IOException { + final String source = "a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get(); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a[|]b![|]c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + csvRecord = csvParser.nextRecord(); + assertEquals("abc[abc]", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + + @Test + void testParseWithDelimiterStringWithQuote() throws IOException { + final String source = "'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').get(); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a[|]b[|]c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + csvRecord = csvParser.nextRecord(); + assertEquals("abc[abc]", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + + @Test + void testParseWithDelimiterWithEscape() throws IOException { + final String source = "a!,b!,c,xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withEscape('!'); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a,b,c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + + @Test + void testParseWithDelimiterWithQuote() throws IOException { + final String source = "'a,b,c',xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a,b,c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + + 
@Test + void testParseWithQuoteThrowsException() { + final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); + assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c','")).nextRecord()); + assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c'abc,xyz")).nextRecord()); + assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'abc'a,b,c',xyz")).nextRecord()); + } + + @Test + void testParseWithQuoteWithEscape() throws IOException { + final String source = "'a?,b?,c?d',xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'').withEscape('?'); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a,b,c?d", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + + @ParameterizedTest + @EnumSource(CSVFormat.Predefined.class) + void testParsingPrintedEmptyFirstColumn(final CSVFormat.Predefined format) throws Exception { + final String[][] lines = { { "a", "b" }, { "", "x" } }; + final StringWriter buf = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(buf, format.getFormat())) { + printer.printRecords(Stream.of(lines)); + } + try (CSVParser csvRecords = CSVParser.builder() + .setReader(new StringReader(buf.toString())) + .setFormat(format.getFormat()) + .get()) { + for (final String[] line : lines) { + assertValuesEquals(line, csvRecords.nextRecord()); + } + assertNull(csvRecords.nextRecord()); + } + } + + /** + * A truncated escaped multi-character delimiter at EOF must stay literal data and not be completed from a stale + * escape delimiter look-ahead. 
+ */ + @Test + void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get(); + try (CSVParser parser = format.parse(new StringReader("x![!|!]y![!|"))) { + final CSVRecord record = parser.nextRecord(); + assertEquals("x[|]y![!|", record.get(0)); + assertEquals(1, record.size()); + } + } + + /** + * Tests CSV-324. + */ + @Test + void testPartialMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + try (CSVParser parser = format.parse(new StringReader("a[|]b[|"))) { + final CSVRecord record = parser.nextRecord(); + assertEquals("a", record.get(0)); + assertEquals("b[|", record.get(1)); + assertEquals(2, record.size()); + } + } + + /** + * A truncated multi-character delimiter at EOF must not be completed from the look-ahead buffer left dirty by an + * earlier non-matching peek in the same token. + */ + @Test + void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + // The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]". 
+ final String recordString = "x[a][|"; + try (CSVParser parser = format.parse(new StringReader(recordString))) { + final CSVRecord record = parser.nextRecord(); + assertEquals(recordString, record.get(0)); + assertEquals(1, record.size()); + } + } + + @Test + void testProvidedHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in)) { + final Iterator records = parser.iterator(); + for (int i = 0; i < 3; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertTrue(record.isMapped("A")); + assertTrue(record.isMapped("B")); + assertTrue(record.isMapped("C")); + assertFalse(record.isMapped("NOT MAPPED")); + assertEquals(record.get(0), record.get("A")); + assertEquals(record.get(1), record.get("B")); + assertEquals(record.get(2), record.get("C")); + } + assertFalse(records.hasNext()); + } + } + + @Test + void testProvidedHeaderAuto() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { + final Iterator records = parser.iterator(); + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertTrue(record.isMapped("a")); + assertTrue(record.isMapped("b")); + assertTrue(record.isMapped("c")); + assertFalse(record.isMapped("NOT MAPPED")); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(1), record.get("b")); + assertEquals(record.get(2), record.get("c")); + } + assertFalse(records.hasNext()); + } + } + + @Test + void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException { + final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6"); + try (CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + 
@SuppressWarnings("resource") + final CSVParser recordParser = record.getParser(); + assertEquals(Arrays.asList("header1", "header2", "header1"), recordParser.getHeaderNames()); + } + } + + @Test + void testRoundtrip() throws Exception { + final StringWriter out = new StringWriter(); + final String data = "a,b,c\r\n1,2,3\r\nx,y,z\r\n"; + try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT); + CSVParser parse = CSVParser.parse(data, CSVFormat.DEFAULT)) { + for (final CSVRecord record : parse) { + printer.printRecord(record); + } + assertEquals(data, out.toString()); + } + } + + @Test + void testSkipAutoHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("a")); + assertEquals("2", record.get("b")); + assertEquals("3", record.get("c")); + } + } + + @Test + void testSkipHeaderOverrideDuplicateHeaders() throws Exception { + final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + } + } + + @Test + void testSkipSetAltHeaders() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + } + } + + @Test + void testSkipSetHeader() throws Exception { + final Reader in = new 
StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("a")); + assertEquals("2", record.get("b")); + assertEquals("3", record.get("c")); + } + } + + @Test + @Disabled + void testStartWithEmptyLinesThenHeaders() throws Exception { + final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; + final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines + { "" } }; + for (final String code : codes) { + try (CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertValuesEquals(res[i], records.get(i)); + } + } + } + } + + @Test + void testStream() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.parse(in)) { + final List list = parser.stream().collect(Collectors.toList()); + assertFalse(list.isEmpty()); + assertValuesEquals(new String[] { "a", "b", "c" }, list.get(0)); + assertValuesEquals(new String[] { "1", "2", "3" }, list.get(1)); + assertValuesEquals(new String[] { "x", "y", "z" }, list.get(2)); + } + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, Long.MAX_VALUE }) + void testStreamMaxRows(final long maxRows) throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get().parse(in)) { + final List list = parser.stream().collect(Collectors.toList()); + assertFalse(list.isEmpty()); + assertValuesEquals(new String[] { "a", "b", "c" }, list.get(0)); + if (maxRows <= 0 || maxRows > 1) { + 
assertValuesEquals(new String[] { "1", "2", "3" }, list.get(1)); + } + if (maxRows <= 0 || maxRows > 2) { + assertValuesEquals(new String[] { "x", "y", "z" }, list.get(2)); + } + } + } + + @Test + void testThrowExceptionWithLineAndPosition() throws IOException { + final String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\nrec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10"; + final StringReader stringReader = new StringReader(csvContent); + // @formatter:off + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder() + .setHeader() + .setSkipHeaderRecord(true) + .get(); + // @formatter:on + try (CSVParser csvParser = csvFormat.parse(stringReader)) { + final UncheckedIOException exception = assertThrows(UncheckedIOException.class, csvParser::getRecords); + assertInstanceOf(CSVException.class, exception.getCause()); + assertTrue(exception.getMessage().contains("Invalid character between encapsulated token and delimiter at line: 2, position: 94"), + exception::getMessage); + } + } + + @Test + void testTrailingDelimiter() throws Exception { + final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrailingDelimiter().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + assertEquals(3, record.size()); + } + } + + @Test + void testTrailingDelimiterKeepsQuotedEmptyLastField() throws Exception { + final CSVFormat format = CSVFormat.DEFAULT.builder().setTrailingDelimiter(true).get(); + try (CSVParser parser = CSVParser.parse("a,b,\"\"", format)) { + final CSVRecord record = parser.iterator().next(); + assertEquals(3, record.size()); + assertEquals("a", record.get(0)); + assertEquals("b", record.get(1)); + assertEquals("", record.get(2)); + } + // An unquoted trailing 
delimiter still drops the empty field. + try (CSVParser parser = CSVParser.parse("a,b,", format)) { + final CSVRecord record = parser.iterator().next(); + assertEquals(2, record.size()); + } + } + + @Test + void testTrim() throws Exception { + final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z"); + try (CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrim().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + assertEquals(3, record.size()); + } + } + + private void validateLineNumbers(final String lineSeparator) throws IOException { + try (CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { + assertEquals(0, parser.getCurrentLineNumber()); + assertNotNull(parser.nextRecord()); + assertEquals(1, parser.getCurrentLineNumber()); + assertNotNull(parser.nextRecord()); + assertEquals(2, parser.getCurrentLineNumber()); + assertNotNull(parser.nextRecord()); + // Read EOF without EOL should 3 + assertEquals(3, parser.getCurrentLineNumber()); + assertNull(parser.nextRecord()); + // Read EOF without EOL should 3 + assertEquals(3, parser.getCurrentLineNumber()); + } + } + + private void validateRecordNumbers(final String lineSeparator) throws IOException { + try (CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { + CSVRecord record; + assertEquals(0, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(1, record.getRecordNumber()); + assertEquals(1, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(2, record.getRecordNumber()); + assertEquals(2, parser.getRecordNumber()); + assertNotNull(record = 
parser.nextRecord()); + assertEquals(3, record.getRecordNumber()); + assertEquals(3, parser.getRecordNumber()); + assertNull(record = parser.nextRecord()); + assertEquals(3, parser.getRecordNumber()); + } + } + + private void validateRecordPosition(final String lineSeparator) throws IOException { + final String nl = lineSeparator; // used as linebreak in values for better distinction + final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator + + // to see if recordPosition correctly points to the enclosing quote + "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator + + // unicode test... not very relevant while operating on strings instead of bytes, but for + // completeness... + "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF"; + final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator); + final long positionRecord3; + try (CSVParser parser = CSVParser.parse(code, format)) { + CSVRecord record; + assertEquals(0, parser.getRecordNumber()); + // nextRecord + assertNotNull(record = parser.nextRecord()); + assertEquals(1, record.getRecordNumber()); + assertEquals(code.indexOf('a'), record.getCharacterPosition()); + // nextRecord + assertNotNull(record = parser.nextRecord()); + assertEquals(2, record.getRecordNumber()); + assertEquals(code.indexOf('1'), record.getCharacterPosition()); + // nextRecord + assertNotNull(record = parser.nextRecord()); + positionRecord3 = record.getCharacterPosition(); + assertEquals(3, record.getRecordNumber()); + assertEquals(code.indexOf("'A"), record.getCharacterPosition()); + assertEquals("A" + lineSeparator + "A", record.get(0)); + assertEquals("B" + lineSeparator + "B", record.get(1)); + assertEquals("CC", record.get(2)); + // nextRecord + assertNotNull(record = parser.nextRecord()); + assertEquals(4, record.getRecordNumber()); + assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); + // nextRecord + assertNotNull(record = parser.nextRecord()); + 
assertEquals(5, record.getRecordNumber()); + assertEquals(code.indexOf("EOF"), record.getCharacterPosition()); + } + // now try to read starting at record 3 + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code.substring((int) positionRecord3))) + .setFormat(format) + .setCharacterOffset(positionRecord3) + .setRecordNumber(3) + .get()) { + CSVRecord record; + // nextRecord + assertNotNull(record = parser.nextRecord()); + assertEquals(3, record.getRecordNumber()); + assertEquals(code.indexOf("'A"), record.getCharacterPosition()); + assertEquals("A" + lineSeparator + "A", record.get(0)); + assertEquals("B" + lineSeparator + "B", record.get(1)); + assertEquals("CC", record.get(2)); + // nextRecord + assertNotNull(record = parser.nextRecord()); + assertEquals(4, record.getRecordNumber()); + assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); + assertEquals("\u00c4", record.get(0)); + } // again with ctor + try (CSVParser parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3)) { + CSVRecord record; + // nextRecord + assertNotNull(record = parser.nextRecord()); + assertEquals(3, record.getRecordNumber()); + assertEquals(code.indexOf("'A"), record.getCharacterPosition()); + assertEquals("A" + lineSeparator + "A", record.get(0)); + assertEquals("B" + lineSeparator + "B", record.get(1)); + assertEquals("CC", record.get(2)); + // nextRecord + assertNotNull(record = parser.nextRecord()); + assertEquals(4, record.getRecordNumber()); + assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); + assertEquals("\u00c4", record.get(0)); + } + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java index 8a9a36bcd2..9ae80c1e51 100644 --- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java @@ -1,1742 +1,2062 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSLASH; -import static org.apache.commons.csv.Constants.CR; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; - -import java.io.CharArrayWriter; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.io.PrintStream; -import java.io.Reader; -import java.io.StringReader; -import java.io.StringWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.sql.BatchUpdateException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; 
-import java.util.Arrays; -import java.util.Date; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Objects; -import java.util.Random; -import java.util.Vector; -import java.util.stream.Stream; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.output.NullOutputStream; -import org.apache.commons.lang3.StringUtils; -import org.h2.tools.SimpleResultSet; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * Tests {@link CSVPrinter}. - */ -public class CSVPrinterTest { - - private static final char DQUOTE_CHAR = '"'; - private static final char EURO_CH = '\u20AC'; - private static final int ITERATIONS_FOR_RANDOM_TEST = 50000; - private static final char QUOTE_CH = '\''; - - private static String printable(final String s) { - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < s.length(); i++) { - final char ch = s.charAt(i); - if (ch <= ' ' || ch >= 128) { - sb.append("(").append((int) ch).append(")"); - } else { - sb.append(ch); - } - } - return sb.toString(); - } - - private String longText2; - - private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator(); - - private void doOneRandom(final CSVFormat format) throws Exception { - final Random r = new Random(); - - final int nLines = r.nextInt(4) + 1; - final int nCol = r.nextInt(3) + 1; - // nLines=1;nCol=2; - final String[][] lines = generateLines(nLines, nCol); - - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - - for (int i = 0; i < nLines; i++) { - // for (int j=0; j parseResult = parser.getRecords(); - - final String[][] expected = lines.clone(); - for (int i = 0; i < expected.length; i++) { - expected[i] = expectNulls(expected[i], format); - } - Utils.compare("Printer output :" + printable(result), expected, parseResult); - } - } - - private void doRandom(final CSVFormat format, final int 
iter) throws Exception { - for (int i = 0; i < iter; i++) { - doOneRandom(format); - } - } - - /** - * Converts an input CSV array into expected output values WRT NULLs. NULL strings are converted to null values - * because the parser will convert these strings to null. - */ - private T[] expectNulls(final T[] original, final CSVFormat csvFormat) { - final T[] fixed = original.clone(); - for (int i = 0; i < fixed.length; i++) { - if (Objects.equals(csvFormat.getNullString(), fixed[i])) { - fixed[i] = null; - } - } - return fixed; - } - - private String[][] generateLines(final int nLines, final int nCol) { - final String[][] lines = new String[nLines][]; - for (int i = 0; i < nLines; i++) { - final String[] line = new String[nCol]; - lines[i] = line; - for (int j = 0; j < nCol; j++) { - line[j] = randStr(); - } - } - return lines; - } - - private Connection getH2Connection() throws SQLException, ClassNotFoundException { - Class.forName("org.h2.Driver"); - return DriverManager.getConnection("jdbc:h2:mem:my_test;", "sa", ""); - } - - private CSVPrinter printWithHeaderComments(final StringWriter sw, final Date now, final CSVFormat baseFormat) - throws IOException { - // Use withHeaderComments first to test CSV-145 - // @formatter:off - final CSVFormat format = baseFormat.builder() - .setHeaderComments("Generated by Apache Commons CSV 1.1", now) - .setCommentMarker('#') - .setHeader("Col1", "Col2") - .build(); - // @formatter:on - final CSVPrinter csvPrinter = format.print(sw); - csvPrinter.printRecord("A", "B"); - csvPrinter.printRecord("C", "D"); - csvPrinter.close(); - return csvPrinter; - } - - private String randStr() { - final Random r = new Random(); - - final int sz = r.nextInt(20); - // sz = r.nextInt(3); - final char[] buf = new char[sz]; - for (int i = 0; i < sz; i++) { - // stick in special chars with greater frequency - final char ch; - final int what = r.nextInt(20); - switch (what) { - case 0: - ch = '\r'; - break; - case 1: - ch = '\n'; - break; - case 
2: - ch = '\t'; - break; - case 3: - ch = '\f'; - break; - case 4: - ch = ' '; - break; - case 5: - ch = ','; - break; - case 6: - ch = DQUOTE_CHAR; - break; - case 7: - ch = '\''; - break; - case 8: - ch = BACKSLASH; - break; - default: - ch = (char) r.nextInt(300); - break; - // default: ch = 'a'; break; - } - buf[i] = ch; - } - return new String(buf); - } - - private void setUpTable(final Connection connection) throws SQLException { - try (final Statement statement = connection.createStatement()) { - statement.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR(255), TEXT CLOB)"); - statement.execute("insert into TEST values(1, 'r1', 'long text 1')"); - longText2 = StringUtils.repeat('a', IOUtils.DEFAULT_BUFFER_SIZE - 4); - longText2 += "\"\r\n\"a\""; - longText2 += StringUtils.repeat('a', IOUtils.DEFAULT_BUFFER_SIZE - 1); - statement.execute("insert into TEST values(2, 'r2', '" + longText2 + "')"); - longText2 = longText2.replace("\"","\"\""); - } - } - - @Test - public void testCloseBackwardCompatibility() throws IOException { - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT; - try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { - // empty - } - verify(writer, never()).flush(); - verify(writer, times(1)).close(); - }} - - @Test - public void testCloseWithCsvFormatAutoFlushOff() throws IOException { - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(false); - try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { - // empty - } - verify(writer, never()).flush(); - verify(writer, times(1)).close(); - } - } - - @Test - public void testCloseWithCsvFormatAutoFlushOn() throws IOException { - // System.out.println("start method"); - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(true); - try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { - // 
empty - } - verify(writer, times(1)).flush(); - verify(writer, times(1)).close(); - }} - - @Test - public void testCloseWithFlushOff() throws IOException { - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT; - @SuppressWarnings("resource") - final CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat); - csvPrinter.close(false); - verify(writer, never()).flush(); - verify(writer, times(1)).close(); - } - } - - @Test - public void testCloseWithFlushOn() throws IOException { - try (final Writer writer = mock(Writer.class)) { - @SuppressWarnings("resource") - final CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT); - csvPrinter.close(true); - verify(writer, times(1)).flush(); - } - } - - @Test - public void testCRComment() throws IOException { - final StringWriter sw = new StringWriter(); - final Object value = "abc"; - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { - printer.print(value); - printer.printComment("This is a comment\r\non multiple lines\rthis is next comment\r"); - assertEquals("abc" + recordSeparator + "# This is a comment" + recordSeparator + "# on multiple lines" - + recordSeparator + "# this is next comment" + recordSeparator + "# " + recordSeparator, sw.toString()); - } - } - - @Test - public void testCSV135() throws IOException { - final List list = new LinkedList<>(); - list.add("\"\""); // "" - list.add("\\\\"); // \\ - list.add("\\\"\\"); // \"\ - // - // "",\\,\"\ (unchanged) - tryFormat(list, null, null, "\"\",\\\\,\\\"\\"); - // - // """""",\\,"\""\" (quoted, and embedded DQ doubled) - tryFormat(list, '"', null, "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); - // - // "",\\\\,\\"\\ (escapes escaped, not quoted) - tryFormat(list, null, '\\', "\"\",\\\\\\\\,\\\\\"\\\\"); - // - // "\"\"","\\\\","\\\"\\" (quoted, and embedded DQ & escape escaped) - tryFormat(list, '"', '\\', "\"\\\"\\\"\",\"\\\\\\\\\",\"\\\\\\\"\\\\\""); - // - // 
"""""",\\,"\""\" (quoted, embedded DQ escaped) - tryFormat(list, '"', '"', "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); - } - - @Test - public void testCSV259() throws IOException { - final StringWriter sw = new StringWriter(); - try (final Reader reader = new FileReader("src/test/resources/org/apache/commons/csv/CSV-259/sample.txt"); - final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { - printer.print(reader); - assertEquals("x!,y!,z", sw.toString()); - } - } - - @Test - public void testDelimeterQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("'a,b,c',xyz", sw.toString()); - } - } - - @Test - public void testDelimeterQuoteNone() throws IOException { - final StringWriter sw = new StringWriter(); - final CSVFormat format = CSVFormat.DEFAULT.withEscape('!').withQuoteMode(QuoteMode.NONE); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("a!,b!,c,xyz", sw.toString()); - } - } - - @Test - public void testDelimeterStringQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build())) { - printer.print("a[|]b[|]c"); - printer.print("xyz"); - assertEquals("'a[|]b[|]c'[|]xyz", sw.toString()); - } - } - - @Test - public void testDelimeterStringQuoteNone() throws IOException { - final StringWriter sw = new StringWriter(); - final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').setQuoteMode(QuoteMode.NONE).build(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - printer.print("a[|]b[|]c"); - printer.print("xyz"); - printer.print("a[xy]bc[]"); - assertEquals("a![!|!]b![!|!]c[|]xyz[|]a[xy]bc[]", 
sw.toString()); - } - } - - @Test - public void testDelimiterEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { - printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("a!,b!,c,xyz", sw.toString()); - } - } - - @Test - public void testDelimiterPlain() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("a,b,c,xyz", sw.toString()); - } - } - - @Test - public void testDelimiterStringEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("|||").setEscape('!').setQuote(null).build())) { - printer.print("a|||b|||c"); - printer.print("xyz"); - assertEquals("a!|!|!|b!|!|!|c|||xyz", sw.toString()); - } - } - - @Test - public void testDisabledComment() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printComment("This is a comment"); - assertEquals("", sw.toString()); - } - } - - @Test - public void testDontQuoteEuroFirstChar() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { - printer.printRecord(EURO_CH, "Deux"); - assertEquals(EURO_CH + ",Deux" + recordSeparator, sw.toString()); - } - } - - @Test - public void testEolEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { - printer.print("a\rb\nc"); - printer.print("x\fy\bz"); - assertEquals("a!rb!nc,x\fy\bz", sw.toString()); - } - } - - @Test - public void testEolPlain() throws 
IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.print("a\rb\nc"); - printer.print("x\fy\bz"); - assertEquals("a\rb\nc,x\fy\bz", sw.toString()); - } - } - - @Test - public void testEolQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("a\rb\nc"); - printer.print("x\by\fz"); - assertEquals("'a\rb\nc',x\by\fz", sw.toString()); - } - } - - @Test - public void testEscapeBackslash1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\"); - } - assertEquals("\\", sw.toString()); - } - - @Test - public void testEscapeBackslash2() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\\r"); - } - assertEquals("'\\\r'", sw.toString()); - } - - @Test - public void testEscapeBackslash3() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("X\\\r"); - } - assertEquals("'X\\\r'", sw.toString()); - } - - @Test - public void testEscapeBackslash4() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - public void testEscapeBackslash5() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - 
public void testEscapeNull1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\"); - } - assertEquals("\\", sw.toString()); - } - - @Test - public void testEscapeNull2() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\\r"); - } - assertEquals("\"\\\r\"", sw.toString()); - } - - @Test - public void testEscapeNull3() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("X\\\r"); - } - assertEquals("\"X\\\r\"", sw.toString()); - } - - @Test - public void testEscapeNull4() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - public void testEscapeNull5() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - public void testExcelPrintAllArrayOfArrays() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords((Object[]) new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } }); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrintAllArrayOfLists() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords( - (Object[]) new List[] { 
Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2") }); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrintAllIterableOfArrays() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords(Arrays.asList(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrintAllIterableOfLists() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords( - Arrays.asList(Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2"))); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrintAllStreamOfArrays() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords(Stream.of(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrinter1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrinter2() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecord("a,b", "b"); - assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testHeader() throws IOException { - 
final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, - CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3"))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testHeaderCommentExcel() throws IOException { - final StringWriter sw = new StringWriter(); - final Date now = new Date(); - final CSVFormat format = CSVFormat.EXCEL; - try (final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { - assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1,Col2\r\nA,B\r\nC,D\r\n", - sw.toString()); - } - } - - @Test - public void testHeaderCommentTdf() throws IOException { - final StringWriter sw = new StringWriter(); - final Date now = new Date(); - final CSVFormat format = CSVFormat.TDF; - try (final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { - assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1\tCol2\r\nA\tB\r\nC\tD\r\n", - sw.toString()); - } - } - - @Test - public void testHeaderNotSet() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testInvalidFormat() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); - } - - @Test - public void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT); - final ResultSet resultSet = 
stmt.executeQuery("select ID, NAME, TEXT from TEST");) { - printer.printRecords(resultSet); - } - } - assertEquals("1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 + "\"" + recordSeparator, sw.toString()); - } - - @Test - public void testJdbcPrinterWithResultSet() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - Class.forName("org.h2.Driver"); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet).print(sw)) { - printer.printRecords(resultSet); - } - } - assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 - + "\"" + recordSeparator, sw.toString()); - } - - @Test - public void testJdbcPrinterWithResultSetHeader() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT);) { - try (final ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { - printer.printRecords(resultSet, true); - assertEquals("ID,NAME" + recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, - sw.toString()); - } - try (final ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { - printer.printRecords(resultSet, false); - assertNotEquals("ID,NAME" + recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, - sw.toString()); - } - } - } - } - - @Test - public void testJdbcPrinterWithResultSetMetaData() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - 
Class.forName("org.h2.Driver"); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet.getMetaData()).print(sw)) { - printer.printRecords(resultSet); - assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" - + longText2 + "\"" + recordSeparator, sw.toString()); - } - } - } - - @Test - @Disabled - public void testJira135_part1() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\""); - printer.printRecord(list); - } - final String expected = "\"\\\"\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - @Disabled - public void testJira135_part2() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\n"); - printer.printRecord(list); - } - final String expected = "\"\\n\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - @Disabled - public void testJira135_part3() throws IOException { - final CSVFormat format = 
CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\\"); - printer.printRecord(list); - } - final String expected = "\"\\\\\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - @Disabled - public void testJira135All() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\""); - list.add("\n"); - list.add("\\"); - printer.printRecord(list); - } - final String expected = "\"\\\"\",\"\\n\",\"\\\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - public void testMongoDbCsvBasic() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { - printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbCsvCommaInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { - printer.printRecord("a,b", "c"); - assertEquals("\"a,b\",c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbCsvDoubleQuoteInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, 
CSVFormat.MONGODB_CSV)) { - printer.printRecord("a \"c\" b", "d"); - assertEquals("\"a \"\"c\"\" b\",d" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbCsvTabInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { - printer.printRecord("a\tb", "c"); - assertEquals("a\tb,c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbTsvBasic() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { - printer.printRecord("a", "b"); - assertEquals("a\tb" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbTsvCommaInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { - printer.printRecord("a,b", "c"); - assertEquals("a,b\tc" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbTsvTabInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { - printer.printRecord("a\tb", "c"); - assertEquals("\"a\tb\"\tc" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMultiLineComment() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { - printer.printComment("This is a comment\non multiple lines"); - - assertEquals("# This is a comment" + recordSeparator + "# on multiple lines" + recordSeparator, - sw.toString()); - } - } - - @Test - public void testMySqlNullOutput() throws IOException { - Object[] s = new String[] { "NULL", null }; - CSVFormat format = CSVFormat.MYSQL.withQuote(DQUOTE_CHAR).withNullString("NULL") - .withQuoteMode(QuoteMode.NON_NUMERIC); - StringWriter 
writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - String expected = "\"NULL\"\tNULL\n"; - assertEquals(expected, writer.toString()); - String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(s, record0); - - s = new String[] { "\\N", null }; - format = CSVFormat.MYSQL.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "A" }; - format = CSVFormat.MYSQL.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\n", "A" }; - format = CSVFormat.MYSQL.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\n\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.MYSQL.withNullString("NULL"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\tNULL\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer 
= new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "", "\u000e,\\\r" }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\t\u000e,\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "NULL", "\\\r" }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "NULL\t\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\\r" }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - } - - @Test - public void testMySqlNullStringDefault() { - assertEquals("\\N", CSVFormat.MYSQL.getNullString()); - } - - @Test - public void testNewCsvPrinterAppendableNullFormat() { - assertThrows(NullPointerException.class, () -> new CSVPrinter(new StringWriter(), null)); - } - - @Test - public void testNewCsvPrinterNullAppendableFormat() { - assertThrows(NullPointerException.class, () -> new CSVPrinter(null, CSVFormat.DEFAULT)); - } - - @Test - public void testNotFlushable() throws IOException { - final Appendable out = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(out, 
CSVFormat.DEFAULT)) { - printer.printRecord("a", "b", "c"); - assertEquals("a,b,c" + recordSeparator, out.toString()); - printer.flush(); - } - } - - @Test - public void testParseCustomNullValues() throws IOException { - final StringWriter sw = new StringWriter(); - final CSVFormat format = CSVFormat.DEFAULT.withNullString("NULL"); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - printer.printRecord("a", null, "b"); - } - final String csvString = sw.toString(); - assertEquals("a,NULL,b" + recordSeparator, csvString); - try (final CSVParser iterable = format.parse(new StringReader(csvString))) { - final Iterator iterator = iterable.iterator(); - final CSVRecord record = iterator.next(); - assertEquals("a", record.get(0)); - assertNull(record.get(1)); - assertEquals("b", record.get(2)); - assertFalse(iterator.hasNext()); - } - } - - @Test - public void testPlainEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { - printer.print("abc"); - printer.print("xyz"); - assertEquals("abc,xyz", sw.toString()); - } - } - - @Test - public void testPlainPlain() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.print("abc"); - printer.print("xyz"); - assertEquals("abc,xyz", sw.toString()); - } - } - - @Test - public void testPlainQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("abc"); - assertEquals("abc", sw.toString()); - } - } - - @Test - @Disabled - public void testPostgreSqlCsvNullOutput() throws IOException { - Object[] s = new String[] { "NULL", null }; - CSVFormat format = 
CSVFormat.POSTGRESQL_CSV.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); - StringWriter writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - String expected = "\"NULL\",NULL\n"; - assertEquals(expected, writer.toString()); - String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(new Object[2], record0); - - s = new String[] { "\\N", null }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "A" }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\n", "A" }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\n\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("NULL"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\tNULL\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - 
assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "", "\u000e,\\\r" }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\t\u000e,\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "NULL", "\\\r" }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "NULL\t\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\\r" }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - } - - @Test - @Disabled - public void testPostgreSqlCsvTextOutput() throws IOException { - Object[] s = new String[] { "NULL", null }; - CSVFormat format = CSVFormat.POSTGRESQL_TEXT.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); - StringWriter writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - 
printer.printRecord(s); - } - String expected = "\"NULL\"\tNULL\n"; - assertEquals(expected, writer.toString()); - String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(new Object[2], record0); - - s = new String[] { "\\N", null }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "A" }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\n", "A" }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\n\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("NULL"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\tNULL\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { 
- printer.printRecord(s); - } - expected = "\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "", "\u000e,\\\r" }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\t\u000e,\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "NULL", "\\\r" }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "NULL\t\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\\r" }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - } - - @Test - public void testPostgreSqlNullStringDefaultCsv() { - assertEquals("", CSVFormat.POSTGRESQL_CSV.getNullString()); - } - - @Test - public void testPostgreSqlNullStringDefaultText() { - assertEquals("\\N", CSVFormat.POSTGRESQL_TEXT.getNullString()); - } - - @Test - public void testPrint() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(sw)) { - printer.printRecord("a", "b\\c"); - assertEquals("a,b\\c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrintCSVParser() throws IOException { - final String 
code = "a1,b1\n" // 1) - + "a2,b2\n" // 2) - + "a3,b3\n" // 3) - + "a4,b4\n"// 4) - ; - final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; - final CSVFormat format = CSVFormat.DEFAULT; - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { - printer.printRecords(parser); - } - try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); - } - } - - @Test - public void testPrintCSVRecord() throws IOException { - final String code = "a1,b1\n" // 1) - + "a2,b2\n" // 2) - + "a3,b3\n" // 3) - + "a4,b4\n"// 4) - ; - final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; - final CSVFormat format = CSVFormat.DEFAULT; - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { - for (final CSVRecord record : parser) { - printer.printRecord(record); - } - } - try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); - } - } - - @Test - public void testPrintCSVRecords() throws IOException { - final String code = "a1,b1\n" // 1) - + "a2,b2\n" // 2) - + "a3,b3\n" // 3) - + "a4,b4\n"// 4) - ; - final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; - final CSVFormat format = CSVFormat.DEFAULT; - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { - printer.printRecords(parser.getRecords()); - } - try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - 
Utils.compare("Fail", res, records); - } - } - - @Test - public void testPrintCustomNullValues() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withNullString("NULL"))) { - printer.printRecord("a", null, "b"); - assertEquals("a,NULL,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter2() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a,b", "b"); - assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter3() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a, b", "b "); - assertEquals("\"a, b\",\"b \"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter4() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b\"c"); - assertEquals("a,\"b\"\"c\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter5() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b\nc"); - assertEquals("a,\"b\nc\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter6() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - 
printer.printRecord("a", "b\r\nc"); - assertEquals("a,\"b\r\nc\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter7() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b\\c"); - assertEquals("a,b\\c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrintNullValues() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", null, "b"); - assertEquals("a,,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrintOnePositiveInteger() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL))) { - printer.print(Integer.MAX_VALUE); - assertEquals(String.valueOf(Integer.MAX_VALUE), sw.toString()); - } - } - - /** - * Test to target the use of {@link IOUtils#copy(java.io.Reader, Appendable)} which directly - * buffers the value from the Reader to the Appendable. - * - *

      Requires the format to have no quote or escape character, value to be a - * {@link java.io.Reader Reader} and the output MUST NOT be a - * {@link java.io.Writer Writer} but some other Appendable.

      - * - * @throws IOException Not expected to happen - */ - @Test - public void testPrintReaderWithoutQuoteToAppendable() throws IOException { - final StringBuilder sb = new StringBuilder(); - final String content = "testValue"; - try (final CSVPrinter printer = new CSVPrinter(sb, CSVFormat.DEFAULT.withQuote(null))) { - final StringReader value = new StringReader(content); - printer.print(value); - } - assertEquals(content, sb.toString()); - } - - /** - * Test to target the use of {@link IOUtils#copyLarge(java.io.Reader, Writer)} which directly - * buffers the value from the Reader to the Writer. - * - *

      Requires the format to have no quote or escape character, value to be a - * {@link java.io.Reader Reader} and the output MUST be a - * {@link java.io.Writer Writer}.

      - * - * @throws IOException Not expected to happen - */ - @Test - public void testPrintReaderWithoutQuoteToWriter() throws IOException { - final StringWriter sw = new StringWriter(); - final String content = "testValue"; - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - final StringReader value = new StringReader(content); - printer.print(value); - } - assertEquals(content, sw.toString()); - } - - @Test - public void testPrintRecordStream() throws IOException { - final String code = "a1,b1\n" // 1) - + "a2,b2\n" // 2) - + "a3,b3\n" // 3) - + "a4,b4\n"// 4) - ; - final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; - final CSVFormat format = CSVFormat.DEFAULT; - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { - for (final CSVRecord record : parser) { - printer.printRecord(record.stream()); - } - } - try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); - } - } - - @Test - public void testPrintRecordsWithCSVRecord() throws IOException { - final String[] values = {"A", "B", "C"}; - final String rowData = StringUtils.join(values, ','); - final CharArrayWriter charArrayWriter = new CharArrayWriter(0); - try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData)); - final CSVPrinter csvPrinter = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { - for (final CSVRecord record : parser) { - csvPrinter.printRecord(record); - } - } - assertEquals(6, charArrayWriter.size()); - assertEquals("A|B|C" + CSVFormat.INFORMIX_UNLOAD.getRecordSeparator(), charArrayWriter.toString()); - } - - @Test - public void testPrintRecordsWithEmptyVector() throws IOException { - final PrintStream out = System.out; - try { - System.setOut(new 
PrintStream(NullOutputStream.NULL_OUTPUT_STREAM)); - try (CSVPrinter csvPrinter = CSVFormat.POSTGRESQL_TEXT.printer()) { - final Vector vector = new Vector<>(); - final int expectedCapacity = 23; - vector.setSize(expectedCapacity); - csvPrinter.printRecords(vector); - assertEquals(expectedCapacity, vector.capacity()); - } - } finally { - System.setOut(out); - } - } - - @Test - public void testPrintRecordsWithObjectArray() throws IOException { - final CharArrayWriter charArrayWriter = new CharArrayWriter(0); - try (CSVPrinter csvPrinter = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { - final HashSet hashSet = new HashSet<>(); - final Object[] objectArray = new Object[6]; - objectArray[3] = hashSet; - csvPrinter.printRecords(objectArray); - } - assertEquals(6, charArrayWriter.size()); - assertEquals("\n\n\n\n\n\n", charArrayWriter.toString()); - } - - @Test - public void testPrintRecordsWithResultSetOneRow() throws IOException, SQLException { - try (CSVPrinter csvPrinter = CSVFormat.MYSQL.printer()) { - try (ResultSet resultSet = new SimpleResultSet()) { - csvPrinter.printRecords(resultSet); - assertEquals(0, resultSet.getRow()); - } - } - } - - @Test - public void testPrintToFileWithCharsetUtf16Be() throws IOException { - final File file = File.createTempFile(getClass().getName(), ".csv"); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file, StandardCharsets.UTF_16BE)) { - printer.printRecord("a", "b\\c"); - } - assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, StandardCharsets.UTF_16BE)); - } - - @Test - public void testPrintToFileWithDefaultCharset() throws IOException { - final File file = File.createTempFile(getClass().getName(), ".csv"); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { - printer.printRecord("a", "b\\c"); - } - assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, Charset.defaultCharset())); - } - - @Test - public void 
testPrintToPathWithDefaultCharset() throws IOException { - final File file = File.createTempFile(getClass().getName(), ".csv"); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file.toPath(), Charset.defaultCharset())) { - printer.printRecord("a", "b\\c"); - } - assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, Charset.defaultCharset())); - } - - @Test - public void testQuoteAll() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL))) { - printer.printRecord("a", "b\nc", "d"); - assertEquals("\"a\",\"b\nc\",\"d\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testQuoteCommaFirstChar() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { - printer.printRecord(","); - assertEquals("\",\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testQuoteNonNumeric() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.NON_NUMERIC))) { - printer.printRecord("a", "b\nc", Integer.valueOf(1)); - assertEquals("\"a\",\"b\nc\",1" + recordSeparator, sw.toString()); - } - } - - @Test - public void testRandomDefault() throws Exception { - doRandom(CSVFormat.DEFAULT, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testRandomExcel() throws Exception { - doRandom(CSVFormat.EXCEL, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomMongoDbCsv() throws Exception { - doRandom(CSVFormat.MONGODB_CSV, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testRandomMySql() throws Exception { - doRandom(CSVFormat.MYSQL, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomOracle() throws Exception { - doRandom(CSVFormat.ORACLE, 
ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomPostgreSqlCsv() throws Exception { - doRandom(CSVFormat.POSTGRESQL_CSV, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomPostgreSqlText() throws Exception { - doRandom(CSVFormat.POSTGRESQL_TEXT, ITERATIONS_FOR_RANDOM_TEST); - } - - - @Test - public void testRandomRfc4180() throws Exception { - doRandom(CSVFormat.RFC4180, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testRandomTdf() throws Exception { - doRandom(CSVFormat.TDF, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testSingleLineComment() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { - printer.printComment("This is a comment"); - assertEquals("# This is a comment" + recordSeparator, sw.toString()); - } - } - - @Test - public void testSingleQuoteQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("a'b'c"); - printer.print("xyz"); - assertEquals("'a''b''c',xyz", sw.toString()); - } - } - - @Test - public void testSkipHeaderRecordFalse() throws IOException { - // functionally identical to testHeader, used to test CSV-153 - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, - CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(false))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testSkipHeaderRecordTrue() throws IOException { - // functionally identical to testHeaderNotSet, used to test CSV-153 - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, - 
CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(true))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testTrailingDelimiterOnTwoColumns() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrailingDelimiter())) { - printer.printRecord("A", "B"); - assertEquals("A,B,\r\n", sw.toString()); - } - } - - @Test - public void testTrimOffOneColumn() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim(false))) { - printer.print(" A "); - assertEquals("\" A \"", sw.toString()); - } - } - - @Test - public void testTrimOnOneColumn() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { - printer.print(" A "); - assertEquals("A", sw.toString()); - } - } - - @Test - public void testTrimOnTwoColumns() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { - printer.print(" A "); - printer.print(" B "); - assertEquals("A,B", sw.toString()); - } - } - - private String[] toFirstRecordValues(final String expected, final CSVFormat format) throws IOException { - return CSVParser.parse(expected, format).getRecords().get(0).values(); - } - - private void tryFormat(final List list, final Character quote, final Character escape, final String expected) throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.withQuote(quote).withEscape(escape).withRecordSeparator(null); - final Appendable out = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(out, format)) { - printer.printRecord(list); - } - assertEquals(expected, out.toString()); - } 
-} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.Constants.BACKSLASH; +import static org.apache.commons.csv.Constants.CR; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import java.io.CharArrayWriter; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PrintStream; +import java.io.Reader; +import java.io.StringReader; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.sql.BatchUpdateException; +import java.sql.Connection; +import 
java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.Random; +import java.util.Vector; +import java.util.stream.Stream; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.NullOutputStream; +import org.apache.commons.lang3.StringUtils; +import org.h2.tools.SimpleResultSet; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +/** + * Tests {@link CSVPrinter}. + */ +class CSVPrinterTest { + + private static final int TABLE_RECORD_COUNT = 2; + private static final int TABLE_AND_HEADER_RECORD_COUNT = TABLE_RECORD_COUNT + 1; + private static final char DQUOTE_CHAR = '"'; + private static final char EURO_CH = '\u20AC'; + private static final int ITERATIONS_FOR_RANDOM_TEST = 50_000; + private static final char QUOTE_CH = '\''; + private static final String RECORD_SEPARATOR = CSVFormat.DEFAULT.getRecordSeparator(); + + private static String printable(final String s) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); i++) { + final char ch = s.charAt(i); + if (ch <= ' ' || ch >= 128) { + sb.append("(").append((int) ch).append(")"); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + private String longText2; + + private void assertInitialState(final CSVPrinter printer) { + assertEquals(0, printer.getRecordCount()); + } + + private void assertRowCount(final CSVFormat format, final String resultString, final int rowCount) throws IOException { + try (CSVParser parser = format.parse(new StringReader(resultString))) { + assertEquals(rowCount, parser.getRecords().size()); + } + 
} + + private File createTempFile() throws IOException { + return createTempPath().toFile(); + } + + private Path createTempPath() throws IOException { + return Files.createTempFile(getClass().getName(), ".csv"); + } + + private void doOneRandom(final CSVFormat format) throws Exception { + final Random r = new Random(); + + final int nLines = r.nextInt(4) + 1; + final int nCol = r.nextInt(3) + 1; + // nLines=1;nCol=2; + final String[][] lines = generateLines(nLines, nCol); + + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + + for (int i = 0; i < nLines; i++) { + // for (int j=0; j parseResult = parser.getRecords(); + + final String[][] expected = lines.clone(); + for (int i = 0; i < expected.length; i++) { + expected[i] = expectNulls(expected[i], format); + } + Utils.compare("Printer output :" + printable(result), expected, parseResult, -1); + } + } + + private void doRandom(final CSVFormat format, final int iter) throws Exception { + for (int i = 0; i < iter; i++) { + doOneRandom(format); + } + } + + /** + * Converts an input CSV array into expected output values, including NULLs. NULL strings are converted to null values because the parser will convert + * these strings to null. 
+ */ + private T[] expectNulls(final T[] original, final CSVFormat csvFormat) { + final T[] fixed = original.clone(); + for (int i = 0; i < fixed.length; i++) { + if (Objects.equals(csvFormat.getNullString(), fixed[i])) { + fixed[i] = null; + } + } + return fixed; + } + + private String[][] generateLines(final int nLines, final int nCol) { + final String[][] lines = new String[nLines][]; + for (int i = 0; i < nLines; i++) { + final String[] line = new String[nCol]; + lines[i] = line; + for (int j = 0; j < nCol; j++) { + line[j] = randStr(); + } + } + return lines; + } + + private Connection getH2Connection() throws SQLException, ClassNotFoundException { + Class.forName("org.h2.Driver"); + return DriverManager.getConnection("jdbc:h2:mem:my_test;", "sa", ""); + } + + private CSVPrinter printWithHeaderComments(final StringWriter sw, final Date now, final CSVFormat baseFormat) throws IOException { + // Use withHeaderComments first to test CSV-145 + // @formatter:off + final CSVFormat format = baseFormat.builder() + .setHeaderComments((String[]) null) // don't blow up + .setHeaderComments((Object[]) null) // don't blow up + .setHeaderComments("Generated by Apache Commons CSV 1.1", now) + .setCommentMarker('#') + .setHeader("Col1", "Col2") + .get(); + // @formatter:on + final CSVPrinter printer = format.print(sw); + printer.printRecord("A", "B"); + printer.printRecord("C", "D"); + printer.close(); + return printer; + } + + private String randStr() { + final Random r = new Random(); + final int sz = r.nextInt(20); + // sz = r.nextInt(3); + final char[] buf = new char[sz]; + for (int i = 0; i < sz; i++) { + // stick in special chars with greater frequency + final char ch; + final int what = r.nextInt(20); + switch (what) { + case 0: + ch = '\r'; + break; + case 1: + ch = '\n'; + break; + case 2: + ch = '\t'; + break; + case 3: + ch = '\f'; + break; + case 4: + ch = ' '; + break; + case 5: + ch = ','; + break; + case 6: + ch = DQUOTE_CHAR; + break; + case 7: + ch = '\''; + 
break; + case 8: + ch = BACKSLASH; + break; + default: + ch = (char) r.nextInt(300); + break; + // default: ch = 'a'; break; + } + buf[i] = ch; + } + return new String(buf); + } + + private void setUpTable(final Connection connection) throws SQLException { + try (Statement statement = connection.createStatement()) { + statement.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR(255), TEXT CLOB, BIN_DATA BLOB)"); + statement.execute("insert into TEST values(1, 'r1', 'long text 1', 'binary data 1')"); + longText2 = StringUtils.repeat('a', IOUtils.DEFAULT_BUFFER_SIZE - 4); + longText2 += "\"\r\n\"b\""; + longText2 += StringUtils.repeat('c', IOUtils.DEFAULT_BUFFER_SIZE - 1); + statement.execute("insert into TEST values(2, 'r2', '" + longText2 + "', 'binary data 2')"); + longText2 = longText2.replace("\"", "\"\""); + } + } + + @Test + void testCloseBackwardCompatibility() throws IOException { + try (Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT; + try (CSVPrinter printer = new CSVPrinter(writer, csvFormat)) { + assertInitialState(printer); + } + verify(writer, never()).flush(); + verify(writer, times(1)).close(); + } + } + + @Test + void testCloseWithCsvFormatAutoFlushOff() throws IOException { + try (Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(false); + try (CSVPrinter printer = new CSVPrinter(writer, csvFormat)) { + assertInitialState(printer); + } + verify(writer, never()).flush(); + verify(writer, times(1)).close(); + } + } + + @Test + void testCloseWithCsvFormatAutoFlushOn() throws IOException { + // System.out.println("start method"); + try (Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(true); + try (CSVPrinter printer = new CSVPrinter(writer, csvFormat)) { + assertInitialState(printer); + } + verify(writer, times(1)).flush(); + verify(writer, times(1)).close(); + } + } + + @Test + void 
testCloseWithFlushOff() throws IOException { + try (Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT; + @SuppressWarnings("resource") + final CSVPrinter printer = new CSVPrinter(writer, csvFormat); + assertInitialState(printer); + printer.close(false); + assertEquals(0, printer.getRecordCount()); + verify(writer, never()).flush(); + verify(writer, times(1)).close(); + } + } + + @Test + void testCloseWithFlushOn() throws IOException { + try (Writer writer = mock(Writer.class)) { + @SuppressWarnings("resource") + final CSVPrinter printer = new CSVPrinter(writer, CSVFormat.DEFAULT); + assertInitialState(printer); + printer.close(true); + assertEquals(0, printer.getRecordCount()); + verify(writer, times(1)).flush(); + } + } + + @Test + void testCRComment() throws IOException { + final StringWriter sw = new StringWriter(); + final Object value = "abc"; + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { + assertInitialState(printer); + printer.print(value); + assertEquals(0, printer.getRecordCount()); + printer.printComment("This is a comment\r\non multiple lines\rthis is next comment\r"); + assertEquals("abc" + RECORD_SEPARATOR + "# This is a comment" + RECORD_SEPARATOR + "# on multiple lines" + RECORD_SEPARATOR + + "# this is next comment" + RECORD_SEPARATOR + "# " + RECORD_SEPARATOR, sw.toString()); + assertEquals(0, printer.getRecordCount()); + } + } + + @Test + void testCSV135() throws IOException { + final List list = new LinkedList<>(); + list.add("\"\""); // "" + list.add("\\\\"); // \\ + list.add("\\\"\\"); // \"\ + // + // "",\\,\"\ (unchanged) + tryFormat(list, null, null, "\"\",\\\\,\\\"\\"); + // + // """""",\\,"\""\" (quoted, and embedded DQ doubled) + tryFormat(list, '"', null, "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); + // + // "",\\\\,\\"\\ (escapes escaped, not quoted) + tryFormat(list, null, '\\', "\"\",\\\\\\\\,\\\\\"\\\\"); + // + // "\"\"","\\\\","\\\"\\" (quoted, and embedded DQ 
& escape escaped) + tryFormat(list, '"', '\\', "\"\\\"\\\"\",\"\\\\\\\\\",\"\\\\\\\"\\\\\""); + // + // """""",\\,"\""\" (quoted, embedded DQ escaped) + tryFormat(list, '"', '"', "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); + } + + @Test + void testCSV259() throws IOException { + final StringWriter sw = new StringWriter(); + try (Reader reader = new FileReader("src/test/resources/org/apache/commons/csv/CSV-259/sample.txt"); + CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { + assertInitialState(printer); + printer.print(reader); + assertEquals("x!,y!,z", sw.toString()); + } + } + + @Test + void testDelimeterQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + assertInitialState(printer); + printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("'a,b,c',xyz", sw.toString()); + } + } + + @Test + void testDelimeterQuoteNone() throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.withEscape('!').withQuoteMode(QuoteMode.NONE); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + assertInitialState(printer); + printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("a!,b!,c,xyz", sw.toString()); + } + } + + @Test + void testDelimeterStringQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').get())) { + assertInitialState(printer); + printer.print("a[|]b[|]c"); + printer.print("xyz"); + assertEquals("'a[|]b[|]c'[|]xyz", sw.toString()); + } + } + + @Test + void testDelimeterStringQuoteNone() throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').setQuoteMode(QuoteMode.NONE).get(); + try (CSVPrinter printer = new 
CSVPrinter(sw, format)) { + assertInitialState(printer); + printer.print("a[|]b[|]c"); + printer.print("xyz"); + printer.print("a[xy]bc[]"); + assertEquals("a![!|!]b![!|!]c[|]xyz[|]a[xy]bc[]", sw.toString()); + } + } + + @Test + void testDelimiterEscaped() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { + assertInitialState(printer); + printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("a!,b!,c,xyz", sw.toString()); + } + } + + @Test + void testDelimiterPlain() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + assertInitialState(printer); + printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("a,b,c,xyz", sw.toString()); + } + } + + @Test + void testDelimiterStringEscaped() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("|||").setEscape('!').setQuote(null).get())) { + assertInitialState(printer); + printer.print("a|||b|||c"); + printer.print("xyz"); + assertEquals("a!|!|!|b!|!|!|c|||xyz", sw.toString()); + } + } + + @Test + void testDisabledComment() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printComment("This is a comment"); + assertEquals("", sw.toString()); + assertEquals(0, printer.getRecordCount()); + } + } + + @Test + void testDontQuoteEuroFirstChar() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { + assertInitialState(printer); + printer.printRecord(EURO_CH, "Deux"); + assertEquals(EURO_CH + ",Deux" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testEolEscaped() throws 
IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { + assertInitialState(printer); + printer.print("a\rb\nc"); + printer.print("x\fy\bz"); + assertEquals("a!rb!nc,x\fy\bz", sw.toString()); + } + } + + @Test + void testEolPlain() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + assertInitialState(printer); + printer.print("a\rb\nc"); + printer.print("x\fy\bz"); + assertEquals("a\rb\nc,x\fy\bz", sw.toString()); + } + } + + @Test + void testEolQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + assertInitialState(printer); + printer.print("a\rb\nc"); + printer.print("x\by\fz"); + assertEquals("'a\rb\nc',x\by\fz", sw.toString()); + } + } + + @SuppressWarnings("unlikely-arg-type") + @Test + void testEquals() throws IOException { + // Don't use assertNotEquals here + assertFalse(CSVFormat.DEFAULT.equals(null)); + // Don't use assertNotEquals here + assertFalse(CSVFormat.DEFAULT.equals("")); + } + + @Test + void testEscapeBackslash1() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + assertInitialState(printer); + printer.print("\\"); + } + assertEquals("\\", sw.toString()); + } + + @Test + void testEscapeBackslash2() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + assertInitialState(printer); + printer.print("\\\r"); + } + assertEquals("'\\\r'", sw.toString()); + } + + @Test + void testEscapeBackslash3() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, 
CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + assertInitialState(printer); + printer.print("X\\\r"); + } + assertEquals("'X\\\r'", sw.toString()); + } + + @Test + void testEscapeBackslash4() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + assertInitialState(printer); + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + void testEscapeBackslash5() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + assertInitialState(printer); + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + void testEscapeCommentMarkerFirstChar() throws IOException { + // No quoting available in escape mode, so a leading comment marker must be escaped or the + // record reads back as a comment and is dropped. Mirrors the quoting fix for QuoteMode.MINIMAL. + final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote(null).setEscape('\\').setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";comment-like"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord(col1, "b"); + printer.printRecord(new StringReader(col1), new StringReader("b")); + // The marker past the first character does not start a comment and is left alone. + printer.printRecord("a;b", ";c"); + } + final String string = sw.toString(); + assertEquals("\\;comment-like,b" + RECORD_SEPARATOR + + "\\;comment-like,b" + RECORD_SEPARATOR + + "a;b,\\;c" + RECORD_SEPARATOR, string); + // The emitted records must read back as the original values, none parsed as a comment. 
+ try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(3, records.size()); + assertEquals(col1, records.get(0).get(0)); + assertEquals("b", records.get(0).get(1)); + assertEquals(col1, records.get(1).get(0)); + assertEquals("b", records.get(1).get(1)); + assertEquals("a;b", records.get(2).get(0)); + assertEquals(";c", records.get(2).get(1)); + } + } + + @Test + void testEscapeCommentMarkerFirstCharWithQuoteModeNone() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setEscape('\\').setQuoteMode(QuoteMode.NONE).setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";bar"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord(col1, "b"); + printer.printRecord(new StringReader(col1), new StringReader("b")); + } + final String string = sw.toString(); + assertEquals("\\;bar,b" + RECORD_SEPARATOR + "\\;bar,b" + RECORD_SEPARATOR, string); + try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + for (final CSVRecord record : records) { + assertEquals(col1, record.get(0)); + assertEquals("b", record.get(1)); + } + } + } + + @Test + void testEscapeNull1() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + assertInitialState(printer); + printer.print("\\"); + } + assertEquals("\\", sw.toString()); + } + + @Test + void testEscapeNull2() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + assertInitialState(printer); + printer.print("\\\r"); + } + assertEquals("\"\\\r\"", sw.toString()); + } + + @Test + void testEscapeNull3() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new 
CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + assertInitialState(printer); + printer.print("X\\\r"); + } + assertEquals("\"X\\\r\"", sw.toString()); + } + + @Test + void testEscapeNull4() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + assertInitialState(printer); + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + void testEscapeNull5() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + assertInitialState(printer); + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + void testExcelPrintAllArrayOfArrays() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords((Object[]) new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } }); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllArrayOfArraysWithFirstEmptyValue2() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords((Object[]) new String[][] { { "" } }); + assertEquals("\"\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllArrayOfArraysWithFirstSpaceValue1() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords((Object[]) new String[][] { { " ", "r1c2" } }); + assertEquals("\" \",r1c2" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllArrayOfArraysWithFirstTabValue1() throws IOException { + final 
StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords((Object[]) new String[][] { { "\t", "r1c2" } }); + assertEquals("\"\t\",r1c2" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllArrayOfLists() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords((Object[]) new List[] { Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2") }); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllArrayOfListsWithFirstEmptyValue2() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords((Object[]) new List[] { Arrays.asList("") }); + assertEquals("\"\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllIterableOfArrays() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords(Arrays.asList(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllIterableOfArraysWithFirstEmptyValue2() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords(Arrays.asList(new String[][] { { "" } })); + assertEquals("\"\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrintAllIterableOfLists() throws IOException { + final StringWriter sw = new StringWriter(); + try 
(CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecords(Arrays.asList(Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2"))); + assertEquals("r1c1,r1c2" + RECORD_SEPARATOR + "r2c1,r2c2" + RECORD_SEPARATOR, sw.toString()); + } + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, Long.MAX_VALUE }) + void testExcelPrintAllStreamOfArrays(final long maxRows) throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.EXCEL.builder().setMaxRows(maxRows).get(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + assertInitialState(printer); + printer.printRecords(Stream.of(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); + String expected = "r1c1,r1c2" + RECORD_SEPARATOR; + if (maxRows != 1) { + expected += "r2c1,r2c2" + RECORD_SEPARATOR; + } + assertEquals(expected, sw.toString()); + } + } + + @Test + void testExcelPrinter1() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecord("a", "b"); + assertEquals("a,b" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testExcelPrinter2() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + assertInitialState(printer); + printer.printRecord("a,b", "b"); + assertEquals("\"a,b\",b" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testHeader() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3"))) { + assertEquals(1, printer.getRecordCount()); + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", "z"); + assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); + } + } + + @Test + void testHeaderCommentExcel() throws 
IOException { + final StringWriter sw = new StringWriter(); + final Date now = new Date(); + final CSVFormat format = CSVFormat.EXCEL; + try (CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { + assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1,Col2\r\nA,B\r\nC,D\r\n", sw.toString()); + } + } + + @Test + void testHeaderCommentTdf() throws IOException { + final StringWriter sw = new StringWriter(); + final Date now = new Date(); + final CSVFormat format = CSVFormat.TDF; + try (CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { + assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1\tCol2\r\nA\tB\r\nC\tD\r\n", sw.toString()); + } + } + + @Test + void testHeaderNotSet() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + assertInitialState(printer); + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", "z"); + assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); + } + } + + @Test + void testInvalidFormat() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); + } + + @Test + void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + final CSVFormat csvFormat = CSVFormat.DEFAULT; + try (Connection connection = getH2Connection()) { + setUpTable(connection); + try (Statement stmt = connection.createStatement(); + CSVPrinter printer = new CSVPrinter(sw, csvFormat); + ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT, BIN_DATA from TEST")) { + assertInitialState(printer); + printer.printRecords(resultSet); + assertEquals(TABLE_RECORD_COUNT, printer.getRecordCount()); + } + } + final String csv = sw.toString(); + assertEquals("1,r1,\"long text 1\",\"YmluYXJ5IGRhdGEgMQ==\"" + RECORD_SEPARATOR + "2,r2,\"" + longText2 + "\",\"YmluYXJ5IGRhdGEgMg==\"" + + 
RECORD_SEPARATOR, csv); + // Round trip the data + try (StringReader reader = new StringReader(csv); + CSVParser csvParser = csvFormat.parse(reader)) { + // Row 1 + CSVRecord record = csvParser.nextRecord(); + assertEquals("1", record.get(0)); + assertEquals("r1", record.get(1)); + assertEquals("long text 1", record.get(2)); + assertEquals("YmluYXJ5IGRhdGEgMQ==", record.get(3)); + // Row 2 + record = csvParser.nextRecord(); + assertEquals("2", record.get(0)); + assertEquals("r2", record.get(1)); + assertEquals("YmluYXJ5IGRhdGEgMg==", record.get(3)); + } + } + + @Test + void testJdbcPrinterWithFirstEmptyValue2() throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + try (Connection connection = getH2Connection()) { + try (Statement stmt = connection.createStatement(); + ResultSet resultSet = stmt.executeQuery("select '' AS EMPTYVALUE from DUAL"); + CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet).print(sw)) { + printer.printRecords(resultSet); + } + } + assertEquals("EMPTYVALUE" + RECORD_SEPARATOR + "\"\"" + RECORD_SEPARATOR, sw.toString()); + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 1, 2, 3, 4, Long.MAX_VALUE }) + void testJdbcPrinterWithResultSet(final long maxRows) throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); + try (Connection connection = getH2Connection()) { + setUpTable(connection); + try (Statement stmt = connection.createStatement(); + ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); + CSVPrinter printer = format.withHeader(resultSet).print(sw)) { + printer.printRecords(resultSet); + } + } + final String resultString = sw.toString(); + final String header = "ID,NAME,TEXT"; + final String headerRow1 = header + RECORD_SEPARATOR + "1,r1,\"long text 1\"" + RECORD_SEPARATOR; + final String allRows = headerRow1 + 
"2,r2,\"" + longText2 + "\"" + RECORD_SEPARATOR; + final int expectedRowsWithHeader; + if (maxRows == 1) { + assertEquals(headerRow1, resultString); + expectedRowsWithHeader = 2; + } else { + assertEquals(allRows, resultString); + expectedRowsWithHeader = TABLE_AND_HEADER_RECORD_COUNT; + } + assertRowCount(CSVFormat.DEFAULT, resultString, expectedRowsWithHeader); + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 3, 4, Long.MAX_VALUE }) + void testJdbcPrinterWithResultSetHeader(final long maxRows) throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + try (Connection connection = getH2Connection()) { + setUpTable(connection); + final CSVFormat format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); + try (Statement stmt = connection.createStatement(); + CSVPrinter printer = new CSVPrinter(sw, format)) { + try (ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { + printer.printRecords(resultSet, true); + assertEquals(TABLE_RECORD_COUNT, printer.getRecordCount()); + assertEquals("ID,NAME" + RECORD_SEPARATOR + "1,r1" + RECORD_SEPARATOR + "2,r2" + RECORD_SEPARATOR, sw.toString()); + } + assertRowCount(format, sw.toString(), TABLE_AND_HEADER_RECORD_COUNT); + try (ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { + printer.printRecords(resultSet, false); + assertEquals(TABLE_RECORD_COUNT * 2, printer.getRecordCount()); + assertNotEquals("ID,NAME" + RECORD_SEPARATOR + "1,r1" + RECORD_SEPARATOR + "2,r2" + RECORD_SEPARATOR, sw.toString()); + } + assertRowCount(CSVFormat.DEFAULT, sw.toString(), TABLE_AND_HEADER_RECORD_COUNT + TABLE_RECORD_COUNT); + } + } + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 3, 4, Long.MAX_VALUE }) + void testJdbcPrinterWithResultSetMetaData(final long maxRows) throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + try (Connection connection = getH2Connection()) { + 
setUpTable(connection); + final CSVFormat format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); + try (Statement stmt = connection.createStatement(); + ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); + CSVPrinter printer = format.withHeader(resultSet.getMetaData()).print(sw)) { + // The header is the first record. + assertEquals(1, printer.getRecordCount()); + printer.printRecords(resultSet); + assertEquals(3, printer.getRecordCount()); + assertEquals("ID,NAME,TEXT" + RECORD_SEPARATOR + "1,r1,\"long text 1\"" + RECORD_SEPARATOR + "2,r2,\"" + longText2 + "\"" + RECORD_SEPARATOR, + sw.toString()); + } + assertRowCount(format, sw.toString(), TABLE_AND_HEADER_RECORD_COUNT); + } + } + + @Test + void testJira135_part1() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\""); + printer.printRecord(list); + } + final String expected = "\"\\\"\"" + format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + @Disabled + void testJira135_part2() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\n"); + printer.printRecord(list); + } + final String expected = "\"\\n\"" + format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + void 
testJira135_part3() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\\"); + printer.printRecord(list); + } + final String expected = "\"\\\\\"" + format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + @Disabled + void testJira135All() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\""); + list.add("\n"); + list.add("\\"); + printer.printRecord(list); + } + final String expected = "\"\\\"\",\"\\n\",\"\\\"" + format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + void testMongoDbCsvBasic() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { + printer.printRecord("a", "b"); + assertEquals("a,b" + RECORD_SEPARATOR, sw.toString()); + assertEquals(1, printer.getRecordCount()); + } + } + + @Test + void testMongoDbCsvCommaInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { + printer.printRecord("a,b", "c"); + assertEquals("\"a,b\",c" + RECORD_SEPARATOR, sw.toString()); + assertEquals(1, printer.getRecordCount()); + } + } + + @Test + void testMongoDbCsvDoubleQuoteInValue() throws 
IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { + printer.printRecord("a \"c\" b", "d"); + assertEquals("\"a \"\"c\"\" b\",d" + RECORD_SEPARATOR, sw.toString()); + assertEquals(1, printer.getRecordCount()); + } + } + + @Test + void testMongoDbCsvTabInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { + printer.printRecord("a\tb", "c"); + assertEquals("a\tb,c" + RECORD_SEPARATOR, sw.toString()); + assertEquals(1, printer.getRecordCount()); + } + } + + @Test + void testMongoDbTsvBasic() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { + printer.printRecord("a", "b"); + assertEquals("a\tb" + RECORD_SEPARATOR, sw.toString()); + assertEquals(1, printer.getRecordCount()); + } + } + + @Test + void testMongoDbTsvCommaInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { + printer.printRecord("a,b", "c"); + assertEquals("a,b\tc" + RECORD_SEPARATOR, sw.toString()); + assertEquals(1, printer.getRecordCount()); + } + } + + @Test + void testMongoDbTsvTabInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { + printer.printRecord("a\tb", "c"); + assertEquals("\"a\tb\"\tc" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testMultiLineComment() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { + printer.printComment("This is a comment\non multiple lines"); + assertEquals("# This is a comment" + RECORD_SEPARATOR + "# on multiple lines" + RECORD_SEPARATOR, sw.toString()); + assertEquals(0, 
printer.getRecordCount()); + } + } + + @Test + void testMySqlNullOutput() throws IOException { + Object[] s = new String[] { "NULL", null }; + CSVFormat format = CSVFormat.MYSQL.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.NON_NUMERIC); + StringWriter writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + String expected = "\"NULL\"\tNULL\n"; + assertEquals(expected, writer.toString()); + String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(s, record0); + + s = new String[] { "\\N", null }; + format = CSVFormat.MYSQL.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "A" }; + format = CSVFormat.MYSQL.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\n", "A" }; + format = CSVFormat.MYSQL.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\n\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.MYSQL.withNullString("NULL"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\tNULL\n"; + assertEquals(expected, 
writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.MYSQL; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "", "\u000e,\\\r" }; + format = CSVFormat.MYSQL; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\t\u000e,\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "NULL", "\\\r" }; + format = CSVFormat.MYSQL; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "NULL\t\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\\r" }; + format = CSVFormat.MYSQL; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + } + + @Test + void testMySqlNullStringDefault() { + assertEquals("\\N", CSVFormat.MYSQL.getNullString()); + } + + @Test + void testNewCsvPrinterAppendableNullFormat() { + assertThrows(NullPointerException.class, () -> new CSVPrinter(new StringWriter(), null)); + } + + @Test + void testNewCsvPrinterNullAppendableFormat() { + assertThrows(NullPointerException.class, () -> new 
CSVPrinter(null, CSVFormat.DEFAULT)); + } + + @Test + void testNotFlushable() throws IOException { + final Appendable out = new StringBuilder(); + try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) { + printer.printRecord("a", "b", "c"); + assertEquals("a,b,c" + RECORD_SEPARATOR, out.toString()); + printer.flush(); + } + } + + @Test + void testParseCustomNullValues() throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.withNullString("NULL"); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord("a", null, "b"); + } + final String csvString = sw.toString(); + assertEquals("a,NULL,b" + RECORD_SEPARATOR, csvString); + try (CSVParser iterable = format.parse(new StringReader(csvString))) { + final Iterator iterator = iterable.iterator(); + final CSVRecord record = iterator.next(); + assertEquals("a", record.get(0)); + assertNull(record.get(1)); + assertEquals("b", record.get(2)); + assertFalse(iterator.hasNext()); + } + } + + @Test + void testPlainEscaped() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { + printer.print("abc"); + printer.print("xyz"); + assertEquals("abc,xyz", sw.toString()); + } + } + + @Test + void testPlainPlain() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + printer.print("abc"); + printer.print("xyz"); + assertEquals("abc,xyz", sw.toString()); + } + } + + @Test + void testPlainQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + printer.print("abc"); + assertEquals("abc", sw.toString()); + } + } + + @Test + @Disabled + void testPostgreSqlCsvNullOutput() throws IOException { + Object[] s = new String[] { "NULL", null 
}; + CSVFormat format = CSVFormat.POSTGRESQL_CSV.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); + StringWriter writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + String expected = "\"NULL\",NULL\n"; + assertEquals(expected, writer.toString()); + String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(new Object[2], record0); + + s = new String[] { "\\N", null }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "A" }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\n", "A" }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\n\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("NULL"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\tNULL\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + 
assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "", "\u000e,\\\r" }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\t\u000e,\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "NULL", "\\\r" }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "NULL\t\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\\r" }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + } + + @Test + @Disabled + void testPostgreSqlCsvTextOutput() throws IOException { + Object[] s = new String[] { "NULL", null }; + CSVFormat format = CSVFormat.POSTGRESQL_TEXT.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); + StringWriter writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + String expected = 
"\"NULL\"\tNULL\n"; + assertEquals(expected, writer.toString()); + String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(new Object[2], record0); + + s = new String[] { "\\N", null }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "A" }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\n", "A" }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\n\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("NULL"); + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\tNULL\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\t\\N\n"; + assertEquals(expected, 
writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "", "\u000e,\\\r" }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\t\u000e,\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "NULL", "\\\r" }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "NULL\t\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\\r" }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + } + + @Test + void testPostgreSqlNullStringDefaultCsv() { + assertEquals("", CSVFormat.POSTGRESQL_CSV.getNullString()); + } + + @Test + void testPostgreSqlNullStringDefaultText() { + assertEquals("\\N", CSVFormat.POSTGRESQL_TEXT.getNullString()); + } + + @Test + void testPrint() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = CSVFormat.DEFAULT.print(sw)) { + assertInitialState(printer); + printer.printRecord("a", "b\\c"); + assertEquals("a,b\\c" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrintCSVParser() throws IOException { + // @formatter:off + final String code = "a1,b1\n" + // 1) + "a2,b2\n" + // 2) + "a3,b3\n" + // 3) + "a4,b4\n"; 
// 4) + // @formatter:on + final String[][] res = { { "a1", "b1" }, { "a2", "b2" }, { "a3", "b3" }, { "a4", "b4" } }; + final CSVFormat format = CSVFormat.DEFAULT; + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = format.print(sw); + CSVParser parser = CSVParser.parse(code, format)) { + assertInitialState(printer); + printer.printRecords(parser); + } + try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", res, records, -1); + } + } + + @Test + void testPrintCSVRecord() throws IOException { + // @formatter:off + final String code = "a1,b1\n" + // 1) + "a2,b2\n" + // 2) + "a3,b3\n" + // 3) + "a4,b4\n"; // 4) + // @formatter:on + final String[][] res = { { "a1", "b1" }, { "a2", "b2" }, { "a3", "b3" }, { "a4", "b4" } }; + final CSVFormat format = CSVFormat.DEFAULT; + final StringWriter sw = new StringWriter(); + int row = 0; + try (CSVPrinter printer = format.print(sw); + CSVParser parser = CSVParser.parse(code, format)) { + assertInitialState(printer); + for (final CSVRecord record : parser) { + printer.printRecord(record); + assertEquals(++row, printer.getRecordCount()); + } + assertEquals(row, printer.getRecordCount()); + } + try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", res, records, -1); + } + } + + @ParameterizedTest + @ValueSource(longs = { -1, 0, 3, 4, Long.MAX_VALUE }) + void testPrintCSVRecords(final long maxRows) throws IOException { + // @formatter:off + final String code = "a1,b1\n" + // 1) + "a2,b2\n" + // 2) + "a3,b3\n" + // 3) + "a4,b4\n"; // 4) + // @formatter:on + final String[][] expected = { { "a1", "b1" }, { "a2", "b2" }, { "a3", "b3" }, { "a4", "b4" } }; + final CSVFormat format = CSVFormat.DEFAULT.builder().setMaxRows(maxRows).get(); + final StringWriter sw = new StringWriter(); + try 
(CSVPrinter printer = format.print(sw); + CSVParser parser = CSVParser.parse(code, format)) { + assertInitialState(printer); + printer.printRecords(parser.getRecords()); + } + try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", expected, records, maxRows); + } + } + + @Test + void testPrintCustomNullValues() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withNullString("NULL"))) { + assertInitialState(printer); + printer.printRecord("a", null, "b"); + assertEquals("a,NULL,b" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrinter1() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a", "b"); + assertEquals(1, printer.getRecordCount()); + assertEquals("a,b" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrinter2() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a,b", "b"); + assertEquals("\"a,b\",b" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrinter3() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a, b", "b "); + assertEquals("\"a, b\",\"b \"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrinter4() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a", "b\"c"); + assertEquals("a,\"b\"\"c\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + 
@Test + void testPrinter5() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a", "b\nc"); + assertEquals("a,\"b\nc\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrinter6() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a", "b\r\nc"); + assertEquals("a,\"b\r\nc\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrinter7() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a", "b\\c"); + assertEquals("a,b\\c" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrintNullValues() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + assertInitialState(printer); + printer.printRecord("a", null, "b"); + assertEquals("a,,b" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testPrintOnePositiveInteger() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL))) { + assertInitialState(printer); + printer.print(Integer.MAX_VALUE); + assertEquals(String.valueOf(Integer.MAX_VALUE), sw.toString()); + } + } + + /** + * Test to target the use of {@link IOUtils#copy(java.io.Reader, Appendable)} which directly buffers the value from the Reader to the Appendable. + * + *

      + * Requires the format to have no quote or escape character, value to be a {@link Reader Reader} and the output MUST NOT be a {@link Writer Writer} + * but some other Appendable. + *

      + * + * @throws IOException Not expected to happen + */ + @Test + void testPrintReaderWithoutQuoteToAppendable() throws IOException { + final StringBuilder sb = new StringBuilder(); + final String content = "testValue"; + try (CSVPrinter printer = new CSVPrinter(sb, CSVFormat.DEFAULT.withQuote(null))) { + assertInitialState(printer); + final StringReader value = new StringReader(content); + printer.print(value); + } + assertEquals(content, sb.toString()); + } + + /** + * Test to target the use of {@link IOUtils#copyLarge(java.io.Reader, Writer)} which directly buffers the value from the Reader to the Writer. + * + *

      + * Requires the format to have no quote or escape character, value to be a {@link Reader Reader} and the output MUST be a {@link Writer Writer}. + *

      + * + * @throws IOException Not expected to happen + */ + @Test + void testPrintReaderWithoutQuoteToWriter() throws IOException { + final StringWriter sw = new StringWriter(); + final String content = "testValue"; + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + final StringReader value = new StringReader(content); + printer.print(value); + } + assertEquals(content, sw.toString()); + } + + @Test + void testPrintRecordStream() throws IOException { + // @formatter:off + final String code = "a1,b1\n" + // 1) + "a2,b2\n" + // 2) + "a3,b3\n" + // 3) + "a4,b4\n"; // 4) + // @formatter:on + final String[][] res = { { "a1", "b1" }, { "a2", "b2" }, { "a3", "b3" }, { "a4", "b4" } }; + final CSVFormat format = CSVFormat.DEFAULT; + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = format.print(sw); + CSVParser parser = CSVParser.parse(code, format)) { + long count = 0; + for (final CSVRecord record : parser) { + printer.printRecord(record.stream()); + assertEquals(++count, printer.getRecordCount()); + } + } + try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", res, records, -1); + } + } + + @Test + void testPrintRecordsWithCSVRecord() throws IOException { + final String[] values = { "A", "B", "C" }; + final String rowData = StringUtils.join(values, ','); + final CharArrayWriter charArrayWriter = new CharArrayWriter(0); + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData)); + CSVPrinter printer = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { + long count = 0; + for (final CSVRecord record : parser) { + printer.printRecord(record); + assertEquals(++count, printer.getRecordCount()); + } + } + assertEquals(6, charArrayWriter.size()); + assertEquals("A|B|C" + CSVFormat.INFORMIX_UNLOAD.getRecordSeparator(), charArrayWriter.toString()); + } + + @Test + void 
testPrintRecordsWithEmptyVector() throws IOException { + final PrintStream out = System.out; + try { + System.setOut(new PrintStream(NullOutputStream.INSTANCE)); + try (CSVPrinter printer = CSVFormat.POSTGRESQL_TEXT.printer()) { + final Vector vector = new Vector<>(); + final int expectedCapacity = 23; + vector.setSize(expectedCapacity); + printer.printRecords(vector); + assertEquals(expectedCapacity, vector.capacity()); + assertEquals(expectedCapacity, printer.getRecordCount()); + } + } finally { + System.setOut(out); + } + } + + @Test + void testPrintRecordsWithObjectArray() throws IOException { + final CharArrayWriter charArrayWriter = new CharArrayWriter(0); + final Object[] objectArray = new Object[6]; + try (CSVPrinter printer = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { + final HashSet hashSet = new HashSet<>(); + objectArray[3] = hashSet; + printer.printRecords(objectArray); + assertEquals(objectArray.length, printer.getRecordCount()); + } + assertEquals(6, charArrayWriter.size()); + assertEquals("\n\n\n\n\n\n", charArrayWriter.toString()); + } + + @Test + void testPrintRecordsWithResultSetOneRow() throws IOException, SQLException { + try (CSVPrinter printer = CSVFormat.MYSQL.printer()) { + try (ResultSet resultSet = new SimpleResultSet()) { + assertInitialState(printer); + printer.printRecords(resultSet); + assertInitialState(printer); + assertEquals(0, resultSet.getRow()); + } + } + } + + @Test + void testPrintToFileWithCharsetUtf16Be() throws IOException { + final File file = createTempFile(); + try (CSVPrinter printer = CSVFormat.DEFAULT.print(file, StandardCharsets.UTF_16BE)) { + printer.printRecord("a", "b\\c"); + } + assertEquals("a,b\\c" + RECORD_SEPARATOR, FileUtils.readFileToString(file, StandardCharsets.UTF_16BE)); + } + + @Test + void testPrintToFileWithDefaultCharset() throws IOException { + final File file = createTempFile(); + try (CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { + 
printer.printRecord("a", "b\\c"); + } + assertEquals("a,b\\c" + RECORD_SEPARATOR, FileUtils.readFileToString(file, Charset.defaultCharset())); + } + + @Test + void testPrintToPathWithDefaultCharset() throws IOException { + final Path file = createTempPath(); + try (CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { + printer.printRecord("a", "b\\c"); + } + assertEquals("a,b\\c" + RECORD_SEPARATOR, new String(Files.readAllBytes(file), Charset.defaultCharset())); + } + + @Test + void testQuoteAll() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL))) { + printer.printRecord("a", "b\nc", "d"); + assertEquals("\"a\",\"b\nc\",\"d\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testQuoteCharEscapedWithQuoteModeNone() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote('"').setEscape('?').setQuoteMode(QuoteMode.NONE).get(); + final StringWriter sw = new StringWriter(); + final String col1 = "\"abc"; + final String col2 = "x\"y"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord(col1, col2); + printer.printRecord(new StringReader(col1), new StringReader(col2)); + } + assertEquals("?\"abc,x?\"y" + RECORD_SEPARATOR + "?\"abc,x?\"y" + RECORD_SEPARATOR, sw.toString()); + // The emitted records must read back as the original values. 
+ try (CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + for (final CSVRecord record : records) { + assertEquals(col1, record.get(0)); + assertEquals(col2, record.get(1)); + } + } + } + + @Test + void testQuoteCommaFirstChar() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { + printer.printRecord(","); + assertEquals("\",\"" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testQuoteCommentMarkerFirstChar() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";comment-like"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + // A real comment is written with the marker, unquoted. + printer.printComment("a real comment"); + // A value starting with the marker is quoted, so it does not read back as a comment. + printer.printRecord(col1, "b"); + // The marker past the first character does not start a comment, so only the leading-marker value is quoted. + printer.printRecord("a;b", ";c"); + } + final String string = sw.toString(); + assertEquals("; a real comment" + RECORD_SEPARATOR + + "\";comment-like\",b" + RECORD_SEPARATOR + + "a;b,\";c\"" + RECORD_SEPARATOR, string); + // The comment is dropped on read; both data records survive intact. 
+ try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + assertEquals(col1, records.get(0).get(0)); + assertEquals("b", records.get(0).get(1)); + assertEquals("a;b", records.get(1).get(0)); + assertEquals(";c", records.get(1).get(1)); + } + } + + @Test + void testQuoteNonNumeric() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.NON_NUMERIC))) { + printer.printRecord("a", "b\nc", Integer.valueOf(1)); + assertEquals("\"a\",\"b\nc\",1" + RECORD_SEPARATOR, sw.toString()); + } + } + + @Test + void testRandomDefault() throws Exception { + doRandom(CSVFormat.DEFAULT, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + void testRandomExcel() throws Exception { + doRandom(CSVFormat.EXCEL, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + @Disabled + void testRandomMongoDbCsv() throws Exception { + doRandom(CSVFormat.MONGODB_CSV, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + void testRandomMySql() throws Exception { + doRandom(CSVFormat.MYSQL, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + @Disabled + void testRandomOracle() throws Exception { + doRandom(CSVFormat.ORACLE, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + @Disabled + void testRandomPostgreSqlCsv() throws Exception { + doRandom(CSVFormat.POSTGRESQL_CSV, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + void testRandomPostgreSqlText() throws Exception { + doRandom(CSVFormat.POSTGRESQL_TEXT, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + void testRandomRfc4180() throws Exception { + doRandom(CSVFormat.RFC4180, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + void testRandomTdf() throws Exception { + doRandom(CSVFormat.TDF, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + void testSingleLineComment() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { + 
printer.printComment("This is a comment"); + assertEquals("# This is a comment" + RECORD_SEPARATOR, sw.toString()); + assertEquals(0, printer.getRecordCount()); + } + } + + @Test + void testSingleQuoteQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + printer.print("a'b'c"); + printer.print("xyz"); + assertEquals("'a''b''c',xyz", sw.toString()); + } + } + + @Test + void testSkipHeaderRecordFalse() throws IOException { + // functionally identical to testHeader, used to test CSV-153 + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(false))) { + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", "z"); + assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); + } + } + + @Test + void testSkipHeaderRecordTrue() throws IOException { + // functionally identical to testHeaderNotSet, used to test CSV-153 + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(true))) { + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", "z"); + assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); + } + } + + @Test + void testTrailingDelimiterOnTwoColumns() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrailingDelimiter())) { + printer.printRecord("A", "B"); + assertEquals("A,B,\r\n", sw.toString()); + } + } + + @Test + void testTrimOffOneColumn() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim(false))) { + printer.print(" A "); + assertEquals("\" A \"", sw.toString()); + } + } + + @Test + void testTrimOnOneColumn() 
throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { + printer.print(" A "); + assertEquals("A", sw.toString()); + } + } + + @Test + void testTrimOnTwoColumns() throws IOException { + final StringWriter sw = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { + printer.print(" A "); + printer.print(" B "); + assertEquals("A,B", sw.toString()); + } + } + + private String[] toFirstRecordValues(final String expected, final CSVFormat format) throws IOException { + try (CSVParser parser = CSVParser.parse(expected, format)) { + return parser.getRecords().get(0).values(); + } + } + + private void tryFormat(final List list, final Character quote, final Character escape, final String expected) throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withQuote(quote).withEscape(escape).withRecordSeparator(null); + final Appendable out = new StringBuilder(); + try (CSVPrinter printer = new CSVPrinter(out, format)) { + printer.printRecord(list); + } + assertEquals(expected, out.toString()); + } + +} diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index 4833c26c1e..94060d62b2 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -1,24 +1,28 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv; +import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -41,7 +45,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -public class CSVRecordTest { +class CSVRecordTest { private enum EnumFixture { UNKNOWN_COLUMN @@ -64,89 +68,122 @@ public String toString() { } private Map headerMap; - private CSVRecord record, recordWithHeader; + private CSVRecord record; + private CSVRecord recordWithHeader; private String[] values; @BeforeEach public void setUp() throws Exception { values = new String[] { "A", "B", "C" }; final String rowData = StringUtils.join(values, ','); - try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData))) { + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData))) { record = parser.iterator().next(); } - try (final CSVParser parser = CSVFormat.DEFAULT.builder().setHeader(EnumHeader.class).build().parse(new StringReader(rowData))) { + try (CSVParser parser = CSVFormat.DEFAULT.builder().setHeader(EnumHeader.class).get().parse(new StringReader(rowData))) { recordWithHeader = parser.iterator().next(); headerMap = parser.getHeaderMap(); } } @Test - public void testCSVRecordNULLValues() throws IOException { - final CSVParser parser = CSVParser.parse("A,B\r\nONE,TWO", CSVFormat.DEFAULT.withHeader()); - final CSVRecord csvRecord = new CSVRecord(parser, null, null, 0L, 0L); - assertEquals(0, csvRecord.size()); - assertThrows(IllegalArgumentException.class, () -> csvRecord.get("B")); + void testCSVRecordNULLValues() 
throws IOException { + try (CSVParser parser = CSVParser.parse("A,B\r\nONE,TWO", CSVFormat.DEFAULT.withHeader())) { + final CSVRecord csvRecord = new CSVRecord(parser, null, null, 0L, 0L, 0L); + assertEquals(0, csvRecord.size()); + assertThrows(IllegalArgumentException.class, () -> csvRecord.get("B")); + } + } + + @Test + void testDuplicateHeaderGet() throws IOException { + final String csv = "A,A,B,B\n1,2,5,6\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().get(); + + try (CSVParser parser = CSVParser.parse(csv, format)) { + final CSVRecord record = parser.nextRecord(); + + assertAll("Test that it gets the last instance of a column when there are duplicate headings", + () -> assertEquals("2", record.get("A")), + () -> assertEquals("6", record.get("B")) + ); + } + } + + @Test + void testDuplicateHeaderToMap() throws IOException { + final String csv = "A,A,B,B\n1,2,5,6\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().get(); + + try (CSVParser parser = CSVParser.parse(csv, format)) { + final CSVRecord record = parser.nextRecord(); + final Map map = record.toMap(); + + assertAll("Test that it gets the last instance of a column when there are duplicate headings", + () -> assertEquals("2", map.get("A")), + () -> assertEquals("6", map.get("B")) + ); + } } @Test - public void testGetInt() { + void testGetInt() { assertEquals(values[0], record.get(0)); assertEquals(values[1], record.get(1)); assertEquals(values[2], record.get(2)); } @Test - public void testGetNullEnum() { + void testGetNullEnum() { assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get((Enum) null)); } @Test - public void testGetString() { + void testGetString() { assertEquals(values[0], recordWithHeader.get(EnumHeader.FIRST.name())); assertEquals(values[1], recordWithHeader.get(EnumHeader.SECOND.name())); assertEquals(values[2], recordWithHeader.get(EnumHeader.THIRD.name())); } @Test - public void testGetStringInconsistentRecord() { + void 
testGetStringInconsistentRecord() { headerMap.put("fourth", Integer.valueOf(4)); assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get("fourth")); } @Test - public void testGetStringNoHeader() { + void testGetStringNoHeader() { assertThrows(IllegalStateException.class, () -> record.get("first")); } @Test - public void testGetUnmappedEnum() { + void testGetUnmappedEnum() { assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get(EnumFixture.UNKNOWN_COLUMN)); } @Test - public void testGetUnmappedName() { + void testGetUnmappedName() { assertThrows(IllegalArgumentException.class, () -> assertNull(recordWithHeader.get("fourth"))); } @Test - public void testGetUnmappedNegativeInt() { + void testGetUnmappedNegativeInt() { assertThrows(ArrayIndexOutOfBoundsException.class, () -> recordWithHeader.get(Integer.MIN_VALUE)); } @Test - public void testGetUnmappedPositiveInt() { + void testGetUnmappedPositiveInt() { assertThrows(ArrayIndexOutOfBoundsException.class, () -> recordWithHeader.get(Integer.MAX_VALUE)); } @Test - public void testGetWithEnum() { + void testGetWithEnum() { assertEquals(recordWithHeader.get("FIRST"), recordWithHeader.get(EnumHeader.FIRST)); assertEquals(recordWithHeader.get("SECOND"), recordWithHeader.get(EnumHeader.SECOND)); assertThrows(IllegalArgumentException.class, () -> recordWithHeader.get(EnumFixture.UNKNOWN_COLUMN)); } @Test - public void testIsConsistent() { + void testIsConsistent() { assertTrue(record.isConsistent()); assertTrue(recordWithHeader.isConsistent()); final Map map = recordWithHeader.getParser().getHeaderMap(); @@ -156,10 +193,10 @@ public void testIsConsistent() { } @Test - public void testIsInconsistent() throws IOException { + void testIsInconsistent() throws IOException { final String[] headers = { "first", "second", "third" }; final String rowData = StringUtils.join(values, ','); - try (final CSVParser parser = CSVFormat.DEFAULT.withHeader(headers).parse(new StringReader(rowData))) { + try 
(CSVParser parser = CSVFormat.DEFAULT.withHeader(headers).parse(new StringReader(rowData))) { final Map map = parser.getHeaderMapRaw(); final CSVRecord record1 = parser.iterator().next(); map.put("fourth", Integer.valueOf(4)); @@ -168,14 +205,14 @@ public void testIsInconsistent() throws IOException { } @Test - public void testIsMapped() { + void testIsMapped() { assertFalse(record.isMapped("first")); assertTrue(recordWithHeader.isMapped(EnumHeader.FIRST.name())); assertFalse(recordWithHeader.isMapped("fourth")); } @Test - public void testIsSetInt() { + void testIsSetInt() { assertFalse(record.isSet(-1)); assertTrue(record.isSet(0)); assertTrue(record.isSet(2)); @@ -185,14 +222,14 @@ public void testIsSetInt() { } @Test - public void testIsSetString() { + void testIsSetString() { assertFalse(record.isSet("first")); assertTrue(recordWithHeader.isSet(EnumHeader.FIRST.name())); assertFalse(recordWithHeader.isSet("DOES NOT EXIST")); } @Test - public void testIterator() { + void testIterator() { int i = 0; for (final String value : record) { assertEquals(values[i], value); @@ -201,19 +238,19 @@ public void testIterator() { } @Test - public void testPutInMap() { + void testPutInMap() { final Map map = new ConcurrentHashMap<>(); this.recordWithHeader.putIn(map); - this.validateMap(map, false); + validateMap(map, false); // Test that we can compile with assignment to the same map as the param. 
final TreeMap map2 = recordWithHeader.putIn(new TreeMap<>()); - this.validateMap(map2, false); + validateMap(map2, false); } @Test - public void testRemoveAndAddColumns() throws IOException { + void testRemoveAndAddColumns() throws IOException { // do: - try (final CSVPrinter printer = new CSVPrinter(new StringBuilder(), CSVFormat.DEFAULT)) { + try (CSVPrinter printer = new CSVPrinter(new StringBuilder(), CSVFormat.DEFAULT)) { final Map map = recordWithHeader.toMap(); map.remove("OldColumn"); map.put("ZColumn", "NewValue"); @@ -226,9 +263,9 @@ public void testRemoveAndAddColumns() throws IOException { } @Test - public void testSerialization() throws IOException, ClassNotFoundException { + void testSerialization() throws IOException, ClassNotFoundException { final CSVRecord shortRec; - try (final CSVParser parser = CSVParser.parse("A,B\n#my comment\nOne,Two", CSVFormat.DEFAULT.withHeader().withCommentMarker('#'))) { + try (CSVParser parser = CSVParser.parse("A,B\n#my comment\nOne,Two", CSVFormat.DEFAULT.withHeader().withCommentMarker('#'))) { shortRec = parser.iterator().next(); } final ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -238,7 +275,7 @@ public void testSerialization() throws IOException, ClassNotFoundException { final ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); try (ObjectInputStream ois = new ObjectInputStream(in)) { final Object object = ois.readObject(); - assertTrue(object instanceof CSVRecord); + assertInstanceOf(CSVRecord.class, object); final CSVRecord rec = (CSVRecord) object; assertEquals(1L, rec.getRecordNumber()); assertEquals("One", rec.get(0)); @@ -254,17 +291,12 @@ public void testSerialization() throws IOException, ClassNotFoundException { assertFalse(rec.isSet("A")); assertEquals(0, rec.toMap().size()); // This will throw - try { - rec.get("A"); - org.junit.jupiter.api.Assertions.fail("Access by name is not expected after deserialisation"); - } catch (final IllegalStateException expected) { - // OK 
- } + assertThrows(IllegalStateException.class, () -> rec.get("A")); } } @Test - public void testStream() { + void testStream() { final AtomicInteger i = new AtomicInteger(); record.stream().forEach(value -> { assertEquals(values[i.get()], value); @@ -273,7 +305,7 @@ public void testStream() { } @Test - public void testToListAdd() { + void testToListAdd() { final String[] expected = values.clone(); final List list = record.toList(); list.add("Last"); @@ -283,7 +315,7 @@ public void testToListAdd() { } @Test - public void testToListFor() { + void testToListFor() { int i = 0; for (final String value : record.toList()) { assertEquals(values[i], value); @@ -292,7 +324,7 @@ public void testToListFor() { } @Test - public void testToListForEach() { + void testToListForEach() { final AtomicInteger i = new AtomicInteger(); record.toList().forEach(e -> { assertEquals(values[i.getAndIncrement()], e); @@ -300,7 +332,7 @@ public void testToListForEach() { } @Test - public void testToListSet() { + void testToListSet() { final String[] expected = values.clone(); final List list = record.toList(); list.set(list.size() - 1, "Last"); @@ -310,14 +342,14 @@ public void testToListSet() { } @Test - public void testToMap() { + void testToMap() { final Map map = this.recordWithHeader.toMap(); - this.validateMap(map, true); + validateMap(map, true); } @Test - public void testToMapWithNoHeader() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.newFormat(','))) { + void testToMapWithNoHeader() throws Exception { + try (CSVParser parser = CSVParser.parse("a,b", CSVFormat.newFormat(','))) { final CSVRecord shortRec = parser.iterator().next(); final Map map = shortRec.toMap(); assertNotNull(map, "Map is not null."); @@ -326,15 +358,15 @@ public void testToMapWithNoHeader() throws Exception { } @Test - public void testToMapWithShortRecord() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { 
+ void testToMapWithShortRecord() throws Exception { + try (CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { final CSVRecord shortRec = parser.iterator().next(); shortRec.toMap(); } } @Test - public void testToString() { + void testToString() { assertNotNull(recordWithHeader.toString()); assertTrue(recordWithHeader.toString().contains("comment=")); assertTrue(recordWithHeader.toString().contains("recordNumber=")); diff --git a/src/test/java/org/apache/commons/csv/CsvAssertions.java b/src/test/java/org/apache/commons/csv/CsvAssertions.java new file mode 100644 index 0000000000..b6c2b5d9cd --- /dev/null +++ b/src/test/java/org/apache/commons/csv/CsvAssertions.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; + +public class CsvAssertions { + + public static void assertValuesEquals(final String[] expected, final CSVRecord actual) { + assertArrayEquals(expected, actual.values()); + } +} diff --git a/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java b/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java index a6396ee518..b8d9b9f198 100644 --- a/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java +++ b/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java @@ -1,47 +1,55 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; -import static org.apache.commons.csv.Constants.END_OF_STREAM; import static org.apache.commons.csv.Constants.UNDEFINED; +import static org.apache.commons.io.IOUtils.EOF; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import org.junit.jupiter.api.Test; /** * Test {@link ExtendedBufferedReader}. */ -public class ExtendedBufferedReaderTest { +class ExtendedBufferedReaderTest { + + static final String LF = "\n"; + static final String CR = "\r"; + static final String CRLF = CR + LF; + static final String LFCR = LF + CR; // easier to read the string below private ExtendedBufferedReader createBufferedReader(final String s) { return new ExtendedBufferedReader(new StringReader(s)); } @Test - public void testEmptyInput() throws Exception { - try (final ExtendedBufferedReader br = createBufferedReader("")) { - assertEquals(END_OF_STREAM, br.read()); - assertEquals(END_OF_STREAM, br.lookAhead()); - assertEquals(END_OF_STREAM, br.getLastChar()); + void testEmptyInput() throws Exception { + try (ExtendedBufferedReader br = createBufferedReader("")) { + assertEquals(EOF, br.read()); + assertEquals(EOF, br.peek()); + assertEquals(EOF, br.getLastChar()); assertNull(br.readLine()); assertEquals(0, br.read(new char[10], 0, 0)); } @@ -49,160 +57,175 @@ public void testEmptyInput() throws Exception { /* * Test to illustrate https://issues.apache.org/jira/browse/CSV-75 - * */ @Test - public void testReadChar() throws 
Exception { - final String LF = "\n"; - final String CR = "\r"; - final String CRLF = CR + LF; - final String LFCR = LF + CR;// easier to read the string below + void testReadChar() throws Exception { final String test = "a" + LF + "b" + CR + "c" + LF + LF + "d" + CR + CR + "e" + LFCR + "f " + CRLF; // EOL eol EOL EOL eol eol EOL+CR EOL - final int EOLeolct = 9; + final int eolCount = 9; - try (final ExtendedBufferedReader br = createBufferedReader(test)) { - assertEquals(0, br.getCurrentLineNumber()); + try (ExtendedBufferedReader br = createBufferedReader(test)) { + assertEquals(0, br.getLineNumber()); + int lineCount = 0; while (br.readLine() != null) { // consume all + lineCount++; } - assertEquals(EOLeolct, br.getCurrentLineNumber()); + assertEquals(eolCount, br.getLineNumber()); + assertEquals(lineCount, br.getLineNumber()); } - try (final ExtendedBufferedReader br = createBufferedReader(test)) { - assertEquals(0, br.getCurrentLineNumber()); - while (br.read() != -1) { + try (ExtendedBufferedReader br = createBufferedReader(test)) { + assertEquals(0, br.getLineNumber()); + int readCount = 0; + while (br.read() != EOF) { // consume all + readCount++; } - assertEquals(EOLeolct, br.getCurrentLineNumber()); + assertEquals(eolCount, br.getLineNumber()); + assertEquals(readCount, test.length()); } - try (final ExtendedBufferedReader br = createBufferedReader(test)) { - assertEquals(0, br.getCurrentLineNumber()); + try (ExtendedBufferedReader br = createBufferedReader(test)) { + assertEquals(0, br.getLineNumber()); final char[] buff = new char[10]; - while (br.read(buff, 0, 3) != -1) { + while (br.read(buff, 0, 3) != EOF) { // consume all } - assertEquals(EOLeolct, br.getCurrentLineNumber()); + assertEquals(eolCount, br.getLineNumber()); } } @Test - public void testReadingInDifferentBuffer() throws Exception { - final char[] tmp1 = new char[2], tmp2 = new char[4]; + void testReadingInDifferentBuffer() throws Exception { + final char[] tmp1 = new char[2]; + final 
char[] tmp2 = new char[4]; try (ExtendedBufferedReader reader = createBufferedReader("1\r\n2\r\n")) { reader.read(tmp1, 0, 2); reader.read(tmp2, 2, 2); - assertEquals(2, reader.getCurrentLineNumber()); + assertEquals(2, reader.getLineNumber()); + } + } + + @Test + void testReadingSupplementaryCharacterTracksBytes() throws Exception { + final String input = "😀"; + final char[] buffer = new char[input.length()]; + try (ExtendedBufferedReader reader = new ExtendedBufferedReader(new StringReader(input), StandardCharsets.UTF_8, true)) { + assertEquals(input.length(), reader.read(buffer, 0, buffer.length)); + assertArrayEquals(input.toCharArray(), buffer); + assertEquals(input.getBytes(StandardCharsets.UTF_8).length, reader.getBytesRead()); + assertEquals(input.length(), reader.getPosition()); + assertEquals(input.charAt(input.length() - 1), reader.getLastChar()); } } @Test - public void testReadLine() throws Exception { - try (final ExtendedBufferedReader br = createBufferedReader("")) { + void testReadLine() throws Exception { + try (ExtendedBufferedReader br = createBufferedReader("")) { assertNull(br.readLine()); } - try (final ExtendedBufferedReader br = createBufferedReader("\n")) { + try (ExtendedBufferedReader br = createBufferedReader("\n")) { assertEquals("", br.readLine()); assertNull(br.readLine()); } - try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) { - assertEquals(0, br.getCurrentLineNumber()); + try (ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) { + assertEquals(0, br.getLineNumber()); assertEquals("foo", br.readLine()); - assertEquals(1, br.getCurrentLineNumber()); + assertEquals(1, br.getLineNumber()); assertEquals("", br.readLine()); - assertEquals(2, br.getCurrentLineNumber()); + assertEquals(2, br.getLineNumber()); assertEquals("hello", br.readLine()); - assertEquals(3, br.getCurrentLineNumber()); + assertEquals(3, br.getLineNumber()); assertNull(br.readLine()); - assertEquals(3,
br.getCurrentLineNumber()); + assertEquals(3, br.getLineNumber()); } - try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) { + try (ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) { assertEquals('f', br.read()); - assertEquals('o', br.lookAhead()); + assertEquals('o', br.peek()); assertEquals("oo", br.readLine()); - assertEquals(1, br.getCurrentLineNumber()); - assertEquals('\n', br.lookAhead()); + assertEquals(1, br.getLineNumber()); + assertEquals('\n', br.peek()); assertEquals("", br.readLine()); - assertEquals(2, br.getCurrentLineNumber()); - assertEquals('h', br.lookAhead()); + assertEquals(2, br.getLineNumber()); + assertEquals('h', br.peek()); assertEquals("hello", br.readLine()); assertNull(br.readLine()); - assertEquals(3, br.getCurrentLineNumber()); + assertEquals(3, br.getLineNumber()); } - try (final ExtendedBufferedReader br = createBufferedReader("foo\rbaar\r\nfoo")) { + try (ExtendedBufferedReader br = createBufferedReader("foo\rbaar\r\nfoo")) { assertEquals("foo", br.readLine()); - assertEquals('b', br.lookAhead()); + assertEquals('b', br.peek()); assertEquals("baar", br.readLine()); - assertEquals('f', br.lookAhead()); + assertEquals('f', br.peek()); assertEquals("foo", br.readLine()); assertNull(br.readLine()); } } @Test - public void testReadLookahead1() throws Exception { - try (final ExtendedBufferedReader br = createBufferedReader("1\n2\r3\n")) { - assertEquals(0, br.getCurrentLineNumber()); - assertEquals('1', br.lookAhead()); + void testReadLookahead1() throws Exception { + try (ExtendedBufferedReader br = createBufferedReader("1\n2\r3\n")) { + assertEquals(0, br.getLineNumber()); + assertEquals('1', br.peek()); assertEquals(UNDEFINED, br.getLastChar()); - assertEquals(0, br.getCurrentLineNumber()); + assertEquals(0, br.getLineNumber()); assertEquals('1', br.read()); // Start line 1 assertEquals('1', br.getLastChar()); - assertEquals(1, br.getCurrentLineNumber()); - assertEquals('\n', 
br.lookAhead()); - assertEquals(1, br.getCurrentLineNumber()); + assertEquals(1, br.getLineNumber()); + assertEquals('\n', br.peek()); + assertEquals(1, br.getLineNumber()); assertEquals('1', br.getLastChar()); assertEquals('\n', br.read()); - assertEquals(1, br.getCurrentLineNumber()); + assertEquals(1, br.getLineNumber()); assertEquals('\n', br.getLastChar()); - assertEquals(1, br.getCurrentLineNumber()); + assertEquals(1, br.getLineNumber()); - assertEquals('2', br.lookAhead()); - assertEquals(1, br.getCurrentLineNumber()); + assertEquals('2', br.peek()); + assertEquals(1, br.getLineNumber()); assertEquals('\n', br.getLastChar()); - assertEquals(1, br.getCurrentLineNumber()); + assertEquals(1, br.getLineNumber()); assertEquals('2', br.read()); // Start line 2 - assertEquals(2, br.getCurrentLineNumber()); + assertEquals(2, br.getLineNumber()); assertEquals('2', br.getLastChar()); - assertEquals('\r', br.lookAhead()); - assertEquals(2, br.getCurrentLineNumber()); + assertEquals('\r', br.peek()); + assertEquals(2, br.getLineNumber()); assertEquals('2', br.getLastChar()); assertEquals('\r', br.read()); assertEquals('\r', br.getLastChar()); - assertEquals(2, br.getCurrentLineNumber()); + assertEquals(2, br.getLineNumber()); - assertEquals('3', br.lookAhead()); + assertEquals('3', br.peek()); assertEquals('\r', br.getLastChar()); assertEquals('3', br.read()); // Start line 3 assertEquals('3', br.getLastChar()); - assertEquals(3, br.getCurrentLineNumber()); + assertEquals(3, br.getLineNumber()); - assertEquals('\n', br.lookAhead()); - assertEquals(3, br.getCurrentLineNumber()); + assertEquals('\n', br.peek()); + assertEquals(3, br.getLineNumber()); assertEquals('3', br.getLastChar()); assertEquals('\n', br.read()); - assertEquals(3, br.getCurrentLineNumber()); + assertEquals(3, br.getLineNumber()); assertEquals('\n', br.getLastChar()); - assertEquals(3, br.getCurrentLineNumber()); + assertEquals(3, br.getLineNumber()); - assertEquals(END_OF_STREAM, br.lookAhead()); + 
assertEquals(EOF, br.peek()); assertEquals('\n', br.getLastChar()); - assertEquals(END_OF_STREAM, br.read()); - assertEquals(END_OF_STREAM, br.getLastChar()); - assertEquals(END_OF_STREAM, br.read()); - assertEquals(END_OF_STREAM, br.lookAhead()); - assertEquals(3, br.getCurrentLineNumber()); + assertEquals(EOF, br.read()); + assertEquals(EOF, br.getLastChar()); + assertEquals(EOF, br.read()); + assertEquals(EOF, br.peek()); + assertEquals(3, br.getLineNumber()); } } @Test - public void testReadLookahead2() throws Exception { + void testReadLookahead2() throws Exception { final char[] ref = new char[5]; final char[] res = new char[5]; - try (final ExtendedBufferedReader br = createBufferedReader("abcdefg")) { + try (ExtendedBufferedReader br = createBufferedReader("abcdefg")) { ref[0] = 'a'; ref[1] = 'b'; ref[2] = 'c'; @@ -210,7 +233,7 @@ public void testReadLookahead2() throws Exception { assertArrayEquals(ref, res); assertEquals('c', br.getLastChar()); - assertEquals('d', br.lookAhead()); + assertEquals('d', br.peek()); ref[4] = 'd'; assertEquals(1, br.read(res, 4, 1)); assertArrayEquals(ref, res); diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java new file mode 100644 index 0000000000..aaf8e206b3 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.Test; + +class JiraCsv196Test { + + private Reader getTestInput(final String path) { + return new InputStreamReader(ClassLoader.getSystemClassLoader().getResourceAsStream(path)); + } + + @Test + void testParseFourBytes() throws IOException { + final CSVFormat format = CSVFormat.Builder.create().setDelimiter(',').setQuote('\'').get(); + // @formatter:off + try (@SuppressWarnings("resource") // parser closes the reader. + CSVParser parser = new CSVParser.Builder() + .setFormat(format) + .setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv")) + .setCharset(StandardCharsets.UTF_8) + .setTrackBytes(true) + .get()) { + // @formatter:on + final long[] charByteKey = { 0, 84, 701, 1318, 1935 }; + int idx = 0; + for (final CSVRecord record : parser) { + assertEquals(charByteKey[idx++], record.getBytePosition(), "At index " + idx); + } + } + } + + @Test + void testParseThreeBytes() throws IOException { + final CSVFormat format = CSVFormat.Builder.create().setDelimiter(',').setQuote('\'').get(); + // @formatter:off + try (@SuppressWarnings("resource") // parser closes the reader. 
+ CSVParser parser = new CSVParser.Builder() + .setFormat(format) + .setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv")) + .setCharset(StandardCharsets.UTF_8) + .setTrackBytes(true) + .get()) { + // @formatter:on + final long[] charByteKey = { 0, 89, 242, 395 }; + int idx = 0; + for (final CSVRecord record : parser) { + assertEquals(charByteKey[idx++], record.getBytePosition(), "At index " + idx); + } + } + } +} diff --git a/src/test/java/org/apache/commons/csv/JiraCsv318Test.java b/src/test/java/org/apache/commons/csv/JiraCsv318Test.java new file mode 100644 index 0000000000..984509e87d --- /dev/null +++ b/src/test/java/org/apache/commons/csv/JiraCsv318Test.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.apache.commons.io.function.IOConsumer; +import org.apache.commons.io.function.IOStream; +import org.apache.commons.lang3.ArrayUtils; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/projects/CSV/issues/CSV-318?filter=allopenissues + * + * @see CSVPrinter + */ +class JiraCsv318Test { + + private void checkOutput(final ByteArrayOutputStream baos) { + checkOutput(baos.toString()); + } + + private void checkOutput(final String string) { + assertEquals("col a,col b,col c", string.trim()); + } + + private Stream newParallelStream() { + // returned stream is intermediate + return newStream().parallel(); + } + + private CSVPrinter newPrinter(final ByteArrayOutputStream baos) throws IOException { + return new CSVPrinter(new PrintWriter(baos), CSVFormat.DEFAULT); + } + + private Stream newSequentialStream() { + // returned stream is intermediate + return newStream().sequential(); + } + + private Stream newStream() { + return Stream.of("col a", "col b", "col c"); + } + + @Test + void testDefaultStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + printer.printRecord(newStream()); + } + checkOutput(baos); + } + + @SuppressWarnings("resource") + @Test + void testParallelIOStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + IOStream.adapt(newParallelStream()).forEachOrdered(printer::print); + } + // No EOR marker in this test intentionally, so checkOutput will trim. 
+ checkOutput(baos); + } + + @SuppressWarnings("resource") + @Test + void testParallelIOStreamSynchronizedPrinterNotUsed() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + synchronized (printer) { + IOStream.adapt(newParallelStream()).forEachOrdered(IOConsumer.noop()); + } + } + final List list = new ArrayList<>(); + try (CSVPrinter printer = newPrinter(baos)) { + synchronized (printer) { + IOStream.adapt(newParallelStream()).forEachOrdered(list::add); + } + } + // No EOR marker in this test intentionally, so checkOutput will trim. + checkOutput(String.join(",", list.toArray(ArrayUtils.EMPTY_STRING_ARRAY))); + } + + @Test + void testParallelStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + printer.printRecord(newParallelStream()); + } + checkOutput(baos); + } + + @Test + void testSequentialStream() throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (CSVPrinter printer = newPrinter(baos)) { + printer.printRecord(newSequentialStream()); + } + checkOutput(baos); + } +} diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java index cc8d728af0..a76f6e513b 100644 --- a/src/test/java/org/apache/commons/csv/LexerTest.java +++ b/src/test/java/org/apache/commons/csv/LexerTest.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv; @@ -26,9 +28,6 @@ import static org.apache.commons.csv.Token.Type.EOF; import static org.apache.commons.csv.Token.Type.EORECORD; import static org.apache.commons.csv.Token.Type.TOKEN; -import static org.apache.commons.csv.TokenMatchers.hasContent; -import static org.apache.commons.csv.TokenMatchers.matches; -import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -41,9 +40,22 @@ import org.junit.jupiter.api.Test; /** - * */ -public class LexerTest { +class LexerTest { + + private static void assertContent(final String expectedContent, final Token actualToken) { + assertEquals(expectedContent, actualToken.content.toString()); + } + + private static void assertNextToken(final String expectedContent, final Lexer lexer) throws IOException { + assertContent(expectedContent, lexer.nextToken(new Token())); + } + + private static void assertNextToken(final Token.Type expectedType, final String expectedContent, final Lexer lexer) throws IOException { + final Token actualToken = lexer.nextToken(new Token()); + assertEquals(expectedType, actualToken.type); + assertContent(expectedContent, actualToken); + } private CSVFormat formatWithEscaping; @@ -59,78 +71,85 @@ public void setUp() { // simple token with escaping enabled @Test - public void testBackslashWithEscaping() throws IOException { + void testBackslashWithEscaping() throws IOException { /* * file: a,\,,b \,, */ final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne"; final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false); assertTrue(format.isEscapeCharacterSet()); - try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); - assertThat(parser.nextToken(new 
Token()), matches(EORECORD, "b\\")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r")); - assertThat(parser.nextToken(new Token()), matches(EOF, "e")); + try (Lexer lexer = createLexer(code, format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, ",", lexer); + assertNextToken(EORECORD, "b\\", lexer); + assertNextToken(TOKEN, ",", lexer); + assertNextToken(TOKEN, "\nc", lexer); + assertNextToken(EORECORD, "d\r", lexer); + assertNextToken(EOF, "e", lexer); } } // simple token with escaping not enabled @Test - public void testBackslashWithoutEscaping() throws IOException { + void testBackslashWithoutEscaping() throws IOException { /* * file: a,\,,b \,, */ final String code = "a,\\,,b\\\n\\,,"; final CSVFormat format = CSVFormat.DEFAULT; assertFalse(format.isEscapeCharacterSet()); - try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); + try (Lexer lexer = createLexer(code, format)) { + // parser.nextToken(new Token()) + assertNextToken(TOKEN, "a", lexer); // an unquoted single backslash is not an escape char - assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\")); + assertNextToken(TOKEN, "\\", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EORECORD, "b\\", lexer); // an unquoted single backslash is not an escape char - assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); + assertNextToken(TOKEN, "\\", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "", lexer); } } @Test - public void testBackspace() throws Exception 
{ - try (final Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped")); + void testBackspace() throws Exception { + try (Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) { + assertNextToken("character" + BACKSPACE + "NotEscaped", lexer); } } @Test - public void testComments() throws IOException { - final String code = "first,line,\n" + "second,line,tokenWith#no-comment\n" + "# comment line \n" + - "third,line,#no-comment\n" + "# penultimate comment\n" + "# Final comment\n"; + void testComments() throws IOException { + // @formatter:off + final String code = "first,line,\n" + + "second,line,tokenWith#no-comment\n" + + "# comment line \n" + + "third,line,#no-comment\n" + + "# penultimate comment\n" + + "# Final comment\n"; + // @formatter:on final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#'); - try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "second")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "tokenWith#no-comment")); - assertThat(parser.nextToken(new Token()), matches(COMMENT, "comment line")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "third")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "#no-comment")); - assertThat(parser.nextToken(new Token()), matches(COMMENT, "penultimate comment")); - assertThat(parser.nextToken(new Token()), matches(COMMENT, "Final comment")); - assertThat(parser.nextToken(new Token()), matches(EOF, 
"")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); - } - } - - @Test - public void testCommentsAndEmptyLines() throws IOException { + try (Lexer lexer = createLexer(code, format)) { + assertNextToken(TOKEN, "first", lexer); + assertNextToken(TOKEN, "line", lexer); + assertNextToken(EORECORD, "", lexer); + assertNextToken(TOKEN, "second", lexer); + assertNextToken(TOKEN, "line", lexer); + assertNextToken(EORECORD, "tokenWith#no-comment", lexer); + assertNextToken(COMMENT, "comment line", lexer); + assertNextToken(TOKEN, "third", lexer); + assertNextToken(TOKEN, "line", lexer); + assertNextToken(EORECORD, "#no-comment", lexer); + assertNextToken(COMMENT, "penultimate comment", lexer); + assertNextToken(COMMENT, "Final comment", lexer); + assertNextToken(EOF, "", lexer); + assertNextToken(EOF, "", lexer); + } + } + + @Test + void testCommentsAndEmptyLines() throws IOException { final String code = "1,2,3,\n" + // 1 "\n" + // 1b "\n" + // 1c @@ -148,287 +167,384 @@ public void testCommentsAndEmptyLines() throws IOException { final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreEmptyLines(false); assertFalse(format.getIgnoreEmptyLines(), "Should not ignore empty lines"); - try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "1")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "2")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "3")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1 - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1b - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1c - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "b x")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "c#no-comment")); // 2 - assertThat(parser.nextToken(new Token()), matches(COMMENT, "foo")); // 3 - 
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 4 - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 4b - assertThat(parser.nextToken(new Token()), matches(TOKEN, "d")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "e")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "#no-comment")); // 5 - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 5b - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 5c - assertThat(parser.nextToken(new Token()), matches(COMMENT, "penultimate comment")); // 6 - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 6b - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 6c - assertThat(parser.nextToken(new Token()), matches(COMMENT, "Final comment")); // 7 - assertThat(parser.nextToken(new Token()), matches(EOF, "")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); - } - } - - @Test - public void testCR() throws Exception { - try (final Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character")); - assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); + try (Lexer lexer = createLexer(code, format)) { + assertNextToken(TOKEN, "1", lexer); + assertNextToken(TOKEN, "2", lexer); + assertNextToken(TOKEN, "3", lexer); + assertNextToken(EORECORD, "", lexer); // 1 + assertNextToken(EORECORD, "", lexer); // 1b + assertNextToken(EORECORD, "", lexer); // 1c + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "b x", lexer); + assertNextToken(EORECORD, "c#no-comment", lexer); // 2 + assertNextToken(COMMENT, "foo", lexer); // 3 + assertNextToken(EORECORD, "", lexer); // 4 + assertNextToken(EORECORD, "", lexer); // 4b + assertNextToken(TOKEN, "d", lexer); + assertNextToken(TOKEN, "e", lexer); + assertNextToken(EORECORD, "#no-comment", lexer); // 5 + assertNextToken(EORECORD, "", lexer); 
// 5b + assertNextToken(EORECORD, "", lexer); // 5c + assertNextToken(COMMENT, "penultimate comment", lexer); // 6 + assertNextToken(EORECORD, "", lexer); // 6b + assertNextToken(EORECORD, "", lexer); // 6c + assertNextToken(COMMENT, "Final comment", lexer); // 7 + assertNextToken(EOF, "", lexer); + assertNextToken(EOF, "", lexer); + } + } + + @Test + void testCR() throws Exception { + try (Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) { + assertNextToken("character", lexer); + assertNextToken("NotEscaped", lexer); } } // From CSV-1 @Test - public void testDelimiterIsWhitespace() throws IOException { + void testDelimiterIsWhitespace() throws IOException { final String code = "one\ttwo\t\tfour \t five\t six"; - try (final Lexer parser = createLexer(code, CSVFormat.TDF)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "one")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "two")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "four")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "five")); - assertThat(parser.nextToken(new Token()), matches(EOF, "six")); + try (Lexer lexer = createLexer(code, CSVFormat.TDF)) { + assertNextToken(TOKEN, "one", lexer); + assertNextToken(TOKEN, "two", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(TOKEN, "four", lexer); + assertNextToken(TOKEN, "five", lexer); + assertNextToken(EOF, "six", lexer); + } + } + + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the + * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped. 
+ */ + @Test + void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (Lexer lexer = createLexer(" |a", format)) { + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "a", lexer); + } + try (Lexer lexer = createLexer("a | |b", format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "b", lexer); + } + } + + @Test + void testEOFWithoutClosingQuote() throws Exception { + final String code = "a,\"b"; + try (Lexer lexer = createLexer(code, CSVFormat.Builder.create().setLenientEof(true).get())) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(EOF, "b", lexer); + } + try (Lexer lexer = createLexer(code, CSVFormat.Builder.create().setLenientEof(false).get())) { + assertNextToken(TOKEN, "a", lexer); + assertThrows(IOException.class, () -> lexer.nextToken(new Token())); } } @Test // TODO is this correct? Do we expect BACKSPACE to be unescaped? 
- public void testEscapedBackspace() throws Exception { - try (final Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped")); + void testEscapedBackspace() throws Exception { + try (Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) { + assertNextToken("character" + BACKSPACE + "Escaped", lexer); } } @Test - public void testEscapedCharacter() throws Exception { - try (final Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped")); + void testEscapedCharacter() throws Exception { + try (Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) { + assertNextToken("character\\aEscaped", lexer); } } @Test - public void testEscapedControlCharacter() throws Exception { + void testEscapedControlCharacter() throws Exception { // we are explicitly using an escape different from \ here - try (final Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); + try (Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) { + assertNextToken("character" + CR + "Escaped", lexer); } } @Test - public void testEscapedControlCharacter2() throws Exception { - try (final Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); + void testEscapedControlCharacter2() throws Exception { + try (Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) { + assertNextToken("character" + CR + "Escaped", lexer); } } @Test - public void testEscapedCR() throws Exception { - try (final Lexer lexer = createLexer("character\\" + CR + "Escaped", 
formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); + void testEscapedCR() throws Exception { + try (Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) { + assertNextToken("character" + CR + "Escaped", lexer); } } @Test // TODO is this correct? Do we expect FF to be unescaped? - public void testEscapedFF() throws Exception { - try (final Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped")); + void testEscapedFF() throws Exception { + try (Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) { + assertNextToken("character" + FF + "Escaped", lexer); } } @Test - public void testEscapedLF() throws Exception { - try (final Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped")); + void testEscapedLF() throws Exception { + try (Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) { + assertNextToken("character" + LF + "Escaped", lexer); } } @Test - public void testEscapedMySqlNullValue() throws Exception { + void testEscapedMySqlNullValue() throws Exception { // MySQL uses \N to symbolize null values. We have to restore this - try (final Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped")); + try (Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) { + assertNextToken("character\\NEscaped", lexer); } } @Test // TODO is this correct? Do we expect TAB to be unescaped? 
- public void testEscapedTab() throws Exception { - try (final Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped")); + void testEscapedTab() throws Exception { + try (Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) { + assertNextToken("character" + TAB + "Escaped", lexer); } } @Test - public void testEscapingAtEOF() throws Exception { + void testEscapingAtEOF() throws Exception { final String code = "escaping at EOF is evil\\"; - try (final Lexer lexer = createLexer(code, formatWithEscaping)) { + try (Lexer lexer = createLexer(code, formatWithEscaping)) { assertThrows(IOException.class, () -> lexer.nextToken(new Token())); } } @Test - public void testFF() throws Exception { - try (final Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped")); + void testFF() throws Exception { + try (Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) { + assertNextToken("character" + FF + "NotEscaped", lexer); } } @Test - public void testIgnoreEmptyLines() throws IOException { - final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" + - "\n" + "last, line \n" + "\n" + "\n" + "\n"; + void testIgnoreEmptyLines() throws IOException { + // @formatter:off + final String code = "first,line,\n" + + "\n" + + "\n" + + "second,line\n" + + "\n" + + "\n" + + "third line \n" + + "\n" + + "\n" + + "last, line \n" + + "\n" + + "\n" + + "\n"; + // @formatter:on final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(); - try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); - assertThat(parser.nextToken(new Token()), 
matches(EORECORD, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "second")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "line")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line ")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "last")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, " line ")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); + try (Lexer lexer = createLexer(code, format)) { + assertNextToken(TOKEN, "first", lexer); + assertNextToken(TOKEN, "line", lexer); + assertNextToken(EORECORD, "", lexer); + assertNextToken(TOKEN, "second", lexer); + assertNextToken(EORECORD, "line", lexer); + assertNextToken(EORECORD, "third line ", lexer); + assertNextToken(TOKEN, "last", lexer); + assertNextToken(EORECORD, " line ", lexer); + assertNextToken(EOF, "", lexer); + assertNextToken(EOF, "", lexer); } } @Test - public void testIsMetaCharCommentStart() throws IOException { - try (final Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) { + void testIsMetaCharCommentStart() throws IOException { + try (Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) { final int ch = lexer.readEscape(); assertEquals('#', ch); } } @Test - public void testLF() throws Exception { - try (final Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character")); - assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); + void testLF() throws Exception { + try (Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) { + assertNextToken("character", lexer); + assertNextToken("NotEscaped", lexer); } } // encapsulator tokenizer (single line) @Test - public void testNextToken4() throws IOException { + void testNextToken4() throws IOException { /* * file: 
a,"foo",b a, " foo",b a,"foo " ,b // whitespace after closing encapsulator a, " foo " ,b */ final String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; - try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, " foo")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo ")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, " foo ")); - // assertTokenEquals(EORECORD, "b", parser.nextToken(new Token())); - assertThat(parser.nextToken(new Token()), matches(EOF, "b")); + try (Lexer lexer = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "foo", lexer); + assertNextToken(EORECORD, "b", lexer); + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, " foo", lexer); + assertNextToken(EORECORD, "b", lexer); + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "foo ", lexer); + assertNextToken(EORECORD, "b", lexer); + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, " foo ", lexer); + // assertTokenEquals(EORECORD, "b", parser); + assertNextToken(EOF, "b", lexer); } } // encapsulator tokenizer (multi line, delimiter in string) @Test - public void testNextToken5() throws IOException { + void testNextToken5() throws IOException { final String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t 
\n\""; - try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo\n")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "foo\n baar ,,,")); - assertThat(parser.nextToken(new Token()), matches(EOF, "\n\t \n")); + try (Lexer lexer = createLexer(code, CSVFormat.DEFAULT)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "foo\n", lexer); + assertNextToken(EORECORD, "b", lexer); + assertNextToken(EORECORD, "foo\n baar ,,,", lexer); + assertNextToken(EOF, "\n\t \n", lexer); } } // change delimiters, comment, encapsulater @Test - public void testNextToken6() throws IOException { + void testNextToken6() throws IOException { /* * file: a;'b and \' more ' !comment;;;; ;; */ final String code = "a;'b and '' more\n'\n!comment;;;;\n;;"; final CSVFormat format = CSVFormat.DEFAULT.withQuote('\'').withCommentMarker('!').withDelimiter(';'); - try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b and ' more\n")); + try (Lexer lexer = createLexer(code, format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(EORECORD, "b and ' more\n", lexer); + } + } + + /** + * A truncated escaped multi-character delimiter at EOF must not be accepted by reusing the previous escape delimiter + * look-ahead in {@link Lexer#isEscapeDelimiter()}. + */ + @Test + void testPartialEscapedMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').get(); + try (Lexer lexer = createLexer("x![!|!]y![!|", format)) { + assertNextToken(EOF, "x[|]y![!|", lexer); + } + } + + /** + * Tests CSV-324. 
+ */ + @Test + void testPartialMultiCharacterDelimiterAtEOF() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + try (Lexer lexer = createLexer("a[|]b[|", format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(EOF, "b[|", lexer); + } + } + + /** + * A truncated multi-character delimiter at EOF must not be accepted by reusing the look-ahead buffer left dirty by an + * earlier non-matching peek in the same token (CSV-324 only cleared the buffer once per token). + */ + @Test + void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + // The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]". + final String recordString = "x[a][|"; + try (Lexer lexer = createLexer(recordString, format)) { + assertNextToken(EOF, recordString, lexer); } } @Test - public void testReadEscapeBackspace() throws IOException { - try (final Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) { + void testReadEscapeBackspace() throws IOException { + try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) { final int ch = lexer.readEscape(); assertEquals(BACKSPACE, ch); } } @Test - public void testReadEscapeFF() throws IOException { - try (final Lexer lexer = createLexer("f", CSVFormat.DEFAULT.withEscape('\f'))) { + void testReadEscapeFF() throws IOException { + try (Lexer lexer = createLexer("f", CSVFormat.DEFAULT.withEscape('\f'))) { final int ch = lexer.readEscape(); assertEquals(FF, ch); } } @Test - public void testReadEscapeTab() throws IOException { - try (final Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) { + void testReadEscapeTab() throws IOException { + try (Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) { final int ch = lexer.readEscape(); - assertThat(lexer.nextToken(new Token()), matches(EOF, "")); + 
assertNextToken(EOF, "", lexer); assertEquals(TAB, ch); } } @Test - public void testSurroundingSpacesAreDeleted() throws IOException { + void testSurroundingSpacesAreDeleted() throws IOException { final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,"; - try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); + try (Lexer lexer = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + assertNextToken(TOKEN, "noSpaces", lexer); + assertNextToken(TOKEN, "leadingSpaces", lexer); + assertNextToken(TOKEN, "trailingSpaces", lexer); + assertNextToken(TOKEN, "surroundingSpaces", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "", lexer); } } @Test - public void testSurroundingTabsAreDeleted() throws IOException { + void testSurroundingTabsAreDeleted() throws IOException { final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,"; - try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - 
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); + try (Lexer lexer = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + assertNextToken(TOKEN, "noTabs", lexer); + assertNextToken(TOKEN, "leadingTab", lexer); + assertNextToken(TOKEN, "trailingTab", lexer); + assertNextToken(TOKEN, "surroundingTabs", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "", lexer); } } @Test - public void testTab() throws Exception { - try (final Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped")); + void testTab() throws Exception { + try (Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) { + assertNextToken("character" + TAB + "NotEscaped", lexer); } } @Test - public void testTrimTrailingSpacesZeroLength() throws Exception { + void testTrailingTextAfterQuote() throws Exception { + final String code = "\"a\" b,\"a\" \" b,\"a\" b \"\""; + try (Lexer lexer = createLexer(code, CSVFormat.Builder.create().setTrailingData(true).get())) { + assertNextToken(TOKEN, "a b", lexer); + assertNextToken(TOKEN, "a \" b", lexer); + assertNextToken(EOF, "a b \"\"", lexer); + } + try (Lexer parser = createLexer(code, CSVFormat.Builder.create().setTrailingData(false).get())) { + assertThrows(IOException.class, () -> parser.nextToken(new Token())); + } + } + + @Test + void testTrimTrailingSpacesZeroLength() throws Exception { final StringBuilder buffer = new StringBuilder(""); - final Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT); - lexer.trimTrailingSpaces(buffer); - assertThat(lexer.nextToken(new Token()), matches(EOF, "")); + try (Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT)) { + lexer.trimTrailingSpaces(buffer); + assertNextToken(EOF, "", lexer); + } } } diff --git 
a/src/test/java/org/apache/commons/csv/PerformanceTest.java b/src/test/java/org/apache/commons/csv/PerformanceTest.java index ef328c6342..9284828e6c 100644 --- a/src/test/java/org/apache/commons/csv/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/PerformanceTest.java @@ -1,330 +1,345 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.Reader; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.zip.GZIPInputStream; - -import org.apache.commons.io.IOUtils; - -/** - * Basic test harness. 
- */ -@SuppressWarnings("boxing") -public class PerformanceTest { - - @FunctionalInterface - private interface CSVParserFactory { - CSVParser createParser() throws IOException; - } - - // Container for basic statistics - private static class Stats { - final int count; - final int fields; - Stats(final int c, final int f) { - count = c; - fields = f; - } - } - - private static final String[] PROPERTY_NAMES = { - "java.version", // Java Runtime Environment version - "java.vendor", // Java Runtime Environment vendor -// "java.vm.specification.version", // Java Virtual Machine specification version -// "java.vm.specification.vendor", // Java Virtual Machine specification vendor -// "java.vm.specification.name", // Java Virtual Machine specification name - "java.vm.version", // Java Virtual Machine implementation version -// "java.vm.vendor", // Java Virtual Machine implementation vendor - "java.vm.name", // Java Virtual Machine implementation name -// "java.specification.version", // Java Runtime Environment specification version -// "java.specification.vendor", // Java Runtime Environment specification vendor -// "java.specification.name", // Java Runtime Environment specification name - - "os.name", // Operating system name - "os.arch", // Operating system architecture - "os.version", // Operating system version - }; - private static int max = 11; // skip first test - - private static int num; // number of elapsed times recorded - - private static final long[] ELAPSED_TIMES = new long[max]; - private static final CSVFormat format = CSVFormat.EXCEL; - - private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; - - private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); - - private static Reader createReader() throws IOException { - return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1); - } - - private static Lexer createTestCSVLexer(final String 
test, final ExtendedBufferedReader input) - throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception { - return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input); - } - - private static Constructor getLexerCtor(final String clazz) throws Exception { - @SuppressWarnings("unchecked") - final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); - return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class); - } - - private static Stats iterate(final Iterable iterable) { - int count = 0; - int fields = 0; - for (final CSVRecord record : iterable) { - count++; - fields += record.size(); - } - return new Stats(count, fields); - } - - public static void main(final String [] args) throws Exception { - if (BIG_FILE.exists()) { - System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length()); - } else { - System.out.println("Decompressing test fixture to: " + BIG_FILE + "..."); - try ( - final InputStream input = new GZIPInputStream( - PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC)); - final OutputStream output = new FileOutputStream(BIG_FILE)) { - IOUtils.copy(input, output); - System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); - } - } - final int argc = args.length; - if (argc > 0) { - max = Integer.parseInt(args[0]); - } - - final String[] tests; - if (argc > 1) { - tests = new String[argc - 1]; - System.arraycopy(args, 1, tests, 0, argc - 1); - } else { - tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" }; - } - for (final String p : PROPERTY_NAMES) { - System.out.printf("%s=%s%n", p, System.getProperty(p)); - } - System.out.printf("Max count: %d%n%n", max); - - for (final String test : tests) { - if ("file".equals(test)) { - testReadBigFile(false); - } else if 
("split".equals(test)) { - testReadBigFile(true); - } else if ("csv".equals(test)) { - testParseCommonsCSV(); - } else if ("csv-path".equals(test)) { - testParsePath(); - } else if ("csv-path-db".equals(test)) { - testParsePathDoubleBuffering(); - } else if ("csv-url".equals(test)) { - testParseURL(); - } else if ("lexreset".equals(test)) { - testCSVLexer(false, test); - } else if ("lexnew".equals(test)) { - testCSVLexer(true, test); - } else if (test.startsWith("CSVLexer")) { - testCSVLexer(false, test); - } else if ("extb".equals(test)) { - testExtendedBuffer(false); - } else if ("exts".equals(test)) { - testExtendedBuffer(true); - } else { - System.out.printf("Invalid test name: %s%n", test); - } - } - } - - private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { - int count = 0; - int fields = 0; - String record; - while ((record = in.readLine()) != null) { - count++; - fields += split ? record.split(",").length : 1; - } - return new Stats(count, fields); - } - - // calculate and show average - private static void show(){ - if (num > 1) { - long tot = 0; - for (int i = 1; i < num; i++) { // skip first test - tot += ELAPSED_TIMES[i]; - } - System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1)); - } - num = 0; // ready for next set - } - - // Display end stats; store elapsed for average - private static void show(final String msg, final Stats s, final long start) { - final long elapsed = System.currentTimeMillis() - start; - System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields); - ELAPSED_TIMES[num] = elapsed; - num++; - } - - private static void testCSVLexer(final boolean newToken, final String test) throws Exception { - Token token = new Token(); - String dynamic = ""; - for (int i = 0; i < max; i++) { - final String simpleName; - final Stats stats; - final long startMillis; - try (final ExtendedBufferedReader input = new ExtendedBufferedReader(createReader()); - 
final Lexer lexer = createTestCSVLexer(test, input)) { - if (test.startsWith("CSVLexer")) { - dynamic = "!"; - } - simpleName = lexer.getClass().getSimpleName(); - int count = 0; - int fields = 0; - startMillis = System.currentTimeMillis(); - do { - if (newToken) { - token = new Token(); - } else { - token.reset(); - } - lexer.nextToken(token); - switch (token.type) { - case EOF: - break; - case EORECORD: - fields++; - count++; - break; - case INVALID: - throw new IOException("invalid parse sequence <" + token.content.toString() + ">"); - case TOKEN: - fields++; - break; - case COMMENT: // not really expecting these - break; - default: - throw new IllegalStateException("Unexpected Token type: " + token.type); - } - } while (!token.type.equals(Token.Type.EOF)); - stats = new Stats(count, fields); - } - show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis); - } - show(); - } - - private static void testExtendedBuffer(final boolean makeString) throws Exception { - for (int i = 0; i < max; i++) { - int fields = 0; - int lines = 0; - final long startMillis; - try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) { - startMillis = System.currentTimeMillis(); - int read; - if (makeString) { - StringBuilder sb = new StringBuilder(); - while ((read = in.read()) != -1) { - sb.append((char) read); - if (read == ',') { // count delimiters - sb.toString(); - sb = new StringBuilder(); - fields++; - } else if (read == '\n') { - sb.toString(); - sb = new StringBuilder(); - lines++; - } - } - } else { - while ((read = in.read()) != -1) { - if (read == ',') { // count delimiters - fields++; - } else if (read == '\n') { - lines++; - } - } - } - fields += lines; // EOL is a delimiter too - } - show("Extended" + (makeString ? 
" toString" : ""), new Stats(lines, fields), startMillis); - } - show(); - } - - private static void testParseCommonsCSV() throws Exception { - testParser("CSV", () -> new CSVParser(createReader(), format)); - } - - private static void testParsePath() throws Exception { - testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format)); - } - - private static void testParsePathDoubleBuffering() throws Exception { - testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format)); - } - - private static void testParser(final String msg, final CSVParserFactory fac) throws Exception { - for (int i = 0; i < max; i++) { - final long startMillis; - final Stats stats; - try (final CSVParser parser = fac.createParser()) { - startMillis = System.currentTimeMillis(); - stats = iterate(parser); - } - show(msg, stats, startMillis); - } - show(); - } - - private static void testParseURL() throws Exception { - testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format)); - } - - private static void testReadBigFile(final boolean split) throws Exception { - for (int i = 0; i < max; i++) { - final long startMillis; - final Stats stats; - try (final BufferedReader in = new BufferedReader(createReader())) { - startMillis = System.currentTimeMillis(); - stats = readAll(in, split); - } - show(split ? "file+split" : "file", stats, startMillis); - } - show(); - } - -} \ No newline at end of file +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.apache.commons.io.IOUtils.EOF; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.Reader; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.zip.GZIPInputStream; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; + +/** + * Basic test harness. 
+ */ +@SuppressWarnings("boxing") +class PerformanceTest { + + @FunctionalInterface + private interface CSVParserFactory { + CSVParser createParser() throws IOException; + } + + // Container for basic statistics + private static final class Stats { + final int count; + final int fields; + + Stats(final int c, final int f) { + count = c; + fields = f; + } + } + + private static final String[] PROPERTY_NAMES = { "java.version", // Java Runtime Environment version + "java.vendor", // Java Runtime Environment vendor +// "java.vm.specification.version", // Java Virtual Machine specification version +// "java.vm.specification.vendor", // Java Virtual Machine specification vendor +// "java.vm.specification.name", // Java Virtual Machine specification name + "java.vm.version", // Java Virtual Machine implementation version +// "java.vm.vendor", // Java Virtual Machine implementation vendor + "java.vm.name", // Java Virtual Machine implementation name +// "java.specification.version", // Java Runtime Environment specification version +// "java.specification.vendor", // Java Runtime Environment specification vendor +// "java.specification.name", // Java Runtime Environment specification name + + "os.name", // Operating system name + "os.arch", // Operating system architecture + "os.version", // Operating system version + }; + private static int max = 11; // skip first test + + private static int num; // number of elapsed times recorded + + private static final long[] ELAPSED_TIMES = new long[max]; + private static final CSVFormat format = CSVFormat.EXCEL; + + private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; + + private static final File BIG_FILE = new File(FileUtils.getTempDirectoryPath(), "worldcitiespop.txt"); + + private static Reader createReader() throws IOException { + return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1); + } + + private static Lexer createTestCSVLexer(final String test, 
final ExtendedBufferedReader input) + throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception { + return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input); + } + + private static Constructor getLexerCtor(final String clazz) throws Exception { + @SuppressWarnings("unchecked") + final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); + return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class); + } + + private static Stats iterate(final Iterable iterable) { + int count = 0; + int fields = 0; + for (final CSVRecord record : iterable) { + count++; + fields += record.size(); + } + return new Stats(count, fields); + } + + public static void main(final String[] args) throws Exception { + if (BIG_FILE.exists()) { + System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length()); + } else { + System.out.println("Decompressing test fixture to: " + BIG_FILE + "..."); + try (InputStream input = new GZIPInputStream(PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC)); + OutputStream output = new FileOutputStream(BIG_FILE)) { + IOUtils.copy(input, output); + System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); + } + } + final int argc = args.length; + if (argc > 0) { + max = Integer.parseInt(args[0]); + } + + final String[] tests; + if (argc > 1) { + tests = new String[argc - 1]; + System.arraycopy(args, 1, tests, 0, argc - 1); + } else { + tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" }; + } + for (final String p : PROPERTY_NAMES) { + System.out.printf("%s=%s%n", p, System.getProperty(p)); + } + System.out.printf("Max count: %d%n%n", max); + + for (final String test : tests) { + switch (test) { + case "file": + testReadBigFile(false); + break; + case "split": + testReadBigFile(true); 
+ break; + case "csv": + testParseCommonsCSV(); + break; + case "csv-path": + testParsePath(); + break; + case "csv-path-db": + testParsePathDoubleBuffering(); + break; + case "csv-url": + testParseURL(); + break; + case "lexreset": + testCSVLexer(false, test); + break; + case "lexnew": + testCSVLexer(true, test); + break; + default: + if (test.startsWith("CSVLexer")) { + testCSVLexer(false, test); + } else if ("extb".equals(test)) { + testExtendedBuffer(false); + } else if ("exts".equals(test)) { + testExtendedBuffer(true); + } else { + System.out.printf("Invalid test name: %s%n", test); + } + break; + } + } + } + + private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { + int count = 0; + int fields = 0; + String record; + while ((record = in.readLine()) != null) { + count++; + fields += split ? record.split(",").length : 1; + } + return new Stats(count, fields); + } + + // calculate and show average + private static void show() { + if (num > 1) { + long tot = 0; + for (int i = 1; i < num; i++) { // skip first test + tot += ELAPSED_TIMES[i]; + } + System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1)); + } + num = 0; // ready for next set + } + + // Display end stats; store elapsed for average + private static void show(final String msg, final Stats s, final long start) { + final long elapsed = System.currentTimeMillis() - start; + System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields); + ELAPSED_TIMES[num] = elapsed; + num++; + } + + private static void testCSVLexer(final boolean newToken, final String test) throws Exception { + Token token = new Token(); + String dynamic = ""; + for (int i = 0; i < max; i++) { + final String simpleName; + final Stats stats; + final long startMillis; + try (ExtendedBufferedReader input = new ExtendedBufferedReader(createReader()); + Lexer lexer = createTestCSVLexer(test, input)) { + if (test.startsWith("CSVLexer")) { + dynamic = "!"; + 
} + simpleName = lexer.getClass().getSimpleName(); + int count = 0; + int fields = 0; + startMillis = System.currentTimeMillis(); + do { + if (newToken) { + token = new Token(); + } else { + token.reset(); + } + lexer.nextToken(token); + switch (token.type) { + case EOF: + break; + case EORECORD: + fields++; + count++; + break; + case INVALID: + throw new IOException("invalid parse sequence <" + token.content.toString() + ">"); + case TOKEN: + fields++; + break; + case COMMENT: // not really expecting these + break; + default: + throw new IllegalStateException("Unexpected Token type: " + token.type); + } + } while (!token.type.equals(Token.Type.EOF)); + stats = new Stats(count, fields); + } + show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis); + } + show(); + } + + private static void testExtendedBuffer(final boolean makeString) throws Exception { + for (int i = 0; i < max; i++) { + int fields = 0; + int lines = 0; + final long startMillis; + try (ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) { + startMillis = System.currentTimeMillis(); + int read; + if (makeString) { + StringBuilder sb = new StringBuilder(); + while ((read = in.read()) != EOF) { + sb.append((char) read); + if (read == ',') { // count delimiters + sb.toString(); + sb = new StringBuilder(); + fields++; + } else if (read == '\n') { + sb.toString(); + sb = new StringBuilder(); + lines++; + } + } + } else { + while ((read = in.read()) != EOF) { + if (read == ',') { // count delimiters + fields++; + } else if (read == '\n') { + lines++; + } + } + } + fields += lines; // EOL is a delimiter too + } + show("Extended" + (makeString ? 
" toString" : ""), new Stats(lines, fields), startMillis); + } + show(); + } + + private static void testParseCommonsCSV() throws Exception { + testParser("CSV", () -> CSVParser.builder().setReader(createReader()).setFormat(format).get()); + } + + private static void testParsePath() throws Exception { + testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format)); + } + + private static void testParsePathDoubleBuffering() throws Exception { + testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format)); + } + + private static void testParser(final String msg, final CSVParserFactory fac) throws Exception { + for (int i = 0; i < max; i++) { + final long startMillis; + final Stats stats; + try (CSVParser parser = fac.createParser()) { + startMillis = System.currentTimeMillis(); + stats = iterate(parser); + } + show(msg, stats, startMillis); + } + show(); + } + + private static void testParseURL() throws Exception { + testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format)); + } + + private static void testReadBigFile(final boolean split) throws Exception { + for (int i = 0; i < max; i++) { + final long startMillis; + final Stats stats; + try (BufferedReader in = new BufferedReader(createReader())) { + startMillis = System.currentTimeMillis(); + stats = readAll(in, split); + } + show(split ? "file+split" : "file", stats, startMillis); + } + show(); + } +} + diff --git a/src/test/java/org/apache/commons/csv/TokenMatchers.java b/src/test/java/org/apache/commons/csv/TokenMatchers.java deleted file mode 100644 index c081e7eee9..0000000000 --- a/src/test/java/org/apache/commons/csv/TokenMatchers.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv; - -import static org.hamcrest.core.AllOf.allOf; - -import org.hamcrest.Description; -import org.hamcrest.Matcher; -import org.hamcrest.TypeSafeDiagnosingMatcher; - -/** - * Collection of matchers for asserting the type and content of tokens. - */ -final class TokenMatchers { - - public static Matcher hasContent(final String expectedContent) { - return new TypeSafeDiagnosingMatcher() { - - @Override - public void describeTo(final Description description) { - description.appendText("token has content "); - description.appendValue(expectedContent); - } - - @Override - protected boolean matchesSafely(final Token item, - final Description mismatchDescription) { - mismatchDescription.appendText("token content is "); - mismatchDescription.appendValue(item.content.toString()); - return expectedContent.equals(item.content.toString()); - } - }; - } - - public static Matcher hasType(final Token.Type expectedType) { - return new TypeSafeDiagnosingMatcher() { - - @Override - public void describeTo(final Description description) { - description.appendText("token has type "); - description.appendValue(expectedType); - } - - @Override - protected boolean matchesSafely(final Token item, - final Description mismatchDescription) { - 
mismatchDescription.appendText("token type is "); - mismatchDescription.appendValue(item.type); - return item.type == expectedType; - } - }; - } - - public static Matcher isReady() { - return new TypeSafeDiagnosingMatcher() { - - @Override - public void describeTo(final Description description) { - description.appendText("token is ready "); - } - - @Override - protected boolean matchesSafely(final Token item, - final Description mismatchDescription) { - mismatchDescription.appendText("token is not ready "); - return item.isReady; - } - }; - } - - public static Matcher matches(final Token.Type expectedType, final String expectedContent) { - return allOf(hasType(expectedType), hasContent(expectedContent)); - } - -} diff --git a/src/test/java/org/apache/commons/csv/TokenMatchersTest.java b/src/test/java/org/apache/commons/csv/TokenMatchersTest.java deleted file mode 100644 index 47c213d7bd..0000000000 --- a/src/test/java/org/apache/commons/csv/TokenMatchersTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.csv; - -import static org.apache.commons.csv.TokenMatchers.hasContent; -import static org.apache.commons.csv.TokenMatchers.hasType; -import static org.apache.commons.csv.TokenMatchers.isReady; -import static org.apache.commons.csv.TokenMatchers.matches; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TokenMatchersTest { - - private Token token; - - @BeforeEach - public void setUp() { - token = new Token(); - token.type = Token.Type.TOKEN; - token.isReady = true; - token.content.append("content"); - } - - @Test - public void testHasContent() { - assertFalse(hasContent("This is not the token's content").matches(token)); - assertTrue(hasContent("content").matches(token)); - } - - @Test - public void testHasType() { - assertFalse(hasType(Token.Type.COMMENT).matches(token)); - assertFalse(hasType(Token.Type.EOF).matches(token)); - assertFalse(hasType(Token.Type.EORECORD).matches(token)); - assertTrue(hasType(Token.Type.TOKEN).matches(token)); - } - - @Test - public void testIsReady() { - assertTrue(isReady().matches(token)); - token.isReady = false; - assertFalse(isReady().matches(token)); - } - - @Test - public void testMatches() { - assertTrue(matches(Token.Type.TOKEN, "content").matches(token)); - assertFalse(matches(Token.Type.EOF, "content").matches(token)); - assertFalse(matches(Token.Type.TOKEN, "not the content").matches(token)); - assertFalse(matches(Token.Type.EORECORD, "not the content").matches(token)); - } - - @Test - public void testToString() { - assertTrue(matches(Token.Type.TOKEN, "content").matches(token)); - assertEquals("TOKEN", token.type.name()); - assertEquals("TOKEN [content]", token.toString()); - } -} diff --git a/src/test/java/org/apache/commons/csv/TokenTest.java 
b/src/test/java/org/apache/commons/csv/TokenTest.java new file mode 100644 index 0000000000..075c1b1d9c --- /dev/null +++ b/src/test/java/org/apache/commons/csv/TokenTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +/** + * Tests {@link Token}. 
+ */ +class TokenTest { + + @ParameterizedTest + @EnumSource(Token.Type.class) + void testToString(final Token.Type type) { + // Should never blow up + final Token token = new Token(); + final String resetName = Token.Type.INVALID.name(); + assertTrue(token.toString().contains(resetName)); + token.reset(); + assertTrue(token.toString().contains(resetName)); + token.type = null; + assertFalse(token.toString().isEmpty()); + token.reset(); + token.type = type; + assertTrue(token.toString().contains(type.name())); + token.content.setLength(1000); + assertTrue(token.toString().contains(type.name())); + } +} diff --git a/src/test/java/org/apache/commons/csv/UserGuideTest.java b/src/test/java/org/apache/commons/csv/UserGuideTest.java new file mode 100644 index 0000000000..6cd8c72d7f --- /dev/null +++ b/src/test/java/org/apache/commons/csv/UserGuideTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.input.BOMInputStream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Tests for the user guide. + */ +class UserGuideTest { + + @TempDir + Path tempDir; + + /** + * Creates a reader capable of handling BOMs. + * + * @param path The path to read. + * @return a new InputStreamReader for UTF-8 bytes. + * @throws IOException if an I/O error occurs. + */ + public InputStreamReader newReader(final Path path) throws IOException { + return new InputStreamReader(BOMInputStream.builder() + .setPath(path) + .get(), StandardCharsets.UTF_8); + } + + @Test + void testBomFull() throws UnsupportedEncodingException, IOException { + final Path path = tempDir.resolve("test1.csv"); + Files.copy(Utils.createUtf8Input("ColumnA, ColumnB, ColumnC\r\nA, B, C\r\n".getBytes(StandardCharsets.UTF_8), true), path); + // @formatter:off + try (Reader reader = new InputStreamReader(BOMInputStream.builder() + .setPath(path) + .get(), "UTF-8"); + CSVParser parser = CSVFormat.EXCEL.builder() + .setHeader() + .get() + .parse(reader)) { + // @formatter:off + for (final CSVRecord record : parser) { + final String string = record.get("ColumnA"); + assertEquals("A", string); + } + } + } + + @Test + void testBomUtil() throws UnsupportedEncodingException, IOException { + final Path path = tempDir.resolve("test2.csv"); + Files.copy(Utils.createUtf8Input("ColumnA, ColumnB, ColumnC\r\nA, B, C\r\n".getBytes(StandardCharsets.UTF_8), true), path); + try (Reader reader = newReader(path); + // @formatter:off + CSVParser parser = CSVFormat.EXCEL.builder() + .setHeader() + .get() + .parse(reader)) { + // 
@formatter:on + for (final CSVRecord record : parser) { + final String string = record.get("ColumnA"); + assertEquals("A", string); + } + } + } + +} diff --git a/src/test/java/org/apache/commons/csv/Utils.java b/src/test/java/org/apache/commons/csv/Utils.java index fcdbc3d17f..5b5a05e043 100644 --- a/src/test/java/org/apache/commons/csv/Utils.java +++ b/src/test/java/org/apache/commons/csv/Utils.java @@ -1,19 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * https://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv; @@ -21,6 +22,8 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.util.List; /** @@ -31,18 +34,34 @@ final class Utils { /** * Checks if the 2d array has the same contents as the list of records. * - * @param message the message to be displayed + * @param message the message to be displayed * @param expected the 2d array of expected results - * @param actual the List of {@link CSVRecord} entries, each containing an array of values + * @param actual the List of {@link CSVRecord} entries, each containing an array of values + * @param maxRows the maximum number of rows expected, less than or equal to zero means no limit. */ - public static void compare(final String message, final String[][] expected, final List actual) { - final int expectedLength = expected.length; + public static void compare(final String message, final String[][] expected, final List actual, final long maxRows) { + final long expectedLength = maxRows > 0 ? Math.min(maxRows, expected.length) : expected.length; assertEquals(expectedLength, actual.size(), message + " - outer array size"); for (int i = 0; i < expectedLength; i++) { assertArrayEquals(expected[i], actual.get(i).values(), message + " (entry " + i + ")"); } } + /** + * Creates an input stream, with or without a BOM. 
+ */ + static InputStream createUtf8Input(final byte[] baseData, final boolean addBom) { + byte[] data = baseData; + if (addBom) { + data = new byte[baseData.length + 3]; + data[0] = (byte) 0xEF; + data[1] = (byte) 0xBB; + data[2] = (byte) 0xBF; + System.arraycopy(baseData, 0, data, 3, baseData.length); + } + return new ByteArrayInputStream(data); + } + private Utils() { } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java index fca6bec299..67f1b785d5 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.issues; @@ -22,44 +24,40 @@ import org.apache.commons.csv.QuoteMode; import org.junit.jupiter.api.Test; -public class JiraCsv148Test { +class JiraCsv148Test { @Test - public void testWithIgnoreSurroundingSpacesEmpty() { + void testWithIgnoreSurroundingSpacesEmpty() { // @formatter:off final CSVFormat format = CSVFormat.DEFAULT.builder() .setQuoteMode(QuoteMode.ALL) .setIgnoreSurroundingSpaces(true) - .build(); + .get(); // @formatter:on assertEquals( - "\"\",\" \",\" Single space on the left\",\"Single space on the right \"," - + "\" Single spaces on both sides \",\" Multiple spaces on the left\"," - + "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"", - format.format("", " ", " Single space on the left", "Single space on the right ", - " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ", - " Multiple spaces on both sides ")); + "\"\",\" \",\" Single space on the left\",\"Single space on the right \"," + + "\" Single spaces on both sides \",\" Multiple spaces on the left\"," + + "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"", + format.format("", " ", " Single space on the left", "Single space on the right ", " Single spaces on both sides ", + " Multiple spaces on the left", "Multiple spaces on the right ", " Multiple spaces on both sides ")); } /** - * The difference between withTrim()and withIgnoreSurroundingSpace()๏ผš difference: withTrim() can remove the leading - * and trailing spaces and newlines in quotation marks, while withIgnoreSurroundingSpace() cannot The same point: - * you can remove the leading and 
trailing spaces,tabs and other symbols. + * The difference between withTrim()and withIgnoreSurroundingSpace()： difference: withTrim() can remove the leading and trailing spaces and newlines in + * quotation marks, while withIgnoreSurroundingSpace() cannot The same point: you can remove the leading and trailing spaces, tabs and other symbols. */ @Test - public void testWithTrimEmpty() { + void testWithTrimEmpty() { // @formatter:off final CSVFormat format = CSVFormat.DEFAULT.builder() .setQuoteMode(QuoteMode.ALL) .setTrim(true) - .build(); + .get(); // @formatter:on assertEquals( - "\"\",\"\",\"Single space on the left\",\"Single space on the right\"," - + "\"Single spaces on both sides\",\"Multiple spaces on the left\"," - + "\"Multiple spaces on the right\",\"Multiple spaces on both sides\"", - format.format("", " ", " Single space on the left", "Single space on the right ", - " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ", - " Multiple spaces on both sides ")); + "\"\",\"\",\"Single space on the left\",\"Single space on the right\",\"Single spaces on both sides\",\"Multiple spaces on the left\"," + + "\"Multiple spaces on the right\",\"Multiple spaces on both sides\"", + format.format("", " ", " Single space on the left", "Single space on the right ", " Single spaces on both sides ", + " Multiple spaces on the left", "Multiple spaces on the right ", " Multiple spaces on both sides ")); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java index a42cb2a359..b32e965665 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java @@ -1,22 +1,25 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; import java.io.StringReader; @@ -26,12 +29,12 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv149Test { +class JiraCsv149Test { private static final String CR_LF = "\r\n"; @Test - public void testJiraCsv149EndWithEOL() throws IOException { + void testJiraCsv149EndWithEOL() throws IOException { testJiraCsv149EndWithEolAtEof(true); } @@ -40,24 +43,25 @@ private void testJiraCsv149EndWithEolAtEof(final boolean eolAtEof) throws IOExce if (eolAtEof) { source += CR_LF; } - final StringReader records = new StringReader(source); + final StringReader reader = new StringReader(source); // @formatter:off final CSVFormat format = CSVFormat.RFC4180.builder() .setHeader() .setSkipHeaderRecord(true) .setQuote('"') - .build(); + .get(); // @formatter:on int lineCounter = 2; - try (final CSVParser parser = new CSVParser(records, format)) { + try (CSVParser parser = CSVParser.builder().setReader(reader).setFormat(format).get()) { for (final CSVRecord record : parser) { + assertNotNull(record); assertEquals(lineCounter++, parser.getCurrentLineNumber()); } } } @Test - public void testJiraCsv149EndWithoutEOL() throws IOException { + void testJiraCsv149EndWithoutEOL() throws IOException { testJiraCsv149EndWithEolAtEof(false); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv150Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv150Test.java new file mode 100644 index 0000000000..eec91d52d0 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv150Test.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.junit.jupiter.api.Test; + +class JiraCsv150Test { + + private void testDisable(final CSVFormat format, final StringReader reader) throws IOException { + try (CSVParser csvParser = CSVParser.builder().setReader(reader).setFormat(format).get()) { + assertEquals(1, csvParser.getRecords().size()); + } + } + + @Test + void testDisableComment() throws IOException { + final StringReader stringReader = new StringReader("\"66\u2441\",,\"\",\"DeutscheBK\ufffe\",\"000\"\r\n"); + testDisable(CSVFormat.DEFAULT.builder().setCommentMarker(null).get(), stringReader); + } + + @Test + void testDisableEncapsulation() throws IOException { + final StringReader stringReader = new StringReader("66\u2441,,\"\",\ufffeDeutscheBK,\"000\"\r\n"); + testDisable(CSVFormat.DEFAULT.builder().setQuote(null).get(), stringReader); + } + + @Test + void testDisableEscaping() throws IOException { + final StringReader stringReader = new StringReader("\"66\u2441\",,\"\",\"DeutscheBK\ufffe\",\"000\"\r\n"); + testDisable(CSVFormat.DEFAULT.builder().setEscape(null).get(), stringReader); + } +} diff --git 
a/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java index 1f7d93e26b..90d657fcd1 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv154Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; @@ -24,20 +26,20 @@ import org.apache.commons.csv.CSVPrinter; import org.junit.jupiter.api.Test; -public class JiraCsv154Test { +class JiraCsv154Test { @Test - public void testJiraCsv154_withCommentMarker() throws IOException { + void testJiraCsv154_withCommentMarker() throws IOException { final String comment = "This is a header comment"; // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setHeader("H1", "H2") .setCommentMarker('#') .setHeaderComments(comment) - .build(); + .get(); // @formatter:on final StringBuilder out = new StringBuilder(); - try (final CSVPrinter printer = format.print(out)) { + try (CSVPrinter printer = format.print(out)) { printer.print("A"); printer.print("B"); } @@ -46,17 +48,17 @@ public void testJiraCsv154_withCommentMarker() throws IOException { } @Test - public void testJiraCsv154_withHeaderComments() throws IOException { + void testJiraCsv154_withHeaderComments() throws IOException { final String comment = "This is a header comment"; // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setHeader("H1", "H2") .setHeaderComments(comment) .setCommentMarker('#') - .build(); + .get(); // @formatter:on final StringBuilder out = new StringBuilder(); - try (final CSVPrinter printer = format.print(out)) { + try (CSVPrinter printer = format.print(out)) { printer.print("A"); printer.print("B"); } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java index c7effb6e7f..607d0cf2a3 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; @@ -29,7 +31,7 @@ import org.apache.commons.csv.QuoteMode; import org.junit.jupiter.api.Test; -public class JiraCsv167Test { +class JiraCsv167Test { private Reader getTestReader() { return new InputStreamReader( @@ -37,10 +39,10 @@ private Reader getTestReader() { } @Test - public void parse() throws IOException { + void testParse() throws IOException { int totcomment = 0; int totrecs = 0; - try (final Reader reader = getTestReader(); final BufferedReader br = new BufferedReader(reader)) { + try (Reader reader = getTestReader(); BufferedReader br = new BufferedReader(reader)) { String s = null; boolean lastWasComment = false; while ((s = br.readLine()) != null) { @@ -70,11 +72,11 @@ public void parse() throws IOException { .setQuoteMode(QuoteMode.ALL) .setRecordSeparator('\n') .setSkipHeaderRecord(false) - .build(); + .get(); // @formatter:on int comments = 0; int records = 0; - try (final Reader reader = getTestReader(); final CSVParser parser = format.parse(reader)) { + try (Reader reader = getTestReader(); CSVParser parser = format.parse(reader)) { for (final CSVRecord csvRecord : parser) { records++; if (csvRecord.hasComment()) { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java index 1dd072bfeb..1117c12ac9 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java @@ -1,52 +1,54 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv.issues; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; -import java.nio.charset.StandardCharsets; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVRecord; -import org.junit.jupiter.api.Test; - -public class JiraCsv198Test { - - // @formatter:off - private static final CSVFormat CSV_FORMAT = CSVFormat.EXCEL.builder() - .setDelimiter('^') - .setHeader() - .setSkipHeaderRecord(true) - .build(); - // @formatter:on - - @Test - public void test() throws UnsupportedEncodingException, IOException { - final InputStream pointsOfReference = getClass().getResourceAsStream("/org/apache/commons/csv/CSV-198/optd_por_public.csv"); - assertNotNull(pointsOfReference); - try (@SuppressWarnings("resource") - CSVParser parser = CSV_FORMAT.parse(new InputStreamReader(pointsOfReference, StandardCharsets.UTF_8))) { - parser.forEach(record -> assertNotNull(record.get("location_type"))); - } - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.junit.jupiter.api.Test; + +class JiraCsv198Test { + + // @formatter:off + private static final CSVFormat CSV_FORMAT = CSVFormat.EXCEL.builder() + .setDelimiter('^') + .setHeader() + .setSkipHeaderRecord(true) + .get(); + // @formatter:on + + @Test + void test() throws UnsupportedEncodingException, IOException { + final InputStream pointsOfReference = getClass().getResourceAsStream("/org/apache/commons/csv/CSV-198/optd_por_public.csv"); + assertNotNull(pointsOfReference); + try (@SuppressWarnings("resource") + CSVParser parser = CSV_FORMAT.parse(new InputStreamReader(pointsOfReference, StandardCharsets.UTF_8))) { + parser.forEach(record -> assertNotNull(record.get("location_type"))); + } + } + +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java index 17c62351e2..2c9226506c 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; @@ -27,83 +29,83 @@ * JIRA: withNullString value is printed without quotes when * QuoteMode.ALL is specified */ -public class JiraCsv203Test { +class JiraCsv203Test { @Test - public void testQuoteModeAll() throws Exception { + void testQuoteModeAll() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") .setIgnoreSurroundingSpaces(true) .setQuoteMode(QuoteMode.ALL) - .build(); + .get(); // @formatter:on final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + try (CSVPrinter printer = new CSVPrinter(buffer, format)) { printer.printRecord(null, "Hello", null, "World"); } assertEquals("\"N/A\",\"Hello\",\"N/A\",\"World\"\r\n", buffer.toString()); } @Test - public void testQuoteModeAllNonNull() throws Exception { + void testQuoteModeAllNonNull() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") .setIgnoreSurroundingSpaces(true) .setQuoteMode(QuoteMode.ALL_NON_NULL) - .build(); + .get(); // @formatter:on final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + try (CSVPrinter printer = new CSVPrinter(buffer, format)) { printer.printRecord(null, "Hello", null, "World"); } assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString()); } @Test - public void testQuoteModeMinimal() throws Exception { + void testQuoteModeMinimal() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") .setIgnoreSurroundingSpaces(true) .setQuoteMode(QuoteMode.MINIMAL) - .build(); + .get(); // @formatter:on final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + try (CSVPrinter printer = new CSVPrinter(buffer, format)) { printer.printRecord(null, "Hello", null, "World"); } 
assertEquals("N/A,Hello,N/A,World\r\n", buffer.toString()); } @Test - public void testQuoteModeNonNumeric() throws Exception { + void testQuoteModeNonNumeric() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") .setIgnoreSurroundingSpaces(true) .setQuoteMode(QuoteMode.NON_NUMERIC) - .build(); + .get(); // @formatter:on final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + try (CSVPrinter printer = new CSVPrinter(buffer, format)) { printer.printRecord(null, "Hello", null, "World"); } assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString()); } @Test - public void testWithEmptyValues() throws Exception { + void testWithEmptyValues() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") .setIgnoreSurroundingSpaces(true) .setQuoteMode(QuoteMode.ALL) - .build(); + .get(); // @formatter:on final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + try (CSVPrinter printer = new CSVPrinter(buffer, format)) { printer.printRecord("", "Hello", "", "World"); // printer.printRecord(new Object[] { null, "Hello", null, "World" }); } @@ -111,31 +113,31 @@ public void testWithEmptyValues() throws Exception { } @Test - public void testWithoutNullString() throws Exception { + void testWithoutNullString() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() //.setNullString("N/A") .setIgnoreSurroundingSpaces(true) .setQuoteMode(QuoteMode.ALL) - .build(); + .get(); // @formatter:on final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + try (CSVPrinter printer = new CSVPrinter(buffer, format)) { printer.printRecord(null, "Hello", null, "World"); } assertEquals(",\"Hello\",,\"World\"\r\n", buffer.toString()); } @Test - public void 
testWithoutQuoteMode() throws Exception { + void testWithoutQuoteMode() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") .setIgnoreSurroundingSpaces(true) - .build(); + .get(); // @formatter:on final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + try (CSVPrinter printer = new CSVPrinter(buffer, format)) { printer.printRecord(null, "Hello", null, "World"); } assertEquals("N/A,Hello,N/A,World\r\n", buffer.toString()); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java index 3d0a4fb4c7..2fecd10f16 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv206Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.issues; @@ -28,16 +30,16 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv206Test { +class JiraCsv206Test { @Test - public void testJiraCsv206MultipleCharacterDelimiter() throws IOException { + void testJiraCsv206MultipleCharacterDelimiter() throws IOException { // Read with multiple character delimiter final String source = "FirstName[|]LastName[|]Address\r\nJohn[|]Smith[|]123 Main St."; final StringReader reader = new StringReader(source); - final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").build(); + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); CSVRecord record = null; - try (final CSVParser csvParser = new CSVParser(reader, csvFormat)) { + try (CSVParser csvParser = CSVParser.builder().setReader(reader).setFormat(format).get()) { final Iterator iterator = csvParser.iterator(); record = iterator.next(); assertEquals("FirstName", record.get(0)); @@ -49,17 +51,21 @@ record = iterator.next(); assertEquals("123 Main St.", record.get(2)); } // Write with multiple character delimiter - final String outString = "# Change delimiter to [I]\r\n" + 
"first name[I]last name[I]address\r\n" - + "John[I]Smith[I]123 Main St."; + // @formatter:off + final String outString = + "# Change delimiter to [I]\r\n" + + "first name[I]last name[I]address\r\n" + + "John[I]Smith[I]123 Main St."; + // @formatter:on final String comment = "Change delimiter to [I]"; // @formatter:off - final CSVFormat format = CSVFormat.EXCEL.builder() + final CSVFormat formatExcel = CSVFormat.EXCEL.builder() .setDelimiter("[I]").setHeader("first name", "last name", "address") .setCommentMarker('#') - .setHeaderComments(comment).build(); + .setHeaderComments(comment).get(); // @formatter:on final StringBuilder out = new StringBuilder(); - try (final CSVPrinter printer = format.print(out)) { + try (CSVPrinter printer = formatExcel.print(out)) { printer.print(record.get(0)); printer.print(record.get(1)); printer.print(record.get(2)); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java index f90f18b083..28b559d1e1 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java @@ -1,54 +1,54 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv.issues; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.IOException; -import java.io.StringReader; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVRecord; -import org.junit.jupiter.api.Test; - -public class JiraCsv211Test { - - @Test - public void testJiraCsv211Format() throws IOException { - final String[] values = {"1", "Jane Doe", "USA", ""}; - - // @formatter:off - final CSVFormat printFormat = CSVFormat.DEFAULT.builder() - .setDelimiter('\t') - .setHeader("ID", "Name", "Country", "Age") - .build(); - // @formatter:on - final String formatted = printFormat.format(values); - assertEquals("ID\tName\tCountry\tAge\r\n1\tJane Doe\tUSA\t", formatted); - - final CSVFormat parseFormat = CSVFormat.DEFAULT.builder().setDelimiter('\t').setHeader().setSkipHeaderRecord(true).build(); - try (final CSVParser parser = parseFormat.parse(new StringReader(formatted))) { - parser.forEach(record -> { - assertEquals("1", record.get(0)); - assertEquals("Jane Doe", record.get(1)); - assertEquals("USA", record.get(2)); - assertEquals("", record.get(3)); - }); - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.junit.jupiter.api.Test; + +class JiraCsv211Test { + + @Test + void testJiraCsv211Format() throws IOException { + // @formatter:off + final CSVFormat printFormat = CSVFormat.DEFAULT.builder() + .setDelimiter('\t') + .setHeader("ID", "Name", "Country", "Age") + .get(); + // @formatter:on + final String formatted = printFormat.format("1", "Jane Doe", "USA", ""); + assertEquals("ID\tName\tCountry\tAge\r\n1\tJane Doe\tUSA\t", formatted); + + final CSVFormat parseFormat = CSVFormat.DEFAULT.builder().setDelimiter('\t').setHeader().setSkipHeaderRecord(true).get(); + try (CSVParser parser = parseFormat.parse(new StringReader(formatted))) { + parser.forEach(record -> { + assertEquals("1", record.get(0)); + assertEquals("Jane Doe", record.get(1)); + assertEquals("USA", record.get(2)); + assertEquals("", record.get(3)); + }); + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java index 074883fe0b..90f5da4c5a 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.issues; @@ -25,19 +27,18 @@ import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.QuoteMode; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; /** * Tests https://issues.apache.org/jira/browse/CSV-213 - * + *

      * This is normal behavior with the current architecture: The iterator() API presents an object that is backed by data * in the CSVParser as the parser is streaming over the file. The CSVParser is like a forward-only stream. When you * create a new Iterator you are only creating a new view on the same position in the parser's stream. For the behavior * you want, you need to open a new CSVParser. + *

      */ -@Disabled -public class JiraCsv213Test { +class JiraCsv213Test { private void createEndChannel(final File csvFile) { // @formatter:off @@ -47,7 +48,7 @@ private void createEndChannel(final File csvFile) { .setSkipHeaderRecord(true) .setRecordSeparator('\n') .setQuoteMode(QuoteMode.ALL) - .build(); + .get(); // @formatter:on try (Reader reader = Files.newBufferedReader(csvFile.toPath(), StandardCharsets.UTF_8); CSVParser parser = csvFormat.parse(reader)) { @@ -63,7 +64,7 @@ private void createEndChannel(final File csvFile) { } @Test - public void test() { - createEndChannel(new File("src/test/resources/CSV-213/999751170.patch.csv")); + void test() { + createEndChannel(new File("src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv")); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv227Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv227Test.java new file mode 100644 index 0000000000..2b9e335a8f --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv227Test.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.QuoteMode; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/browse/CSV-227 + */ +class JiraCsv227Test { + + @Test + public void test() throws IOException { + final StringBuilder out = new StringBuilder(); + try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL))) { + printer.printRecord("ㅁㅎㄷㄹ", "ㅁㅎㄷㄹ", "", "test2"); + printer.printRecord("한글3", "hello3", "3한글3", "test3"); + printer.printRecord("", "hello4", "", "test4"); + } + // ㅁㅎㄷㄹ,ㅁㅎㄷㄹ,,test2 + // 한글3,hello3,3한글3,test3 + // "",hello4,,test4 + assertEquals("ㅁㅎㄷㄹ,ㅁㅎㄷㄹ,,test2\r\n한글3,hello3,3한글3,test3\r\n\"\",hello4,,test4\r\n", out.toString()); + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java index 4dc18a001e..c2d9ac5910 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv247Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; @@ -32,16 +34,16 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv247Test { +class JiraCsv247Test { @Test - public void testHeadersMissingOneColumnWhenAllowingMissingColumnNames() throws Exception { - final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setAllowMissingColumnNames(true).build(); + void testHeadersMissingOneColumnWhenAllowingMissingColumnNames() throws Exception { + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().setAllowMissingColumnNames(true).get(); assertTrue(format.getAllowMissingColumnNames(), "We should allow missing column names"); final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); - try (final CSVParser parser = format.parse(in)) { + try (CSVParser parser = format.parse(in)) { assertEquals(Arrays.asList("a", "", "c", "d", "e"), parser.getHeaderNames()); final Iterator iterator = parser.iterator(); CSVRecord record = iterator.next(); @@ -61,20 +63,20 @@ record = iterator.next(); } @Test - public void testHeadersMissingThrowsWhenNotAllowingMissingColumnNames() { - final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().build(); + void testHeadersMissingThrowsWhenNotAllowingMissingColumnNames() { + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().get(); assertFalse(format.getAllowMissingColumnNames(), "By default we should not allow missing column names"); assertThrows(IllegalArgumentException.class, () -> { - try (final Reader reader = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); + try (Reader reader = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); CSVParser parser = format.parse(reader);) { // should fail } }, "1 missing column header is not allowed"); assertThrows(IllegalArgumentException.class, () -> { - try (final Reader reader = new StringReader("a,,c,d,\n1,2,3,4,5\nv,w,x,y,z"); + try (Reader reader = new StringReader("a,,c,d,\n1,2,3,4,5\nv,w,x,y,z"); CSVParser 
parser = format.parse(reader);) { // should fail } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java index 84d274acf6..480a9dffa9 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java @@ -1,25 +1,29 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; @@ -29,13 +33,14 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv248Test { +class JiraCsv248Test { + private static InputStream getTestInput() { return ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin"); } /** - * Test deserialisation of a CSVRecord created using version 1.6. + * Test deserialization of a CSVRecord created using version 1.6. * *

      * This test asserts that serialization from 1.8 onwards is consistent with previous versions. Serialization was @@ -45,15 +50,15 @@ private static InputStream getTestInput() { * @throws ClassNotFoundException If the CSVRecord cannot be deserialized */ @Test - public void testJiraCsv248() throws IOException, ClassNotFoundException { + void testJiraCsv248() throws IOException, ClassNotFoundException { // Record was originally created using CSV version 1.6 with the following code: - // try (final CSVParser parser = CSVParser.parse("A,B\n#my comment\nOne,Two", + // try (CSVParser parser = CSVParser.parse("A,B\n#my comment\nOne,Two", // CSVFormat.DEFAULT.builder().setHeader().setCommentMarker('#'))) { // CSVRecord rec = parser.iterator().next(); // } - try (InputStream in = getTestInput(); final ObjectInputStream ois = new ObjectInputStream(in)) { + try (InputStream in = getTestInput(); ObjectInputStream ois = new ObjectInputStream(in)) { final Object object = ois.readObject(); - assertTrue(object instanceof CSVRecord); + assertInstanceOf(CSVRecord.class, object); final CSVRecord rec = (CSVRecord) object; assertEquals(1L, rec.getRecordNumber()); assertEquals("One", rec.get(0)); @@ -70,12 +75,7 @@ public void testJiraCsv248() throws IOException, ClassNotFoundException { assertFalse(rec.isSet("A")); assertEquals(0, rec.toMap().size()); // This will throw - try { - rec.get("A"); - org.junit.jupiter.api.Assertions.fail("Access by name is not expected after deserialisation"); - } catch (final IllegalStateException expected) { - // OK - } + assertThrows(IllegalStateException.class, () -> rec.get("A")); } } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java index 7989a4646f..4034b04bd7 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv249Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache 
Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; @@ -30,18 +32,18 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; -public class JiraCsv249Test { +class JiraCsv249Test { @Test - public void testJiraCsv249() throws IOException { - final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setEscape('\\').build(); + void testJiraCsv249() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setEscape('\\').get(); final StringWriter stringWriter = new StringWriter(); - try (CSVPrinter printer = new CSVPrinter(stringWriter, csvFormat)) { + try (CSVPrinter printer = new CSVPrinter(stringWriter, format)) { printer.printRecord("foo \\", "bar"); } - final StringReader stringReader = new StringReader(stringWriter.toString()); + final StringReader reader = new StringReader(stringWriter.toString()); final List records; - try (CSVParser parser = new CSVParser(stringReader, csvFormat)) { + try (CSVParser parser = CSVParser.builder().setReader(reader).setFormat(format).get()) { records = parser.getRecords(); } records.forEach(record -> { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java index 90507313eb..13bb6a8270 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java @@ -1,22 +1,24 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.issues; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.apache.commons.csv.CsvAssertions.assertValuesEquals; import java.io.IOException; import java.io.StringReader; @@ -31,24 +33,24 @@ /** * Setting QuoteMode:ALL_NON_NULL or NON_NUMERIC can distinguish between empty string columns and absent value columns. 
*/ -public class JiraCsv253Test { - - private void assertArrayEqual(final String[] expected, final CSVRecord actual) { - for (int i = 0; i < expected.length; i++) { - assertEquals(expected[i], actual.get(i)); - } - } +class JiraCsv253Test { @Test - public void testHandleAbsentValues() throws IOException { - final String source = "\"John\",,\"Doe\"\n" + ",\"AA\",123\n" + "\"John\",90,\n" + "\"\",,90"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build(); - try (final CSVParser parser = csvFormat.parse(new StringReader(source))) { + void testHandleAbsentValues() throws IOException { + // @formatter:off + final String source = + "\"John\",,\"Doe\"\n" + + ",\"AA\",123\n" + + "\"John\",90,\n" + + "\"\",,90"; + // @formatter:on + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).get(); + try (CSVParser parser = csvFormat.parse(new StringReader(source))) { final Iterator csvRecords = parser.iterator(); - assertArrayEqual(new String[] {"John", null, "Doe"}, csvRecords.next()); - assertArrayEqual(new String[] {null, "AA", "123"}, csvRecords.next()); - assertArrayEqual(new String[] {"John", "90", null}, csvRecords.next()); - assertArrayEqual(new String[] {"", null, "90"}, csvRecords.next()); + assertValuesEquals(new String[] {"John", null, "Doe"}, csvRecords.next()); + assertValuesEquals(new String[] {null, "AA", "123"}, csvRecords.next()); + assertValuesEquals(new String[] {"John", "90", null}, csvRecords.next()); + assertValuesEquals(new String[] {"", null, "90"}, csvRecords.next()); } } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv254Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv254Test.java new file mode 100644 index 0000000000..629b42ee6b --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv254Test.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv.issues; + +import static org.apache.commons.csv.CsvAssertions.assertValuesEquals; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Iterator; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/browse/CSV-254. 
+ */ +class JiraCsv254Test { + + @Test + void test() throws IOException { + final CSVFormat csvFormat = CSVFormat.POSTGRESQL_CSV; + try (BufferedReader reader = Files.newBufferedReader(Paths.get("src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv"), + StandardCharsets.UTF_8); CSVParser parser = csvFormat.parse(reader)) { + final Iterator csvRecords = parser.iterator(); + assertValuesEquals(new String[] { "AA", "33", null }, csvRecords.next()); + assertValuesEquals(new String[] { "AA", null, "" }, csvRecords.next()); + assertValuesEquals(new String[] { null, "33", "CC" }, csvRecords.next()); + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv257Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv257Test.java new file mode 100644 index 0000000000..4234a7a0fa --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv257Test.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.junit.jupiter.api.Test; + +/** + * Tests https://issues.apache.org/jira/browse/CSV-257 + */ +class JiraCsv257Test { + + private static final String INPUT = ","; + + @Test + void testHeaderBuilder() throws IOException { + // @formatter:off + final CSVFormat format = CSVFormat.RFC4180.builder() + .setDelimiter(INPUT.charAt(0)) + .setHeader() + .setSkipHeaderRecord(true) + .setIgnoreSurroundingSpaces(true) + .get(); + // @formatter:on + // Document the current behavior: Throw an IllegalArgumentException if a header name is missing. + assertThrows(IllegalArgumentException.class, () -> { + try (CSVParser parser = CSVParser.parse(INPUT, format)) { + // empty + } + }); + } + + @Test + void testHeaderDepreacted() throws IOException { + // @formatter:off + final CSVFormat format = CSVFormat.RFC4180 + .withDelimiter(INPUT.charAt(0)) + .withFirstRecordAsHeader() + .withIgnoreSurroundingSpaces(); + // @formatter:on + // Document the current behavior: Throw an IllegalArgumentException if a header name is missing.
+ assertThrows(IllegalArgumentException.class, () -> { + try (CSVParser parser = new CSVParser(new StringReader(INPUT), format)) { + // empty + } + }); + } + + @Test + void testNoHeaderBuilder() throws IOException { + // @formatter:off + final CSVFormat format = CSVFormat.RFC4180.builder() + .setDelimiter(INPUT.charAt(0)) + .setIgnoreSurroundingSpaces(true) + .get(); + // @formatter:on + try (CSVParser parser = CSVParser.parse(INPUT, format)) { + // empty + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java index 062ed7caf6..18bb9580a3 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv263Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.issues; @@ -30,17 +32,17 @@ /** * Tests [CSV-263] Print from Reader with embedded quotes generates incorrect output. */ -public class JiraCsv263Test { +class JiraCsv263Test { @Test - public void testPrintFromReaderWithQuotes() throws IOException { + void testPrintFromReaderWithQuotes() throws IOException { // @formatter:off final CSVFormat format = CSVFormat.RFC4180.builder() .setDelimiter(',') .setQuote('"') .setEscape('?') .setQuoteMode(QuoteMode.NON_NUMERIC) - .build(); + .get(); // @formatter:on final StringBuilder out = new StringBuilder(); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java index 24bb97baa0..857e42cb8f 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; @@ -23,6 +25,7 @@ import java.io.StringReader; import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.DuplicateHeaderMode; import org.junit.jupiter.api.Test; @@ -32,7 +35,7 @@ * * @see Jira Ticker */ -public class JiraCsv264Test { +class JiraCsv264Test { private static final String CSV_STRING = "\"\",\"B\",\"\"\n" + "\"1\",\"2\",\"3\"\n" + @@ -46,42 +49,39 @@ public class JiraCsv264Test { "\"6\",\"7\",\"\",\"\",\"10\""; @Test - public void testJiraCsv264() { + void testJiraCsv264() { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW) .setAllowMissingColumnNames(true) - .build(); - + .get(); try (StringReader reader = new StringReader(CSV_STRING)) { assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader)); } } @Test - public void testJiraCsv264WithGapAllowEmpty() throws IOException { + void testJiraCsv264WithGapAllowEmpty() throws IOException { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY) .setAllowMissingColumnNames(true) - .build(); - - try (StringReader reader = new StringReader(CSV_STRING_GAP)) { - csvFormat.parse(reader); + .get(); + try (StringReader reader = new StringReader(CSV_STRING_GAP); CSVParser parser = csvFormat.parse(reader)) { + // empty } } @Test - public void testJiraCsv264WithGapDisallow() { + void testJiraCsv264WithGapDisallow() { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW) .setAllowMissingColumnNames(true) - .build(); - + .get(); try (StringReader reader = new StringReader(CSV_STRING_GAP)) { assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader)); } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java index f62b866585..1bccad702f 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.commons.csv.issues; @@ -31,25 +33,26 @@ /** * Tests [CSV-265] {@link CSVRecord#getCharacterPosition()} returns the correct position after encountering a comment. */ -public class JiraCsv265Test { +class JiraCsv265Test { @Test - public void testCharacterPositionWithComments() throws IOException { + void testCharacterPositionWithComments() throws IOException { // @formatter:off - final String csv = "# Comment1\n" - + "Header1,Header2\n" - + "# Comment2\n" - + "Value1,Value2\n" - + "# Comment3\n" - + "Value3,Value4\n" - + "# Comment4\n"; + final String csv = + "# Comment1\n" + + "Header1,Header2\n" + + "# Comment2\n" + + "Value1,Value2\n" + + "# Comment3\n" + + "Value3,Value4\n" + + "# Comment4\n"; final CSVFormat csvFormat = CSVFormat.DEFAULT.builder() .setCommentMarker('#') .setHeader() .setSkipHeaderRecord(true) - .build(); + .get(); // @formatter:on - try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) { + try (CSVParser parser = csvFormat.parse(new StringReader(csv))) { final Iterator itr = parser.iterator(); final CSVRecord record1 = itr.next(); assertEquals(csv.indexOf("# Comment2"), record1.getCharacterPosition()); @@ -59,24 +62,25 @@ public void testCharacterPositionWithComments() throws IOException { } @Test - public void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException { + void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException { // @formatter:off - final String csv = "# Comment1\n" - + "# Comment2\n" - + "Header1,Header2\n" - + "# Comment3\n" - + "# Comment4\n" - + "Value1,Value2\n" - + "# Comment5\n" - + "# Comment6\n" - + "Value3,Value4"; + final String csv = + "# Comment1\n" + + "# Comment2\n" + + "Header1,Header2\n" + + "# Comment3\n" + + "# Comment4\n" + + "Value1,Value2\n" + + "# Comment5\n" + + "# Comment6\n" + + "Value3,Value4"; final CSVFormat csvFormat = CSVFormat.DEFAULT.builder() .setCommentMarker('#') .setHeader() .setSkipHeaderRecord(true) - .build(); 
+ .get(); // @formatter:on - try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) { + try (CSVParser parser = csvFormat.parse(new StringReader(csv))) { final Iterator itr = parser.iterator(); final CSVRecord record1 = itr.next(); assertEquals(csv.indexOf("# Comment3"), record1.getCharacterPosition()); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java index 6150a76680..0269dec5d1 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv271Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.issues; @@ -27,26 +29,26 @@ import org.apache.commons.csv.CSVPrinter; import org.junit.jupiter.api.Test; -public class JiraCsv271Test { +class JiraCsv271Test { @Test - public void testJiraCsv271_withArray() throws IOException { + void testJiraCsv271_withArray() throws IOException { final CSVFormat csvFormat = CSVFormat.DEFAULT; final StringWriter stringWriter = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(stringWriter, csvFormat)) { printer.print("a"); - printer.printRecord("b","c"); + printer.printRecord("b", "c"); } assertEquals("a,b,c\r\n", stringWriter.toString()); } @Test - public void testJiraCsv271_withList() throws IOException { + void testJiraCsv271_withList() throws IOException { final CSVFormat csvFormat = CSVFormat.DEFAULT; final StringWriter stringWriter = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(stringWriter, csvFormat)) { printer.print("a"); - printer.printRecord(Arrays.asList("b","c")); + printer.printRecord(Arrays.asList("b", "c")); } assertEquals("a,b,c\r\n", stringWriter.toString()); } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java index 920dcb73e6..065ee6bb37 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java @@ -1,230 +1,216 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv.issues; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.Reader; -import java.io.StringReader; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVPrinter; -import org.apache.commons.csv.CSVRecord; -import org.junit.jupiter.api.Test; - -public class JiraCsv288Test { - @Test - // Before fix: - // expected: but was: - public void testParseWithABADelimiter() throws Exception { - final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser parser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").build())) { - for (final CSVRecord csvRecord : parser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithDoublePipeDelimiter() throws Exception { - final Reader in = new StringReader("a||b||c||d||||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - - public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { - final Reader in = new StringReader("a||bb||cc||dd||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,bb,cc,dd,f", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception { - final Reader in = new StringReader("a||b||c||d||||f||"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f,", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithDoublePipeDelimiterQuoted() throws Exception { - final Reader in = new StringReader("a||\"b||c\"||d||||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord 
csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b||c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception { - final Reader in = new StringReader("a|b|c|d||f|"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f,", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithTriplePipeDelimiter() throws Exception { - final Reader in = new StringReader("a|||b|||c|||d||||||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter1() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } 
- assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter2() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f~"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f~", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter3() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f|"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f|", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter4() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - 
public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f~|"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f,", stringBuilder.toString()); - } - } - } -} \ No newline at end of file +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +class JiraCsv288Test { + + private void print(final CSVRecord csvRecord, final CSVPrinter csvPrinter) throws IOException { + for (final String value : csvRecord) { + csvPrinter.print(value); + } + } + + @Test + // Before fix: + // expected: but was: + void testParseWithABADelimiter() throws Exception { + final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser parser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").get())) { + for (final CSVRecord csvRecord : parser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + void testParseWithDoublePipeDelimiter() throws Exception { + final Reader in = new StringReader("a||b||c||d||||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + + void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { + final Reader in = new StringReader("a||bb||cc||dd||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new 
CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,bb,cc,dd,f", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a||b||c||d||||f||"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + void testParseWithDoublePipeDelimiterQuoted() throws Exception { + final Reader in = new StringReader("a||\"b||c\"||d||||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b||c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a|b|c|d||f|"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + 
} + } + + @Test + // Before fix: + // expected: but was: + void testParseWithTriplePipeDelimiter() throws Exception { + final Reader in = new StringReader("a|||b|||c|||d||||||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + void testParseWithTwoCharDelimiter1() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + void testParseWithTwoCharDelimiter2() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f~", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + void testParseWithTwoCharDelimiter3() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f|"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser 
csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f|", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + void testParseWithTwoCharDelimiter4() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~|"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").get())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java new file mode 100644 index 0000000000..f251eeb7a5 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import java.io.InputStreamReader; +import java.io.StringReader; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +// psql (14.5 (Homebrew)) +// +// create table COMMONS_CSV_PSQL_TEST (ID INTEGER, COL1 VARCHAR, COL2 VARCHAR, COL3 VARCHAR, COL4 VARCHAR); +// insert into COMMONS_CSV_PSQL_TEST select 1, 'abc', 'test line 1' || chr(10) || 'test line 2', null, ''; +// insert into COMMONS_CSV_PSQL_TEST select 2, 'xyz', '\b:' || chr(8) || ' \t:' || chr(9) || ' \n:' || chr(10) || ' \r:' || chr(13), 'a', 'b'; +// insert into COMMONS_CSV_PSQL_TEST values (3, 'a', 'b,c,d', '"quoted"', 'e'); +// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.csv' WITH (FORMAT CSV); +// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.tsv'; +// +// cat /tmp/psql.csv +// 1,abc,"test line 1 +// test line 2",,"" +// 2,xyz,"\b:^H \t: \n: +// \r:^M",a,b +// 3,a,"b,c,d","""quoted""",e +// +// cat 
/tmp/psql.tsv +// 1 abc test line 1\ntest line 2 \N +// 2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b +// 3 a b,c,d "quoted" e +// +class JiraCsv290Test { + + private void testHelper(final String fileName, final CSVFormat format) throws Exception { + List> content = new ArrayList<>(); + try (CSVParser csvParser = CSVParser.parse(new InputStreamReader(this.getClass().getResourceAsStream("/org/apache/commons/csv/CSV-290/" + fileName)), + format)) { + content = csvParser.stream().collect(Collectors.mapping(CSVRecord::toList, Collectors.toList())); + } + + assertEquals(3, content.size()); + + assertEquals("1", content.get(0).get(0)); + assertEquals("abc", content.get(0).get(1)); + assertEquals("test line 1\ntest line 2", content.get(0).get(2)); // new line + assertNull(content.get(0).get(3)); // null + assertEquals("", content.get(0).get(4)); + + assertEquals("2", content.get(1).get(0)); + assertEquals("\\b:\b \\t:\t \\n:\n \\r:\r", content.get(1).get(2)); // \b, \t, \n, \r + + assertEquals("3", content.get(2).get(0)); + assertEquals("b,c,d", content.get(2).get(2)); // value has comma + assertEquals("\"quoted\"", content.get(2).get(3)); // quoted + } + + @Test + void testPostgresqlCsv() throws Exception { + testHelper("psql.csv", CSVFormat.POSTGRESQL_CSV); + } + + @Test + void testPostgresqlText() throws Exception { + testHelper("psql.tsv", CSVFormat.POSTGRESQL_TEXT); + } + + @Test + void testWriteThenRead() throws Exception { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).get(); + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord("column1", "column2"); + printer.printRecord("v11", "v12"); + printer.printRecord("v21", "v22"); + printer.close(); + try (CSVParser parser = CSVParser.builder().setReader(new StringReader(sw.toString())).setFormat(format).get()) { + assertArrayEquals(new Object[] { "column1", "column2" }, parser.getHeaderNames().toArray()); 
+ final Iterator i = parser.iterator(); + assertArrayEquals(new String[] { "v11", "v12" }, i.next().toList().toArray()); + assertArrayEquals(new String[] { "v21", "v22" }, i.next().toList().toArray()); + } + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv294Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv294Test.java new file mode 100644 index 0000000000..0e5de0751b --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv294Test.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +class JiraCsv294Test { + + private static void testInternal(final CSVFormat format, final String expectedSubstring) throws IOException { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + try (CSVPrinter printer = new CSVPrinter(new OutputStreamWriter(bos, StandardCharsets.UTF_8), format)) { + printer.printRecord("a", "b \"\"", "c"); + } + final byte[] written = bos.toByteArray(); + final String writtenString = new String(written, StandardCharsets.UTF_8); + assertTrue(writtenString.contains(expectedSubstring)); + try (CSVParser parser = CSVParser.builder().setReader(new InputStreamReader(new ByteArrayInputStream(written), StandardCharsets.UTF_8)) + .setFormat(format).get()) { + final List records = parser.getRecords(); + assertEquals(1, records.size()); + final CSVRecord record = records.get(0); + assertEquals("a", record.get(0)); + assertEquals("b \"\"", record.get(1)); + assertEquals("c", record.get(2)); + } + } + + @Test + void testDefaultCsvFormatWithBackslashEscapeWorks() throws IOException { + testInternal(CSVFormat.Builder.create().setEscape('\\').get(), ",\"b \\\"\\\"\","); + } + + @Test + void testDefaultCsvFormatWithNullEscapeWorks() throws IOException { + testInternal(CSVFormat.Builder.create().setEscape(null).get(), ",\"b \"\"\"\"\","); + } + + @Test + void testDefaultCsvFormatWithQuoteEscapeWorks() throws IOException { + 
// this one doesn't actually work but should behave like setEscape(null) + // Printer is writing the expected content but Parser is unable to consume it + testInternal(CSVFormat.Builder.create().setEscape('"').get(), ",\"b \"\"\"\"\","); + } + + @Test + void testDefaultCsvFormatWorks() throws IOException { + testInternal(CSVFormat.Builder.create().get(), ",\"b \"\"\"\"\","); + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java index 5b62d9af42..7816412265 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.issues; @@ -41,7 +43,7 @@ * Jira CSV-253 to a certain extent. *

      */ -public class JiraCsv93Test { +class JiraCsv93Test { private static Object[] objects1 = {"abc", "", null, "a,b,c", 123}; private static Object[] objects2 = {"abc", "NULL", null, "a,b,c", 123}; @@ -50,7 +52,7 @@ private void every(final CSVFormat csvFormat, final Object[] objects, final Stri throws IOException { final String source = csvFormat.format(objects); assertEquals(format, csvFormat.format(objects)); - try (final CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { final CSVRecord csvRecord = csvParser.iterator().next(); for (int i = 0; i < data.length; i++) { assertEquals(csvRecord.get(i), data[i]); @@ -59,29 +61,29 @@ private void every(final CSVFormat csvFormat, final Object[] objects, final Stri } @Test - public void testWithNotSetNullString() throws IOException { + void testWithNotSetNullString() throws IOException { // @formatter:off every(CSVFormat.DEFAULT, objects1, "abc,,,\"a,b,c\",123", new String[]{"abc", "", "", "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL).build(), + every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL).get(), objects1, "\"abc\",\"\",,\"a,b,c\",\"123\"", new String[]{"abc", "", "", "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL_NON_NULL).build(), + every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL_NON_NULL).get(), objects1, "\"abc\",\"\",,\"a,b,c\",\"123\"", new String[]{"abc", "", null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.MINIMAL).build(), + every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.MINIMAL).get(), objects1, "abc,,,\"a,b,c\",123", new String[]{"abc", "", "", "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setEscape('?').setQuoteMode(QuoteMode.NONE).build(), + every(CSVFormat.DEFAULT.builder().setEscape('?').setQuoteMode(QuoteMode.NONE).get(), objects1, "abc,,,a?,b?,c,123", new 
String[]{"abc", "", "", "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build(), + every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).get(), objects1, "\"abc\",\"\",,\"a,b,c\",123", new String[]{"abc", "", null, "a,b,c", "123"}); @@ -89,29 +91,29 @@ public void testWithNotSetNullString() throws IOException { } @Test - public void testWithSetNullStringEmptyString() throws IOException { + void testWithSetNullStringEmptyString() throws IOException { // @formatter:off - every(CSVFormat.DEFAULT.builder().setNullString("").build(), + every(CSVFormat.DEFAULT.builder().setNullString("").get(), objects1, "abc,,,\"a,b,c\",123", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL).build(), + every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL).get(), objects1, "\"abc\",\"\",\"\",\"a,b,c\",\"123\"", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL_NON_NULL).build(), + every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL_NON_NULL).get(), objects1, "\"abc\",\"\",,\"a,b,c\",\"123\"", new String[]{"abc", "", null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.MINIMAL).build(), + every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.MINIMAL).get(), objects1, "abc,,,\"a,b,c\",123", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("").setEscape('?').setQuoteMode(QuoteMode.NONE).build(), + every(CSVFormat.DEFAULT.builder().setNullString("").setEscape('?').setQuoteMode(QuoteMode.NONE).get(), objects1, "abc,,,a?,b?,c,123", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.NON_NUMERIC).build(), + 
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.NON_NUMERIC).get(), objects1, "\"abc\",\"\",,\"a,b,c\",123", new String[]{"abc", "", null, "a,b,c", "123"}); @@ -119,29 +121,29 @@ public void testWithSetNullStringEmptyString() throws IOException { } @Test - public void testWithSetNullStringNULL() throws IOException { + void testWithSetNullStringNULL() throws IOException { // @formatter:off - every(CSVFormat.DEFAULT.builder().setNullString("NULL").build(), + every(CSVFormat.DEFAULT.builder().setNullString("NULL").get(), objects2, "abc,NULL,NULL,\"a,b,c\",123", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL).build(), + every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL).get(), objects2, "\"abc\",\"NULL\",\"NULL\",\"a,b,c\",\"123\"", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL_NON_NULL).build(), + every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL_NON_NULL).get(), objects2, "\"abc\",\"NULL\",NULL,\"a,b,c\",\"123\"", new String[]{"abc", "NULL", null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.MINIMAL).build(), + every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.MINIMAL).get(), objects2, "abc,NULL,NULL,\"a,b,c\",123", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("NULL").setEscape('?').setQuoteMode(QuoteMode.NONE).build(), + every(CSVFormat.DEFAULT.builder().setNullString("NULL").setEscape('?').setQuoteMode(QuoteMode.NONE).get(), objects2, "abc,NULL,NULL,a?,b?,c,123", new String[]{"abc", null, null, "a,b,c", "123"}); - every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.NON_NUMERIC).build(), + 
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.NON_NUMERIC).get(), objects2, "\"abc\",\"NULL\",NULL,\"a,b,c\",123", new String[]{"abc", "NULL", null, "a,b,c", "123"}); diff --git a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java index 6a049881f5..bead12378d 100644 --- a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java @@ -1,18 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.commons.csv.perf; @@ -31,6 +33,7 @@ import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -40,12 +43,12 @@ * * To run this test, use: mvn test -Dtest=PerformanceTest */ -@SuppressWarnings("boxing") // test code -public class PerformanceTest { +class PerformanceTest { private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; - private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); + private static final File BIG_FILE = new File(FileUtils.getTempDirectoryPath(), "worldcitiespop.txt"); + @BeforeAll public static void setUpClass() throws FileNotFoundException, IOException { if (BIG_FILE.exists()) { @@ -53,10 +56,8 @@ public static void setUpClass() throws FileNotFoundException, IOException { return; } System.out.println("Decompressing test fixture to: " + BIG_FILE + "..."); - try ( - final InputStream input = new GZIPInputStream( - PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC)); - final OutputStream output = new FileOutputStream(BIG_FILE)) { + try (InputStream input = new GZIPInputStream(PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC)); + OutputStream output = new FileOutputStream(BIG_FILE)) { IOUtils.copy(input, output); System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); } @@ -69,9 +70,9 @@ private 
BufferedReader createBufferedReader() throws IOException { } private long parse(final Reader reader, final boolean traverseColumns) throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.builder().setIgnoreSurroundingSpaces(false).build(); + final CSVFormat format = CSVFormat.DEFAULT.builder().setIgnoreSurroundingSpaces(false).get(); long recordCount = 0; - try (final CSVParser parser = format.parse(reader)) { + try (CSVParser parser = format.parse(reader)) { for (final CSVRecord record : parser) { recordCount++; if (traverseColumns) { @@ -89,7 +90,7 @@ private void println(final String s) { System.out.println(s); } - private long readAll(final BufferedReader in) throws IOException { + private long readLines(final BufferedReader in) throws IOException { long count = 0; while (in.readLine() != null) { count++; @@ -99,38 +100,38 @@ private long readAll(final BufferedReader in) throws IOException { public long testParseBigFile(final boolean traverseColumns) throws Exception { final long startMillis = System.currentTimeMillis(); - try (final BufferedReader reader = this.createBufferedReader()) { - final long count = this.parse(reader, traverseColumns); + try (BufferedReader reader = createBufferedReader()) { + final long count = parse(reader, traverseColumns); final long totalMillis = System.currentTimeMillis() - startMillis; - this.println( + println( String.format("File parsed in %,d milliseconds with Commons CSV: %,d lines.", totalMillis, count)); return totalMillis; } } @Test - public void testParseBigFileRepeat() throws Exception { + void testParseBigFileRepeat() throws Exception { long bestTime = Long.MAX_VALUE; for (int i = 0; i < this.max; i++) { - bestTime = Math.min(this.testParseBigFile(false), bestTime); + bestTime = Math.min(testParseBigFile(false), bestTime); } - this.println(String.format("Best time out of %,d is %,d milliseconds.", this.max, bestTime)); + println(String.format("Best time out of %,d is %,d milliseconds.", this.max, bestTime)); 
} @Test - public void testReadBigFile() throws Exception { + void testReadBigFile() throws Exception { long bestTime = Long.MAX_VALUE; long count; for (int i = 0; i < this.max; i++) { final long startMillis; - try (final BufferedReader in = this.createBufferedReader()) { + try (BufferedReader in = createBufferedReader()) { startMillis = System.currentTimeMillis(); - count = this.readAll(in); + count = readLines(in); } final long totalMillis = System.currentTimeMillis() - startMillis; bestTime = Math.min(totalMillis, bestTime); - this.println(String.format("File read in %,d milliseconds: %,d lines.", totalMillis, count)); + println(String.format("File read in %,d milliseconds: %,d lines.", totalMillis, count)); } - this.println(String.format("Best time out of %,d is %,d milliseconds.", this.max, bestTime)); + println(String.format("Best time out of %,d is %,d milliseconds.", this.max, bestTime)); } -} \ No newline at end of file +} diff --git a/src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv b/src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv new file mode 100644 index 0000000000..e685adc88f --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv @@ -0,0 +1,4 @@ +"1414770317901","android.widget.EditText","pass sem1 _84*|*","0","pass sem1 _8" +"1414770318470","android.widget.EditText","pass sem1 _84:|","0","pass sem1 _84:\" +"1414770318327","android.widget.EditText","pass sem1 +"1414770318628","android.widget.EditText","pass sem1 _84*|*","0","pass sem1 diff --git a/src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv b/src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv new file mode 100644 index 0000000000..0bff7a44f3 --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-196/emoji.csv @@ -0,0 +1,5 @@ +id,val1,val2,val3,val4,val5,val6,val7,val8,val9,val10,val11,val12,val13,val14,val15 
+1,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„ +2,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„ +3,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„ 
+4,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„,๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„๐Ÿ˜„ \ No newline at end of file diff --git a/src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv b/src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv new file mode 100644 index 0000000000..b06e04bd6a --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-196/japanese.csv @@ -0,0 +1,4 @@ +id,date,val1,val2,val3,val4,val5,val6,val7,val8,val9,val10,val11,val12,val13,val14,val15 +00000000000001,2017-01-01,ใใกใ‚“ใจ็ฏ€ๅˆ†่ฟ‘ใใซใฏๅ’ฒใ„ใฆใ‚‹ใ€‚่‡ช็„ถใฎๅŠ›ใฃใฆใ™ใ”ใ„ใช๏ฝž,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15 +00000000000002,2017-01-01,ใใกใ‚“ใจ็ฏ€ๅˆ†่ฟ‘ใใซใฏๅ’ฒใ„ใฆใ‚‹ใ€‚่‡ช็„ถใฎๅŠ›ใฃใฆใ™ใ”ใ„ใช๏ฝž,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15 +00000000000003,2017-01-01,ใใกใ‚“ใจ็ฏ€ๅˆ†่ฟ‘ใใซใฏๅ’ฒใ„ใฆใ‚‹ใ€‚่‡ช็„ถใฎๅŠ›ใฃใฆใ™ใ”ใ„ใช๏ฝž,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15 \ No newline at end of file diff --git a/src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv b/src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv new file mode 100644 index 0000000000..e7d2972c5a --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-254/csv-254.csv @@ -0,0 +1,3 @@ +AA,33, +AA,,"" +,33,CC diff --git a/src/test/resources/org/apache/commons/csv/CSV-290/psql.csv b/src/test/resources/org/apache/commons/csv/CSV-290/psql.csv new file mode 100644 index 
0000000000..dd50f5a642 --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-290/psql.csv @@ -0,0 +1,5 @@ +1,abc,"test line 1 +test line 2",,"" +2,xyz,"\b: \t: \n: + \r: ",a,b +3,a,"b,c,d","""quoted""",e diff --git a/src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv b/src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv new file mode 100644 index 0000000000..5358d8eac6 --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv @@ -0,0 +1,3 @@ +1 abc test line 1\ntest line 2 \N +2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b +3 a b,c,d "quoted" e