diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 0000000000..c56b33138c --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +github: + description: "Apache Commons CSV" + homepage: https://commons.apache.org/csv/ + +notifications: + commits: commits@commons.apache.org + issues: issues@commons.apache.org + pullrequests: issues@commons.apache.org + jira_options: link label + jobs: notifications@commons.apache.org + issues_bot_dependabot: notifications@commons.apache.org + pullrequests_bot_dependabot: notifications@commons.apache.org + issues_bot_codecov-commenter: notifications@commons.apache.org + pullrequests_bot_codecov-commenter: notifications@commons.apache.org diff --git a/.github/GH-ROBOTS.txt b/.github/GH-ROBOTS.txt new file mode 100644 index 0000000000..e3329e55fb --- /dev/null +++ b/.github/GH-ROBOTS.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Keeps on creating FUD PRs in test code +# Does not follow Apache disclosure policies +User-agent: JLLeitschuh/security-research +Disallow: * diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2bf1120423..b7a773f199 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,74 +1,85 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: "CodeQL" - -on: - push: - branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] - schedule: - - cron: '33 9 * * 4' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'java' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://git.io/codeql-language-support - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl - - # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - #- run: | - # make bootstrap - # make release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: "CodeQL" + +on: + push: + branches: [ master ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '33 9 * * 4' + +permissions: + contents: read + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'java' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Learn more about CodeQL language support at https://git.io/codeql-language-support + + steps: + - name: Checkout repository + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # 4.1.1 + with: + persist-credentials: false + - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # 3.22.12 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. 
+ # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # 3.22.12 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # 3.22.12 diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000000..07b235cf34 --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Coverage + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + java: [ 8 ] + + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # 4.1.1 + with: + persist-credentials: false + - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@387ac29b308b003ca37ba93a6cab5eb57c8f5f93 # v4.0.0 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + - name: Build with Maven + run: mvn --show-version --batch-mode --no-transfer-progress test jacoco:report + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d # v3.1.4 + with: + files: ./target/site/jacoco/jacoco.xml diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index fcf26a5ddb..91b3d8af35 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -1,47 +1,52 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: Java CI - -on: [push, pull_request] - -jobs: - build: - - runs-on: ubuntu-latest - continue-on-error: ${{ matrix.experimental }} - strategy: - matrix: - java: [ 8, 11, 17 ] - experimental: [false] -# include: -# - java: 18-ea -# experimental: true - - steps: - - uses: actions/checkout@v3 - - uses: actions/cache@v3.0.2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v2 - with: - distribution: 'temurin' - java-version: ${{ matrix.java }} - - name: Build with Maven - run: mvn --file pom.xml --no-transfer-progress +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Java CI + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + build: + + runs-on: ubuntu-latest + continue-on-error: ${{ matrix.experimental }} + strategy: + matrix: + java: [ 8, 11, 17, 21 ] + experimental: [false] +# include: +# - java: 22-ea +# experimental: true + + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # 4.1.1 + with: + persist-credentials: false + - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@387ac29b308b003ca37ba93a6cab5eb57c8f5f93 # v4.0.0 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + - name: Build with Maven + run: mvn -Ddoclint=all --show-version --batch-mode --no-transfer-progress diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml new file mode 100644 index 0000000000..f61c74675c --- /dev/null +++ b/.github/workflows/scorecards-analysis.yml @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: "Scorecards supply-chain security" + +on: + branch_protection_rule: + schedule: + - cron: "30 1 * * 6" # Weekly on Saturdays + push: + branches: [ "master" ] + +permissions: read-all + +jobs: + + analysis: + + name: "Scorecards analysis" + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to the code-scanning dashboard. + security-events: write + actions: read + id-token: write # This is required for requesting the JWT + contents: read # This is required for actions/checkout + + steps: + + - name: "Checkout code" + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # 4.1.1 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # 2.3.1 + with: + results_file: results.sarif + results_format: sarif + # A read-only PAT token, which is sufficient for the action to function. + # The relevant discussion: https://github.com/ossf/scorecard-action/issues/188 + repo_token: ${{ secrets.GITHUB_TOKEN }} + # Publish the results for public repositories to enable scorecard badges. + # For more details: https://github.com/ossf/scorecard-action#publishing-results + publish_results: true + + - name: "Upload artifact" + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # 4.0.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # 3.22.12 + with: + sarif_file: results.sarif diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98509cb85f..636aff79ef 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -61,7 +61,7 @@ Making Changes -------------- + Create a _topic branch_ for your isolated work. - * Usually you should base your branch on the `master` or `trunk` branch. + * Usually you should base your branch on the `master` branch. * A good topic branch name can be the JIRA bug id plus a keyword, e.g. 
`CSV-123-InputStream`. * If you have submitted multiple JIRA issues, try to maintain separate branches and pull requests. + Make commits of logical units. diff --git a/NOTICE.txt b/NOTICE.txt index ae103a0ad1..b323264536 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache Commons CSV -Copyright 2005-2022 The Apache Software Foundation +Copyright 2005-2023 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (https://www.apache.org/). diff --git a/README.md b/README.md index 2eca14203d..bdc553013b 100644 --- a/README.md +++ b/README.md @@ -43,11 +43,12 @@ Apache Commons CSV =================== -[![GitHub Actions Status](https://github.com/apache/commons-csv/workflows/Java%20CI/badge.svg)](https://github.com/apache/commons-csv/actions) -[![Coverage Status](https://coveralls.io/repos/apache/commons-csv/badge.svg)](https://coveralls.io/r/apache/commons-csv) -[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-csv/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-csv/) -[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-csv/1.9.0.svg)](https://javadoc.io/doc/org.apache.commons/commons-csv/1.9.0) -[![CodeQL](https://github.com/apache/commons-csv/workflows/CodeQL/badge.svg)](https://github.com/apache/commons-csv/actions?query=workflow%3ACodeQL) +[![Java CI](https://github.com/apache/commons-csv/actions/workflows/maven.yml/badge.svg)](https://github.com/apache/commons-csv/actions/workflows/maven.yml) +[![Coverage Status](https://codecov.io/gh/apache/commons-csv/branch/master/graph/badge.svg)](https://app.codecov.io/gh/apache/commons-csv) +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-csv/badge.svg?gav=true)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-csv/?gav=true) 
+[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-csv/1.10.0.svg)](https://javadoc.io/doc/org.apache.commons/commons-csv/1.10.0) +[![CodeQL](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/apache/commons-csv/actions/workflows/codeql-analysis.yml) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/apache/commons-csv/badge)](https://api.securityscorecards.dev/projects/github.com/apache/commons-csv) The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. @@ -56,38 +57,38 @@ Documentation More information can be found on the [Apache Commons CSV homepage](https://commons.apache.org/proper/commons-csv). The [Javadoc](https://commons.apache.org/proper/commons-csv/apidocs) can be browsed. -Questions related to the usage of Apache Commons CSV should be posted to the [user mailing list][ml]. +Questions related to the usage of Apache Commons CSV should be posted to the [user mailing list](https://commons.apache.org/mail-lists.html). Where can I get the latest release? ----------------------------------- You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-csv/download_csv.cgi). -Alternatively you can pull it from the central Maven repositories: +Alternatively, you can pull it from the central Maven repositories: ```xml org.apache.commons commons-csv - 1.9.0 + 1.10.0 ``` Contributing ------------ -We accept Pull Requests via GitHub. The [developer mailing list][ml] is the main channel of communication for contributors. +We accept Pull Requests via GitHub. The [developer mailing list](https://commons.apache.org/mail-lists.html) is the main channel of communication for contributors. There are some guidelines which will make applying PRs easier for us: + No tabs! Please use spaces for indentation. + Respect the code style. 
+ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change. -+ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn clean test```. ++ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn```. If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas). You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md). License ------- -This code is under the [Apache Licence v2](https://www.apache.org/licenses/LICENSE-2.0). +This code is under the [Apache License v2](https://www.apache.org/licenses/LICENSE-2.0). See the `NOTICE.txt` file for required notices and attributions. @@ -100,7 +101,61 @@ Additional Resources + [Apache Commons Homepage](https://commons.apache.org/) + [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/CSV) ++ [Apache Commons Slack Channel](https://the-asf.slack.com/archives/C60NVB8AD) + [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) + `#apache-commons` IRC channel on `irc.freenode.org` -[ml]:https://commons.apache.org/mail-lists.html +Apache Commons Components +------------------------- + +| Component | GitHub Repository | Apache Homepage | +| --------- | ----------------- | ----------------| +| Apache Commons BCEL | [commons-bcel](https://github.com/apache/commons-bcel) | [commons-bcel](https://commons.apache.org/proper/commons-bcel) | +| Apache Commons Beanutils | [commons-beanutils](https://github.com/apache/commons-beanutils) | [commons-beanutils](https://commons.apache.org/proper/commons-beanutils) | +| Apache Commons BSF | [commons-bsf](https://github.com/apache/commons-bsf) | [commons-bsf](https://commons.apache.org/proper/commons-bsf) | +| Apache 
Commons Build-plugin | [commons-build-plugin](https://github.com/apache/commons-build-plugin) | [commons-build-plugin](https://commons.apache.org/proper/commons-build-plugin) | +| Apache Commons Chain | [commons-chain](https://github.com/apache/commons-chain) | [commons-chain](https://commons.apache.org/proper/commons-chain) | +| Apache Commons CLI | [commons-cli](https://github.com/apache/commons-cli) | [commons-cli](https://commons.apache.org/proper/commons-cli) | +| Apache Commons Codec | [commons-codec](https://github.com/apache/commons-codec) | [commons-codec](https://commons.apache.org/proper/commons-codec) | +| Apache Commons Collections | [commons-collections](https://github.com/apache/commons-collections) | [commons-collections](https://commons.apache.org/proper/commons-collections) | +| Apache Commons Compress | [commons-compress](https://github.com/apache/commons-compress) | [commons-compress](https://commons.apache.org/proper/commons-compress) | +| Apache Commons Configuration | [commons-configuration](https://github.com/apache/commons-configuration) | [commons-configuration](https://commons.apache.org/proper/commons-configuration) | +| Apache Commons Crypto | [commons-crypto](https://github.com/apache/commons-crypto) | [commons-crypto](https://commons.apache.org/proper/commons-crypto) | +| Apache Commons CSV | [commons-csv](https://github.com/apache/commons-csv) | [commons-csv](https://commons.apache.org/proper/commons-csv) | +| Apache Commons Daemon | [commons-daemon](https://github.com/apache/commons-daemon) | [commons-daemon](https://commons.apache.org/proper/commons-daemon) | +| Apache Commons DBCP | [commons-dbcp](https://github.com/apache/commons-dbcp) | [commons-dbcp](https://commons.apache.org/proper/commons-dbcp) | +| Apache Commons Dbutils | [commons-dbutils](https://github.com/apache/commons-dbutils) | [commons-dbutils](https://commons.apache.org/proper/commons-dbutils) | +| Apache Commons Digester | 
[commons-digester](https://github.com/apache/commons-digester) | [commons-digester](https://commons.apache.org/proper/commons-digester) | +| Apache Commons Email | [commons-email](https://github.com/apache/commons-email) | [commons-email](https://commons.apache.org/proper/commons-email) | +| Apache Commons Exec | [commons-exec](https://github.com/apache/commons-exec) | [commons-exec](https://commons.apache.org/proper/commons-exec) | +| Apache Commons Fileupload | [commons-fileupload](https://github.com/apache/commons-fileupload) | [commons-fileupload](https://commons.apache.org/proper/commons-fileupload) | +| Apache Commons Functor | [commons-functor](https://github.com/apache/commons-functor) | [commons-functor](https://commons.apache.org/proper/commons-functor) | +| Apache Commons Geometry | [commons-geometry](https://github.com/apache/commons-geometry) | [commons-geometry](https://commons.apache.org/proper/commons-geometry) | +| Apache Commons Graph | [commons-graph](https://github.com/apache/commons-graph) | [commons-graph](https://commons.apache.org/proper/commons-graph) | +| Apache Commons Imaging | [commons-imaging](https://github.com/apache/commons-imaging) | [commons-imaging](https://commons.apache.org/proper/commons-imaging) | +| Apache Commons IO | [commons-io](https://github.com/apache/commons-io) | [commons-io](https://commons.apache.org/proper/commons-io) | +| Apache Commons JCI | [commons-jci](https://github.com/apache/commons-jci) | [commons-jci](https://commons.apache.org/proper/commons-jci) | +| Apache Commons JCS | [commons-jcs](https://github.com/apache/commons-jcs) | [commons-jcs](https://commons.apache.org/proper/commons-jcs) | +| Apache Commons Jelly | [commons-jelly](https://github.com/apache/commons-jelly) | [commons-jelly](https://commons.apache.org/proper/commons-jelly) | +| Apache Commons Jexl | [commons-jexl](https://github.com/apache/commons-jexl) | [commons-jexl](https://commons.apache.org/proper/commons-jexl) | +| Apache Commons 
Jxpath | [commons-jxpath](https://github.com/apache/commons-jxpath) | [commons-jxpath](https://commons.apache.org/proper/commons-jxpath) | +| Apache Commons Lang | [commons-lang](https://github.com/apache/commons-lang) | [commons-lang](https://commons.apache.org/proper/commons-lang) | +| Apache Commons Logging | [commons-logging](https://github.com/apache/commons-logging) | [commons-logging](https://commons.apache.org/proper/commons-logging) | +| Apache Commons Math | [commons-math](https://github.com/apache/commons-math) | [commons-math](https://commons.apache.org/proper/commons-math) | +| Apache Commons Net | [commons-net](https://github.com/apache/commons-net) | [commons-net](https://commons.apache.org/proper/commons-net) | +| Apache Commons Numbers | [commons-numbers](https://github.com/apache/commons-numbers) | [commons-numbers](https://commons.apache.org/proper/commons-numbers) | +| Apache Commons Parent | [commons-parent](https://github.com/apache/commons-parent) | [commons-parent](https://commons.apache.org/proper/commons-parent) | +| Apache Commons Pool | [commons-pool](https://github.com/apache/commons-pool) | [commons-pool](https://commons.apache.org/proper/commons-pool) | +| Apache Commons Proxy | [commons-proxy](https://github.com/apache/commons-proxy) | [commons-proxy](https://commons.apache.org/proper/commons-proxy) | +| Apache Commons RDF | [commons-rdf](https://github.com/apache/commons-rdf) | [commons-rdf](https://commons.apache.org/proper/commons-rdf) | +| Apache Commons Release-plugin | [commons-release-plugin](https://github.com/apache/commons-release-plugin) | [commons-release-plugin](https://commons.apache.org/proper/commons-release-plugin) | +| Apache Commons Rng | [commons-rng](https://github.com/apache/commons-rng) | [commons-rng](https://commons.apache.org/proper/commons-rng) | +| Apache Commons Scxml | [commons-scxml](https://github.com/apache/commons-scxml) | [commons-scxml](https://commons.apache.org/proper/commons-scxml) | +| Apache 
Commons Signing | [commons-signing](https://github.com/apache/commons-signing) | [commons-signing](https://commons.apache.org/proper/commons-signing) | +| Apache Commons Skin | [commons-skin](https://github.com/apache/commons-skin) | [commons-skin](https://commons.apache.org/proper/commons-skin) | +| Apache Commons Statistics | [commons-statistics](https://github.com/apache/commons-statistics) | [commons-statistics](https://commons.apache.org/proper/commons-statistics) | +| Apache Commons Testing | [commons-testing](https://github.com/apache/commons-testing) | [commons-testing](https://commons.apache.org/proper/commons-testing) | +| Apache Commons Text | [commons-text](https://github.com/apache/commons-text) | [commons-text](https://commons.apache.org/proper/commons-text) | +| Apache Commons Validator | [commons-validator](https://github.com/apache/commons-validator) | [commons-validator](https://commons.apache.org/proper/commons-validator) | +| Apache Commons VFS | [commons-vfs](https://github.com/apache/commons-vfs) | [commons-vfs](https://commons.apache.org/proper/commons-vfs) | +| Apache Commons Weaver | [commons-weaver](https://github.com/apache/commons-weaver) | [commons-weaver](https://commons.apache.org/proper/commons-weaver) | diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt index e427967ab0..bbec6a3678 100644 --- a/RELEASE-NOTES.txt +++ b/RELEASE-NOTES.txt @@ -1,3 +1,89 @@ + Apache Commons CSV + Version 1.10.0 + Release Notes + + +INTRODUCTION: + +This document contains the release notes for the 1.10.0 version of Apache Commons CSV. +Commons CSV reads and writes files in variations of the Comma Separated Value (CSV) format. + +Commons CSV requires at least Java 8. + +The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. + +Feature and bug fix release (Java 8 or above) + +Changes in this version include: + +NEW FEATURES +============ + +o CSV-291: Make CSVRecord#values() public. 
Thanks to Gary Gregory. +o CSV-264: Add DuplicateHeaderMode for flexibility with header strictness. #114. Thanks to Sagar Tiwari, Seth Falco, Alex Herbert, Gary Gregory. +o CSV-295: Support for parallelism in CSVPrinter. Thanks to Gary Gregory. +o CSV-295: Add CSVPrinter.printRecord[s](Stream). Thanks to Gary Gregory. +o CSV-304: Add accessors for header/trailer comments #257. Thanks to Peter Hull, Bruno P. Kinoshita, Gary Gregory. +o Add github/codeql-action. + +FIXED BUGS +========== + +o Minor changes #172. Thanks to Arturo Bernal. +o CSV-292: No Automatic-Module-Name prevents usage in JPMS projects without repacking the JAR. Thanks to Rob Vesse. +o CSV-288: Fix for multi-char delimiter not working as expected #218. Thanks to Santhsoh, Angus. +o CSV-269: CSVRecord.get(Enum) should use Enum.name() instead of Enum.toString(). Thanks to Auke te Winkel, Gary Gregory. +o Allow org.apache.commons.csv.IOUtils.copy(Reader, Appendable, CharBuffer) to compile on Java 11 and run on Java 8. Thanks to Gary Gregory. +o CSV-300: CSVRecord.toList() does not give write access to the new List. Thanks to Markus Spann, Gary Gregory. +o CSVParser.getRecords() now throws UncheckedIOException instead of IOException. Thanks to Gary Gregory. +o CSV-274: Add comments to iterator() and stream() #270. Thanks to Peter Hull, Bruno P. Kinoshita, Gary Gregory. +o CSV-290: Fix wrong assumptions in PostgreSQL formats #265. Thanks to angusdev, Gary Gregory. +o Validate input to setDelimiter(String) for empty string #266. Thanks to Mykola Faryma. +o Bump CSVFormat#serialVersionUID from 1 to 2. Thanks to Dependabot. +o CSVParser: Identify duplicates in null, empty and blank header names #279. Thanks to Alex Herbert. + +CHANGES +======= + +o Bump actions/cache from 2.1.6 to 3.0.10 #196, #233, #243, #267, #271. Thanks to Dependabot, Gary Gregory. +o Bump actions/checkout from 2.3.4 to 3.1.0 #188, #195, #220, #272. Thanks to Dependabot, Gary Gregory. +o Bump actions/setup-java from 2 to 3.5.1. 
Thanks to Gary Gregory. +o Bump actions/upload-artifact from 3.1.0 to 3.1.1 #280. Thanks to Dependabot. +o Bump commons-parent from 52 to 56 #264, #288, #298. Thanks to Gary Gregory. +o Bump checkstyle from 8.44 to 9.2.1 #180, #190, #194, #202, #207. Thanks to Dependabot. +o Bump junit-jupiter from 5.8.0-M1 to 5.9.1 #179, #186, #201, #244, #263. Thanks to Dependabot. +o Bump jmh-core from 1.32 to 1.36 #176, #208, #229, #285. Thanks to Dependabot. +o Bump jmh-generator-annprocess from 1.32 to 1.36 #175, #206, #226, #283. Thanks to Dependabot. +o Bump mockito-core from 3.11.2 to 4.11.0 #187, #197, #204, #212, #230, #237, #251, #259, #284, #292, #297. Thanks to Dependabot, Gary Gregory. +o Bump maven-pmd-plugin from 3.14.0 to 3.19.0 #184, #219, #238, #254, #258. Thanks to Dependabot. +o Bump pmd from 6.36.0 to 6.52.0 #173, #189, #193, #199, #227, #233, #214, #236, #240, #247, #255, #273. Thanks to Dependabot, Gary Gregory. +o Bump opencsv from 5.5.1 to 5.7.1 #182, #221, #260, #281. Thanks to Gary Gregory. +o Bump spotbugs-maven-plugin from 4.3.0 to 4.7.3.0 #192, #198, #203, #211, #225, #234, #242, #245, #261, #275, #282. Thanks to Dependabot. +o Bump com.github.spotbugs:spotbugs from 4.5.3 to 4.7.2. Thanks to Gary Gregory. +o Bump h2 from 1.4.200 to 2.1.214 #200, #205, #213, #239. Thanks to Dependabot. +o Bump maven-javadoc-plugin from 3.3.0 to 3.4.1. Thanks to Gary Gregory. +o Bump biz.aQute.bnd:biz.aQute.bndlib from 5.3.0 to 6.3.1. Thanks to Gary Gregory. +o Bump jacoco-maven-plugin from 0.8.7 to 0.8.8. Thanks to Gary Gregory. +o Bump japicmp-maven-plugin from 0.15.3 to 0.16.0. Thanks to Gary Gregory. +o Bump maven-checkstyle-plugin from 3.1.2 to 3.2.0 #253. Thanks to Dependabot. + +Removed: +o Serialization in CSVFormat is not supported from one version to the next. 
+ +Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html + +For complete information on Apache Commons CSV, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons CSV website: + +https://commons.apache.org/proper/commons-csv/ + +Download page: https://commons.apache.org/proper/commons-csv/download_csv.cgi + +Have fun! +-Apache Commons CSV team + +------------------------------------------------------------------------------ + Apache Commons CSV Version 1.9.0 Release Notes @@ -12,7 +98,7 @@ Commons CSV requires at least Java 8. The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. -Feature and bug fix release (Java 8) +Feature and bug fix release (Java 8 or above) Changes in this version include: @@ -94,7 +180,7 @@ o Bump biz.aQute.bnd:biz.aQute.bndlib from 5.1.2 to 5.3.0. Thanks to D Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -154,7 +240,7 @@ o Fix typo performance test #55. Thanks to Chen. 
Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -178,7 +264,7 @@ Commons CSV reads and writes files in variations of the Comma Separated Value (C The Apache Commons CSV library provides a simple interface for reading and writing CSV files of various types. -Feature and bug fix release (Java 8) +Feature and bug fix release (Java 8 or above) Changes in this version include: @@ -206,7 +292,7 @@ o Update tests from H2 1.4.198 to 1.4.199. Thanks to Gary Gregory. Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -319,7 +405,7 @@ o CSV-201: Do not use RuntimeException in CSVParser.iterator().new Iterator() {. Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -363,7 +449,7 @@ o CSV-183: Drop ferc.gov tests. 
Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -455,7 +541,7 @@ o CSV-156: Incorrect Javadoc on QuoteMode.NONE. Thanks to Jason Steenstra-Picke Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -501,7 +587,7 @@ o [CSV-134] Unified parameter validation. Thanks to wu wen. Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ @@ -547,7 +633,7 @@ FIXED BUGS ========== o CSV-125: No longer works with Java 6 -o CSV-122: NullPointerException when empty header string and and null string of "". +o CSV-122: NullPointerException when empty header string and null string of "". Thanks to Mike Lewis. o CSV-118: CSVRecord.toMap() throws NPE on formats with no headers. Thanks to Enrique Lara. 
@@ -596,7 +682,7 @@ o CSV-27: Decide whether to keep the csv.writer subpackage Historical list of changes: https://commons.apache.org/proper/commons-csv/changes-report.html For complete information on Apache Commons CSV, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache Apache Commons CSV website: +patches, or suggestions for improvement, see the Apache Commons CSV website: https://commons.apache.org/proper/commons-csv/ diff --git a/pom.xml b/pom.xml index ec33835329..3d08068a45 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,4 @@ - + RC1 1.9.0 @@ -164,29 +173,18 @@ UTF-8 UTF-8 - 3.1.2 - 9.3 - ${basedir}/src/site/resources/checkstyle/checkstyle-header.txt - ${basedir}/src/site/resources/checkstyle/checkstyle.xml - ${basedir}/src/site/resources/checkstyle/checkstyle-suppressions.xml + ${basedir}/src/conf/checkstyle/checkstyle-header.txt + ${basedir}/src/conf/checkstyle/checkstyle.xml + ${basedir}/src/conf/checkstyle/checkstyle-suppressions.xml LICENSE.txt, NOTICE.txt, **/maven-archiver/pom.properties - 3.16.0 - 6.44.0 - 0.8.7 - 4.6.0.0 - 0.15.3 - 3.3.2 - 6.2.0 false true - Gary Gregory - 86fdc7e2a11262cb - clean verify apache-rat:check japicmp:cmp checkstyle:check spotbugs:check pmd:check javadoc:javadoc + clean verify apache-rat:check japicmp:cmp spotbugs:check pmd:check pmd:cpd-check javadoc:javadoc checkstyle:check @@ -198,30 +196,11 @@ false ${checkstyle.suppress.file} - - - com.puppycrawl.tools - checkstyle - ${commons.checkstyle.version} - - org.apache.maven.plugins maven-pmd-plugin ${commons.pmd.version} - - - net.sourceforge.pmd - pmd-core - ${commons.pmd-impl.version} - - - net.sourceforge.pmd - pmd-java - ${commons.pmd-impl.version} - - ${maven.compiler.target} false @@ -240,6 +219,7 @@ src/test/resources/org/apache/commons/csv/empty.txt + src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv src/test/resources/org/apache/commons/csv/csv-167/sample1.csv 
src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv @@ -259,6 +239,8 @@ src/test/resources/org/apache/commons/csv/CSV-259/sample.txt src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt + src/test/resources/org/apache/commons/csv/CSV-290/psql.csv + src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv @@ -318,7 +300,6 @@ com.github.spotbugs spotbugs-maven-plugin - ${commons.spotbugs.version} ${basedir}/src/site/resources/spotbugs/spotbugs-exclude-filter.xml @@ -355,7 +336,6 @@ com.github.spotbugs spotbugs-maven-plugin - ${commons.spotbugs.version} ${basedir}/src/site/resources/spotbugs/spotbugs-exclude-filter.xml @@ -422,17 +402,11 @@ benchmark - - org.openjdk.jmh - jmh-core - 1.34 - test - org.openjdk.jmh jmh-generator-annprocess - 1.35 + 1.37 test @@ -453,7 +427,7 @@ com.opencsv opencsv - 5.6 + 5.9 test @@ -480,7 +454,7 @@ org.apache.commons commons-lang3 - 3.12.0 + 3.14.0 @@ -533,17 +507,6 @@ - - - java9 - - 9 - - - - true - - diff --git a/src/changes/changes.xml b/src/changes/changes.xml index eb1a7a842f..89ea70bab3 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -33,41 +33,78 @@ The type attribute can be add,update,fix,remove. --> - + Apache Commons CSV Release Notes - + + + [Javadoc] Add example to CSVFormat#setHeaderComments() #344. + + Replace deprecated method in user guide, update external link #324, #325. + Document duplicate header behavior #309. + Add missing docs #328. + [StepSecurity] CI: Harden GitHub Actions #329, #330. + Better error message during faulty CSV record read #347. + Misleading error message when QuoteMode set to None #352. + + Bump commons-io:commons-io: from 2.11.0 to 2.15.1. + Bump commons-parent from 57 to 65 #376. + Bump h2 from 2.1.214 to 2.2.224 #333, #349, #359. + Bump commons-lang3 from 3.12.0 to 3.14.0. 
+ Update exception message in CSVRecord#getNextRecord() #348. + Bump tests using com.opencsv:opencsv from 5.8 to 5.9 #373. + + Minor changes #172. No Automatic-Module-Name prevents usage in JPMS projects without repacking the JAR. Fix for multi-char delimiter not working as expected #218. CSVRecord.get(Enum) should use Enum.name() instead of Enum.toString(). Allow org.apache.commons.csv.IOUtils.copy(Reader, Appendable, CharBuffer) to compile on Java 11 and run on Java 8. - Bump commons-parent from 52 to 53. + CSVRecord.toList() does not give write access to the new List. + CSVParser.getRecords() now throws UncheckedIOException instead of IOException. + Add comments to iterator() and stream() #270. + Fix wrong assumptions in PostgreSQL formats #265. + Validate input to setDelimiter(String) for empty string #266. + Bump CSVFormat#serialVersionUID from 1 to 2. + CSVParser: Identify duplicates in null, empty and blank header names #279. + + Serialization in CSVFormat is not supported from one version to the next. Make CSVRecord#values() public. Add DuplicateHeaderMode for flexibility with header strictness. #114. Support for parallelism in CSVPrinter. + Add CSVPrinter.printRecord[s](Stream). + Add accessors for header/trailer comments #257. + Add github/codeql-action. - Bump actions/checkout from 2.3.4 to 3 #188, #195, #220. - Bump actions/cache from 2.1.6 to 3.0.2 #196, #233. + Bump actions/cache from 2.1.6 to 3.0.10 #196, #233, #243, #267, #271. + Bump actions/checkout from 2.3.4 to 3.1.0 #188, #195, #220, #272. + Bump actions/setup-java from 2 to 3.5.1. + Bump actions/upload-artifact from 3.1.0 to 3.1.1 #280. + Bump commons-parent from 52 to 57 #264, #288, #298, #323. Bump checkstyle from 8.44 to 9.2.1 #180, #190, #194, #202, #207. - Bump junit-jupiter from 5.8.0-M1 to 5.8.2 #179, #186, #201. - Bump jmh-core from 1.32 to 1.34 #176, #208. - Bump jmh-generator-annprocess from 1.32 to 1.35 #175, #206, #226. 
- Bump commons.pmd-impl.version from 6.36.0 to 6.44.0 #173, #189, #193, #199, #227. - Bump mockito-core from 3.11.2 to 4.4.0 #187, #197, #204, #212. - Bump maven-pmd-plugin from 3.14.0 to 3.16.0 #184, #219. - Bump commons.pmd-impl.version from 6.41.0 to 6.42.0 #214. - Bump opencsv from 5.5.1 to 5.6 #182, #221. - Bump spotbugs-maven-plugin from 4.3.0 to 4.6.0.0 #192, #198, #203, #211, #225. - Bump h2 from 1.4.200 to 2.0.214 #200, #205, #213. - Bump maven-javadoc-plugin from 3.3.0 to 3.3.2. - Bump biz.aQute.bnd:biz.aQute.bndlib from 5.3.0 to 6.2.0. + Bump junit-jupiter from 5.8.0-M1 to 5.9.1 #179, #186, #201, #244, #263. + Bump jmh-core from 1.32 to 1.36 #176, #208, #229, #285. + Bump jmh-generator-annprocess from 1.32 to 1.36 #175, #206, #226, #283. + Bump mockito-core from 3.11.2 to 4.11.0 #187, #197, #204, #212, #230, #237, #251, #259, #284, #292, #297. + Bump maven-pmd-plugin from 3.14.0 to 3.19.0 #184, #219, #238, #254, #258. + Bump pmd from 6.36.0 to 6.52.0 #173, #189, #193, #199, #227, #233, #214, #236, #240, #247, #255, #273. + Bump opencsv from 5.5.1 to 5.7.1 #182, #221, #260, #281. + Bump spotbugs-maven-plugin from 4.3.0 to 4.7.3.0 #192, #198, #203, #211, #225, #234, #242, #245, #261, #275, #282. + Bump com.github.spotbugs:spotbugs from 4.5.3 to 4.7.2. + Bump h2 from 1.4.200 to 2.1.214 #200, #205, #213, #239. + Bump maven-javadoc-plugin from 3.3.0 to 3.4.1. + Bump biz.aQute.bnd:biz.aQute.bndlib from 5.3.0 to 6.3.1. + Bump jacoco-maven-plugin from 0.8.7 to 0.8.8. + Bump japicmp-maven-plugin from 0.15.3 to 0.16.0. + Bump maven-checkstyle-plugin from 3.1.2 to 3.2.0 #253. - + Replace FindBugs with SpotBugs #56. Javadoc typo in CSVFormat let's -> lets #57. @@ -134,7 +171,7 @@ Bump PMD core from 6.29.0 to 6.36.0. Bump biz.aQute.bnd:biz.aQute.bndlib from 5.1.2 to 5.3.0. - Fix typos in site and test #53. Fix typo performance test #55. - + Add predefined CSVFormats for printing MongoDB CSV and TSV. Fix escape character for POSTGRESQL_TEXT and POSTGRESQL_CSV formats. 
Site link "Source Repository" does not work. @@ -167,7 +204,7 @@ removed in version 2.0. Cannot get headers in column order from CSVRecord. Update tests from H2 1.4.198 to 1.4.199. - + Add more documentation to CSVPrinter. Add autoFlush option for CsvPrinter. PR #24. The behavior of quote char using is not similar as Excel does when the first string contains CJK char(s). @@ -179,7 +216,7 @@ removed in version 2.0. Parse method should avoid creating a redundant BufferedReader. Add predefined CSVFormats for printing MongoDB CSV and TSV. - + withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17. Fix outdated comments about FileReader in CSVParser #13 Fix incorrect method name 'withFirstRowAsHeader' in user guide. @@ -194,12 +231,12 @@ removed in version 2.0. Provide a CSV Format for printing PostgreSQL CSV and Text formats. Adding a placeholder in the Lexer and CSV parser to store the end-of-line string. - + Make CSVPrinter.print(Object) GC-free. Allow some printing operations directly from CSVFormat. Drop ferc.gov tests. - + Add shortcut method for using first record as header to CSVFormat Add withHeader(Class<? extends Enum>) to CSVFormat Comment line hides next record; update Javadoc to make behavior clear @@ -213,12 +250,12 @@ removed in version 2.0. Support trimming leading and trailing blanks. Create default formats for Informix UNLOAD and UNLOAD CSV. - + CSVFormat.with* methods clear the header comments Incorrect Javadoc on QuoteMode.NONE Add enum CSVFormat.Predefined that contains the default CSVFormat values. - + QuoteMode.NON_NUMERIC doesn't work with CSVPrinter.printRecords(ResultSet) CSVFormat#withHeader doesn't work well with #printComment, add withHeaderComments(String...) CSVFormat.EXCEL should ignore empty header names @@ -229,9 +266,9 @@ removed in version 2.0. 
Save positions of records to enable random access CSVPrinter.printRecord(ResultSet) with metadata - + No longer works with Java 6 - NullPointerException when empty header string and and null string of "" + NullPointerException when empty header string and null string of "" Validate format parameters in constructor IllegalArgumentException thrown when the header contains duplicate names when the column names are empty. CSVFormat#withHeader doesn't work with CSVPrinter diff --git a/src/changes/release-notes.vm b/src/changes/release-notes.vm index 412324c0f6..41e80f22bc 100644 --- a/src/changes/release-notes.vm +++ b/src/changes/release-notes.vm @@ -118,7 +118,7 @@ Removed: Historical list of changes: ${project.url}changes-report.html For complete information on ${project.name}, including instructions on how to submit bug reports, -patches, or suggestions for improvement, see the Apache ${project.name} website: +patches, or suggestions for improvement, see the ${project.name} website: ${project.url} diff --git a/src/site/resources/checkstyle/checkstyle-header.txt b/src/conf/checkstyle/checkstyle-header.txt similarity index 100% rename from src/site/resources/checkstyle/checkstyle-header.txt rename to src/conf/checkstyle/checkstyle-header.txt diff --git a/src/site/resources/checkstyle/checkstyle-suppressions.xml b/src/conf/checkstyle/checkstyle-suppressions.xml similarity index 82% rename from src/site/resources/checkstyle/checkstyle-suppressions.xml rename to src/conf/checkstyle/checkstyle-suppressions.xml index abff74c8c5..e1a4807ccb 100644 --- a/src/site/resources/checkstyle/checkstyle-suppressions.xml +++ b/src/conf/checkstyle/checkstyle-suppressions.xml @@ -20,4 +20,6 @@ "https://checkstyle.org/dtds/suppressions_1_2.dtd"> + + diff --git a/src/site/resources/checkstyle/checkstyle.xml b/src/conf/checkstyle/checkstyle.xml similarity index 78% rename from src/site/resources/checkstyle/checkstyle.xml rename to src/conf/checkstyle/checkstyle.xml index 
59d5fc9df5..a19f8f53e8 100644 --- a/src/site/resources/checkstyle/checkstyle.xml +++ b/src/conf/checkstyle/checkstyle.xml @@ -17,8 +17,8 @@ limitations under the License. --> + "-//Checkstyle//DTD Checkstyle Configuration 1.3//EN" + "https://checkstyle.org/dtds/configuration_1_3.dtd"> @@ -63,7 +63,19 @@ limitations under the License. + + + + + + + + + + + + + - diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 27574b2e0a..3f75b78253 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -48,8 +48,11 @@ import java.util.Objects; import java.util.Set; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.function.Uncheck; + /** - * Specifies the format of a CSV file and parses input. + * Specifies the format of a CSV file for parsing and writing. * *

Using predefined formats

* @@ -154,11 +157,29 @@ * This makes your code impervious to changes in column order in the CSV file. *

* - *

Notes

+ *

Serialization

+ *

+ * This class implements the {@link Serializable} interface with the following caveats: + *

+ *
    + *
  • This class will no longer implement Serializable in 2.0.
  • + *
  • Serialization is not supported from one version to the next.
  • + *
+ *

+ * The {@code serialVersionUID} values are: + *

+ *
    + *
  • Version 1.10.0: {@code 2L}
  • + *
  • Version 1.9.0 through 1.0: {@code 1L}
  • + *
* + *

Notes

*

* This class is immutable. *

+ *

+ * Not all settings are used for both parsing and writing. + *

*/ public final class CSVFormat implements Serializable { @@ -238,7 +259,7 @@ private Builder(final CSVFormat csvFormat) { this.recordSeparator = csvFormat.recordSeparator; this.nullString = csvFormat.nullString; this.headerComments = csvFormat.headerComments; - this.headers = csvFormat.header; + this.headers = csvFormat.headers; this.skipHeaderRecord = csvFormat.skipHeaderRecord; this.ignoreHeaderCase = csvFormat.ignoreHeaderCase; this.trailingDelimiter = csvFormat.trailingDelimiter; @@ -266,24 +287,12 @@ public CSVFormat build() { */ @Deprecated public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { - final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; - setDuplicateHeaderMode(mode); + setDuplicateHeaderMode(allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY); return this; } /** - * Sets the duplicate header names behavior. - * - * @param duplicateHeaderMode the duplicate header names behavior - * @return This instance. - */ - public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { - this.duplicateHeaderMode = duplicateHeaderMode; - return this; - } - - /** - * Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an + * Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an * {@link IllegalArgumentException} to be thrown. * * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to @@ -307,9 +316,34 @@ public Builder setAutoFlush(final boolean autoFlush) { } /** - * Sets the comment start marker, use {@code null} to disable. - * - * Note that the comment start character is only recognized at the start of a line. 
+ * Sets the comment marker character, use {@code null} to disable comments. + *

+ * The comment start character is only recognized at the start of a line. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of + * each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ *
+         * builder.setCommentMarker('#')
+         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * 
+ *

+ * writes: + *

+ *
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
+         * 
* * @param commentMarker the comment start marker, use {@code null} to disable. * @return This instance. @@ -321,9 +355,34 @@ public Builder setCommentMarker(final char commentMarker) { } /** - * Sets the comment start marker, use {@code null} to disable. - * - * Note that the comment start character is only recognized at the start of a line. + * Sets the comment marker character, use {@code null} to disable comments. + *

+ * The comment start character is only recognized at the start of a line. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of + * each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ *
+         * builder.setCommentMarker('#')
+         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * 
+ *

+ * writes: + *

+ *
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
+         * 
* * @param commentMarker the comment start marker, use {@code null} to disable. * @return This instance. @@ -357,10 +416,25 @@ public Builder setDelimiter(final String delimiter) { if (containsLineBreak(delimiter)) { throw new IllegalArgumentException("The delimiter cannot be a line break"); } + if (delimiter.isEmpty()) { + throw new IllegalArgumentException("The delimiter cannot be empty"); + } this.delimiter = delimiter; return this; } + /** + * Sets the duplicate header names behavior. + * + * @param duplicateHeaderMode the duplicate header names behavior + * @return This instance. + * @since 1.10.0 + */ + public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { + this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); + return this; + } + /** * Sets the escape character. * @@ -406,7 +480,7 @@ public Builder setEscape(final Character escapeCharacter) { * The header is also used by the {@link CSVPrinter}. *

* - * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. * @return This instance. */ public Builder setHeader(final Class> headerEnum) { @@ -414,15 +488,13 @@ public Builder setHeader(final Class> headerEnum) { if (headerEnum != null) { final Enum[] enumValues = headerEnum.getEnumConstants(); header = new String[enumValues.length]; - for (int i = 0; i < enumValues.length; i++) { - header[i] = enumValues[i].name(); - } + Arrays.setAll(header, i -> enumValues[i].name()); } return setHeader(header); } /** - * Sets the header from the result set metadata. The header can either be parsed automatically from the input file with: + * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: * *
          * builder.setHeader();
@@ -437,7 +509,7 @@ public Builder setHeader(final Class> headerEnum) {
          * The header is also used by the {@link CSVPrinter}.
          * 

* - * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. * @return This instance. * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. */ @@ -446,7 +518,7 @@ public Builder setHeader(final ResultSet resultSet) throws SQLException { } /** - * Sets the header from the result set metadata. The header can either be parsed automatically from the input file with: + * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: * *
          * builder.setHeader();
@@ -461,7 +533,7 @@ public Builder setHeader(final ResultSet resultSet) throws SQLException {
          * The header is also used by the {@link CSVPrinter}.
          * 

* - * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. * @return This instance. * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. */ @@ -478,7 +550,7 @@ public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLEx } /** - * Sets the header to the given values. The header can either be parsed automatically from the input file with: + * Sets the header to the given values. The header can be parsed automatically from the input file with: * *
          * builder.setHeader();
@@ -493,7 +565,7 @@ public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLEx
          * The header is also used by the {@link CSVPrinter}.
          * 

* - * @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. * @return This instance. */ public Builder setHeader(final String... header) { @@ -502,13 +574,36 @@ public Builder setHeader(final String... header) { } /** - * Sets the header comments set to the given values. The comments will be printed first, before the headers. This setting is ignored by the parser. - * + * Sets the header comments to write before the CSV data. + *

+ * This setting is ignored by the parser. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of + * each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

*
-         * builder.setHeaderComments("Generated by Apache Commons CSV.", Instant.now());
+         * builder.setCommentMarker('#')
+         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * 
+ *

+ * writes: + *

+ *
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
          * 
* - * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. + * @param headerComments the headerComments which will be printed by the Printer before the CSV data. * @return This instance. */ public Builder setHeaderComments(final Object... headerComments) { @@ -517,13 +612,36 @@ public Builder setHeaderComments(final Object... headerComments) { } /** - * Sets the header comments set to the given values. The comments will be printed first, before the headers. This setting is ignored by the parser. - * + * Sets the header comments to write before the CSV data. + *

+ * This setting is ignored by the parser. + *

+ *

+ * Comments are printed first, before headers. + *

+ *

+ * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of + * each comment line. + *

+ *

+ * If the comment marker is not set, then the header comments are ignored. + *

+ *

+ * For example: + *

+ *
+         * builder.setCommentMarker('#')
+         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString());
+         * 
+ *

+ * writes: + *

*
-         * Builder.setHeaderComments("Generated by Apache Commons CSV.", Instant.now());
+         * # Generated by Apache Commons CSV
+         * # 1970-01-01T00:00:00Z
          * 
* - * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. + * @param headerComments the headerComments which will be printed by the Printer before the CSV data. * @return This instance. */ public Builder setHeaderComments(final String... headerComments) { @@ -545,7 +663,7 @@ public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) { } /** - * Sets the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. + * Sets the parser case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. * * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. * @return This instance. @@ -794,8 +912,8 @@ public CSVFormat getFormat() { false, false, false, DuplicateHeaderMode.ALLOW_ALL); /** - * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary - * to customize this format to accommodate to your regional settings. + * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary + * to customize this format to accommodate your regional settings. * *

* For example for parsing or generating a CSV file on a French system the following format will be used: @@ -834,7 +952,7 @@ public CSVFormat getFormat() { * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. * *

- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. + * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. * The default NULL string is {@code "\\N"}. *

* @@ -866,7 +984,7 @@ public CSVFormat getFormat() { * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) * *

- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. + * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. * The default NULL string is {@code "\\N"}. *

* @@ -969,7 +1087,7 @@ public CSVFormat getFormat() { * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. * *

- * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special + * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. *

* @@ -997,7 +1115,7 @@ public CSVFormat getFormat() { .setIgnoreEmptyLines(false) .setQuote(null) .setRecordSeparator(LF) - .setNullString("\\N") + .setNullString(Constants.SQL_NULL_STRING) .setQuoteMode(QuoteMode.ALL_NON_NULL) .build(); // @formatter:off @@ -1006,7 +1124,7 @@ public CSVFormat getFormat() { * Default Oracle format used by the SQL*Loader utility. * *

- * This is a comma-delimited format with the system line separator character as the record separator.Values are + * This is a comma-delimited format with the system line separator character as the record separator. Values are * double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is * {@code ""}. Values are trimmed. *

@@ -1035,7 +1153,7 @@ public CSVFormat getFormat() { .setEscape(BACKSLASH) .setIgnoreEmptyLines(false) .setQuote(DOUBLE_QUOTE_CHAR) - .setNullString("\\N") + .setNullString(Constants.SQL_NULL_STRING) .setTrim(true) .setRecordSeparator(System.lineSeparator()) .setQuoteMode(QuoteMode.MINIMAL) @@ -1046,8 +1164,8 @@ public CSVFormat getFormat() { * Default PostgreSQL CSV format used by the {@code COPY} operation. * *

- * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '"'}. The default NULL string is {@code ""}. + * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special + * characters are not escaped. The default NULL string is {@code ""}. *

* *

@@ -1055,7 +1173,7 @@ public CSVFormat getFormat() { *

*
    *
  • {@code setDelimiter(',')}
  • - *
  • {@code setEscape('"')}
  • + *
  • {@code setEscape(null)}
  • *
  • {@code setIgnoreEmptyLines(false)}
  • *
  • {@code setQuote('"')}
  • *
  • {@code setRecordSeparator('\n')}
  • @@ -1071,7 +1189,7 @@ public CSVFormat getFormat() { // @formatter:off public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder() .setDelimiter(COMMA) - .setEscape(DOUBLE_QUOTE_CHAR) + .setEscape(null) .setIgnoreEmptyLines(false) .setQuote(DOUBLE_QUOTE_CHAR) .setRecordSeparator(LF) @@ -1084,8 +1202,8 @@ public CSVFormat getFormat() { * Default PostgreSQL text format used by the {@code COPY} operation. * *

    - * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}. + * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special + * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}. *

    * *

    @@ -1095,7 +1213,7 @@ public CSVFormat getFormat() { *

  • {@code setDelimiter('\t')}
  • *
  • {@code setEscape('\\')}
  • *
  • {@code setIgnoreEmptyLines(false)}
  • - *
  • {@code setQuote('"')}
  • + *
  • {@code setQuote(null)}
  • *
  • {@code setRecordSeparator('\n')}
  • *
  • {@code setNullString("\\N")}
  • *
  • {@code setQuoteMode(QuoteMode.ALL_NON_NULL)}
  • @@ -1111,9 +1229,9 @@ public CSVFormat getFormat() { .setDelimiter(TAB) .setEscape(BACKSLASH) .setIgnoreEmptyLines(false) - .setQuote(DOUBLE_QUOTE_CHAR) + .setQuote(null) .setRecordSeparator(LF) - .setNullString("\\N") + .setNullString(Constants.SQL_NULL_STRING) .setQuoteMode(QuoteMode.ALL_NON_NULL) .build(); // @formatter:off @@ -1135,7 +1253,7 @@ public CSVFormat getFormat() { */ public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).build(); - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 2L; /** * Tab-delimited format. @@ -1194,6 +1312,10 @@ private static boolean containsLineBreak(final String source) { return contains(source, CR) || contains(source, LF); } + static boolean isBlank(final String value) { + return value == null || value.trim().isEmpty(); + } + /** * Returns true if the given character is a line break character. * @@ -1216,6 +1338,16 @@ private static boolean isLineBreak(final Character c) { return c != null && isLineBreak(c.charValue()); } + /** Same test as in {@link String#trim()}. */ + private static boolean isTrimChar(final char ch) { + return ch <= SP; + } + + /** Same test as in {@link String#trim()}. */ + private static boolean isTrimChar(final CharSequence charSequence, final int pos) { + return isTrimChar(charSequence.charAt(pos)); + } + /** * Creates a new CSV format with the specified delimiter. 
* @@ -1243,9 +1375,7 @@ static String[] toStringArray(final Object[] values) { return null; } final String[] strings = new String[values.length]; - for (int i = 0; i < values.length; i++) { - strings[i] = Objects.toString(values[i], null); - } + Arrays.setAll(strings, i -> Objects.toString(values[i], null)); return strings; } @@ -1257,10 +1387,10 @@ static CharSequence trim(final CharSequence charSequence) { int len = count; int pos = 0; - while (pos < len && charSequence.charAt(pos) <= SP) { + while (pos < len && isTrimChar(charSequence, pos)) { pos++; } - while (pos < len && charSequence.charAt(len - 1) <= SP) { + while (pos < len && isTrimChar(charSequence, len - 1)) { len--; } return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; @@ -1283,31 +1413,40 @@ public static CSVFormat valueOf(final String format) { private final boolean autoFlush; - private final Character commentMarker; // null if commenting is disabled + /** Set to null if commenting is disabled. */ + private final Character commentMarker; private final String delimiter; - private final Character escapeCharacter; // null if escaping is disabled + /** Set to null if escaping is disabled. */ + private final Character escapeCharacter; - private final String[] header; // array of header column names + /** Array of header column names. */ + private final String[] headers; - private final String[] headerComments; // array of header comment lines + /** Array of header comment lines. */ + private final String[] headerComments; private final boolean ignoreEmptyLines; - private final boolean ignoreHeaderCase; // should ignore header names case + /** Should ignore header names case. */ + private final boolean ignoreHeaderCase; - private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? + /** TODO Should leading/trailing spaces be ignored around values?. 
*/ + private final boolean ignoreSurroundingSpaces; - private final String nullString; // the string to be used for null values + /** The string to be used for null values. */ + private final String nullString; - private final Character quoteCharacter; // null if quoting is disabled + /** Set to null if quoting is disabled. */ + private final Character quoteCharacter; private final String quotedNullString; private final QuoteMode quoteMode; - private final String recordSeparator; // for outputs + /** For output. */ + private final String recordSeparator; private final boolean skipHeaderRecord; @@ -1327,7 +1466,7 @@ private CSVFormat(final Builder builder) { this.recordSeparator = builder.recordSeparator; this.nullString = builder.nullString; this.headerComments = builder.headerComments; - this.header = builder.headers; + this.headers = builder.headers; this.skipHeaderRecord = builder.skipHeaderRecord; this.ignoreHeaderCase = builder.ignoreHeaderCase; this.trailingDelimiter = builder.trailingDelimiter; @@ -1352,12 +1491,12 @@ private CSVFormat(final Builder builder) { * @param nullString the line separator to use for output. * @param headerComments the comments to be printed by the Printer before the actual CSV data. * @param header the header - * @param skipHeaderRecord TODO Doc me. - * @param allowMissingColumnNames TODO Doc me. - * @param ignoreHeaderCase TODO Doc me. - * @param trim TODO Doc me. - * @param trailingDelimiter TODO Doc me. - * @param autoFlush TODO Doc me. 
+ * @param skipHeaderRecord if {@code true} the header row will be skipped + * @param allowMissingColumnNames if {@code true} the missing column names are allowed when parsing the header line + * @param ignoreHeaderCase if {@code true} header names will be accessed ignoring case when parsing input + * @param trim if {@code true} next record value will be trimmed + * @param trailingDelimiter if {@code true} the trailing delimiter will be added before record separator (if set) + * @param autoFlush if {@code true} the underlying stream will be flushed before closing * @param duplicateHeaderMode the behavior when handling duplicate headers * @throws IllegalArgumentException if the delimiter is a line break character. */ @@ -1377,7 +1516,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote this.recordSeparator = recordSeparator; this.nullString = nullString; this.headerComments = toStringArray(headerComments); - this.header = clone(header); + this.headers = clone(header); this.skipHeaderRecord = skipHeaderRecord; this.ignoreHeaderCase = ignoreHeaderCase; this.trailingDelimiter = trailingDelimiter; @@ -1433,7 +1572,7 @@ public boolean equals(final Object obj) { final CSVFormat other = (CSVFormat) obj; return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames && autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) && - Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) && + Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(headers, other.headers) && Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase && ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, 
other.quoteCharacter) && quoteMode == other.quoteMode && @@ -1448,20 +1587,21 @@ public boolean equals(final Object obj) { * @return the formatted values */ public String format(final Object... values) { + return Uncheck.get(() -> format_(values)); + } + + private String format_(final Object... values) throws IOException { final StringWriter out = new StringWriter(); try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { csvPrinter.printRecord(values); final String res = out.toString(); final int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length(); return res.substring(0, len); - } catch (final IOException e) { - // should not happen because a StringWriter does not do IO. - throw new IllegalStateException(e); } } /** - * Returns true if and only if duplicate names are allowed in the headers. + * Gets whether duplicate names are allowed in the headers. * * @return whether duplicate header names are allowed * @since 1.7 @@ -1473,17 +1613,7 @@ public boolean getAllowDuplicateHeaderNames() { } /** - * Gets how duplicate headers are handled. - * - * @return if duplicate header values are allowed, allowed conditionally, or disallowed. - * @since 1.9.0 - */ - public DuplicateHeaderMode getDuplicateHeaderMode() { - return duplicateHeaderMode; - } - - /** - * Specifies whether missing column names are allowed when parsing the header line. + * Gets whether missing column names are allowed when parsing the header line. * * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}. */ @@ -1492,7 +1622,7 @@ public boolean getAllowMissingColumnNames() { } /** - * Returns whether to flush on close. + * Gets whether to flush on close. * * @return whether to flush on close. * @since 1.6 @@ -1502,7 +1632,34 @@ public boolean getAutoFlush() { } /** - * Returns the character marking the start of a line comment. 
+ * Gets the comment marker character, {@code null} disables comments. + *

    + * The comment start character is only recognized at the start of a line. + *

    + *

    + * Comments are printed first, before headers. + *

    + *

    + * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment + * marker written at the start of each comment line. + *

    + *

    + * If the comment marker is not set, then the header comments are ignored. + *

    + *

    + * For example: + *

    + *
    +     * builder.setCommentMarker('#')
    +     *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
    +     * 
    + *

    + * writes: + *

    + *
    +     * # Generated by Apache Commons CSV
    +     * # 1970-01-01T00:00:00Z
    +     * 
    * * @return the comment start marker, may be {@code null} */ @@ -1511,7 +1668,7 @@ public Character getCommentMarker() { } /** - * Returns the first character delimiting the values (typically ';', ',' or '\t'). + * Gets the first character delimiting the values (typically ';', ',' or '\t'). * * @return the first delimiter character. * @deprecated Use {@link #getDelimiterString()}. @@ -1522,16 +1679,27 @@ public char getDelimiter() { } /** - * Returns the character delimiting the values (typically ";", "," or "\t"). + * Gets the character delimiting the values (typically ";", "," or "\t"). * * @return the delimiter. + * @since 1.9.0 */ public String getDelimiterString() { return delimiter; } /** - * Returns the escape character. + * Gets how duplicate headers are handled. + * + * @return if duplicate header values are allowed, allowed conditionally, or disallowed. + * @since 1.10.0 + */ + public DuplicateHeaderMode getDuplicateHeaderMode() { + return duplicateHeaderMode; + } + + /** + * Gets the escape character. * * @return the escape character, may be {@code null} */ @@ -1540,16 +1708,43 @@ public Character getEscapeCharacter() { } /** - * Returns a copy of the header array. + * Gets a copy of the header array. * * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file */ public String[] getHeader() { - return header != null ? header.clone() : null; + return headers != null ? headers.clone() : null; } /** - * Returns a copy of the header comment array. + * Gets a copy of the header comment array to write before the CSV data. + *

    + * This setting is ignored by the parser. + *

    + *

    + * Comments are printed first, before headers. + *

    + *

    + * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment + * marker written at the start of each comment line. + *

    + *

    + * If the comment marker is not set, then the header comments are ignored. + *

    + *

    + * For example: + *

    + *
    +     * builder.setCommentMarker('#')
    +     *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
    +     * 
    + *

    + * writes: + *

    + *
    +     * # Generated by Apache Commons CSV
    +     * # 1970-01-01T00:00:00Z
    +     * 
    * * @return a copy of the header comment array; {@code null} if disabled. */ @@ -1558,7 +1753,7 @@ public String[] getHeaderComments() { } /** - * Specifies whether empty lines between records are ignored when parsing input. + * Gets whether empty lines between records are ignored when parsing input. * * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records. */ @@ -1567,9 +1762,9 @@ public boolean getIgnoreEmptyLines() { } /** - * Specifies whether header names will be accessed ignoring case. + * Gets whether header names will be accessed ignoring case when parsing input. * - * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive. + * @return {@code true} if header names cases are ignored, {@code false} if they are case-sensitive. * @since 1.3 */ public boolean getIgnoreHeaderCase() { @@ -1577,7 +1772,7 @@ public boolean getIgnoreHeaderCase() { } /** - * Specifies whether spaces around values are ignored when parsing input. + * Gets whether spaces around values are ignored when parsing input. * * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. */ @@ -1599,7 +1794,7 @@ public String getNullString() { } /** - * Returns the character used to encapsulate values containing special characters. + * Gets the character used to encapsulate values containing special characters. * * @return the quoteChar character, may be {@code null} */ @@ -1608,7 +1803,7 @@ public Character getQuoteCharacter() { } /** - * Returns the quote policy output fields. + * Gets the quote policy output fields. * * @return the quote policy */ @@ -1617,7 +1812,7 @@ public QuoteMode getQuoteMode() { } /** - * Returns the record separator delimiting output records. + * Gets the record separator delimiting output records. 
* * @return the record separator */ @@ -1626,7 +1821,7 @@ public String getRecordSeparator() { } /** - * Returns whether to skip the header record. + * Gets whether to skip the header record. * * @return whether to skip the header record. */ @@ -1635,7 +1830,7 @@ public boolean getSkipHeaderRecord() { } /** - * Returns whether to add a trailing delimiter. + * Gets whether to add a trailing delimiter. * * @return whether to add a trailing delimiter. * @since 1.3 @@ -1645,7 +1840,7 @@ public boolean getTrailingDelimiter() { } /** - * Returns whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by + * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by * {CSVParser#addRecordValue(boolean)} * * @return whether to trim leading and trailing blanks. @@ -1658,7 +1853,7 @@ public boolean getTrim() { public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + Arrays.hashCode(header); + result = prime * result + Arrays.hashCode(headers); result = prime * result + Arrays.hashCode(headerComments); return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter, ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator, @@ -1666,7 +1861,7 @@ public int hashCode() { } /** - * Specifies whether comments are supported by this format. + * Tests whether comments are supported by this format. * * Note that the comment introducer character is only recognized at the start of a line. 
* @@ -1677,7 +1872,7 @@ public boolean isCommentMarkerSet() { } /** - * Matches whether the next characters constitute a delimiter + * Tests whether the next characters constitute a delimiter * * @param ch * the current char @@ -1708,7 +1903,7 @@ private boolean isDelimiter(final char ch, final CharSequence charSeq, final int } /** - * Returns whether escape are being processed. + * Tests whether escapes are being processed. * * @return {@code true} if escapes are processed */ @@ -1717,7 +1912,7 @@ public boolean isEscapeCharacterSet() { } /** - * Returns whether a nullString has been defined. + * Tests whether a null string has been defined. * * @return {@code true} if a nullString is defined */ @@ -1726,7 +1921,7 @@ public boolean isNullStringSet() { } /** - * Returns whether a quoteChar has been defined. + * Tests whether a quoteChar has been defined. * * @return {@code true} if a quoteChar is defined */ @@ -1765,7 +1960,7 @@ public CSVPrinter print(final Appendable out) throws IOException { } /** - * Prints to the specified output. + * Prints to the specified {@code File} with given {@code Charset}. * *

    * See also {@link CSVPrinter}. @@ -1827,7 +2022,7 @@ private synchronized void print(final Object object, final CharSequence value, f if (object == null) { out.append(value); } else if (isQuoteCharacterSet()) { - // the original object is needed so can check for Number + // The original object is needed so can check for Number printWithQuotes(object, value, out, newRecord); } else if (isEscapeCharacterSet()) { printWithEscapes(value, out); @@ -1837,7 +2032,8 @@ private synchronized void print(final Object object, final CharSequence value, f } /** - * Prints to the specified output, returns a {@code CSVPrinter} which the caller MUST close. + * Prints to the specified {@code Path} with given {@code Charset}, + * returns a {@code CSVPrinter} which the caller MUST close. * *

    * See also {@link CSVPrinter}. @@ -1903,7 +2099,7 @@ public synchronized void println(final Appendable appendable) throws IOException } /** - * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the record separator. + * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator. * *

    * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing @@ -2058,7 +2254,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi return; case MINIMAL: if (len <= 0) { - // always quote an empty token that is the first + // Always quote an empty token that is the first // on the line, as it may be the only thing on the // line. If it were not quoted in that case, // an empty line has no tokens. @@ -2088,7 +2284,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi c = charSeq.charAt(pos); // Some other chars at the end caused the parser to fail, so for now // encapsulate if we end in anything less than ' ' - if (c <= SP) { + if (isTrimChar(c)) { quote = true; } } @@ -2096,7 +2292,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi } if (!quote) { - // no encapsulation needed - write out the original value + // No encapsulation needed - write out the original value out.append(charSeq, start, len); return; } @@ -2106,12 +2302,12 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi } if (!quote) { - // no encapsulation needed - write out the original value + // No encapsulation needed - write out the original value out.append(charSeq, start, len); return; } - // we hit something that needed encapsulation + // We hit something that needed encapsulation out.append(quoteChar); // Pick up where we left off: pos should be positioned on the first character that caused @@ -2127,13 +2323,13 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi pos++; } - // write the last segment + // Write the last segment out.append(charSeq, start, pos); out.append(quoteChar); } /** - * Always use quotes unless QuoteMode is NONE, so we not have to look ahead. 
+ * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. * * @param reader What to print * @param appendable Where to print it @@ -2220,15 +2416,23 @@ public String toString() { sb.append(' '); sb.append("HeaderComments:").append(Arrays.toString(headerComments)); } - if (header != null) { + if (headers != null) { sb.append(' '); - sb.append("Header:").append(Arrays.toString(header)); + sb.append("Header:").append(Arrays.toString(headers)); } return sb.toString(); } + String trim(final String value) { + return getTrim() ? value.trim() : value; + } + /** - * Verifies the validity and consistency of the attributes, and throws an IllegalArgumentException if necessary. + * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. + *

    + * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used + * for parsing, so it cannot be used here. + *

    * * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. */ @@ -2258,22 +2462,29 @@ private void validate() throws IllegalArgumentException { } if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { - throw new IllegalArgumentException("No quotes mode set but no escape character is set"); - } - - // validate header - if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { - final Set dupCheck = new HashSet<>(); - for (final String hdr : header) { - if (!dupCheck.add(hdr)) { - throw new IllegalArgumentException("The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header)); + throw new IllegalArgumentException("Quote mode set to NONE but no escape character is set"); + } + + // Validate headers + if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { + final Set dupCheckSet = new HashSet<>(headers.length); + final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY; + for (final String header : headers) { + final boolean blank = isBlank(header); + // Sanitise all empty headers to the empty string "" when checking duplicates + final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); + if (containsHeader && !(blank && emptyDuplicatesAllowed)) { + throw new IllegalArgumentException( + String.format( + "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", + header, Arrays.toString(headers))); } } } } /** - * Returns a new {@code CSVFormat} that allows duplicate header names. + * Builds a new {@code CSVFormat} that allows duplicate header names. * * @return a new {@code CSVFormat} that allows duplicate header names * @since 1.7 @@ -2285,7 +2496,7 @@ public CSVFormat withAllowDuplicateHeaderNames() { } /** - * Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value. 
+ * Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value. * * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. @@ -2299,7 +2510,7 @@ public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeade } /** - * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}. + * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}. * * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. * @see Builder#setAllowMissingColumnNames(boolean) @@ -2312,7 +2523,7 @@ public CSVFormat withAllowMissingColumnNames() { } /** - * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. + * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. * * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause * an {@link IllegalArgumentException} to be thrown. @@ -2325,7 +2536,7 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam } /** - * Returns a new {@code CSVFormat} with whether to flush on close. + * Builds a new {@code CSVFormat} with whether to flush on close. * * @param autoFlush whether to flush on close. * @@ -2339,7 +2550,7 @@ public CSVFormat withAutoFlush(final boolean autoFlush) { } /** - * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. + * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character. * * Note that the comment start character is only recognized at the start of a line. 
* @@ -2354,7 +2565,7 @@ public CSVFormat withCommentMarker(final char commentMarker) { } /** - * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. + * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character. * * Note that the comment start character is only recognized at the start of a line. * @@ -2369,10 +2580,10 @@ public CSVFormat withCommentMarker(final Character commentMarker) { } /** - * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character. + * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character. * * @param delimiter the delimiter character - * @return A new CSVFormat that is equal to this with the specified character as delimiter + * @return A new CSVFormat that is equal to this with the specified character as a delimiter * @throws IllegalArgumentException thrown if the specified character is a line break * @deprecated Use {@link Builder#setDelimiter(char)} */ @@ -2382,7 +2593,7 @@ public CSVFormat withDelimiter(final char delimiter) { } /** - * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. + * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character. * * @param escape the escape character * @return A new CSVFormat that is equal to this but with the specified character as the escape character @@ -2395,7 +2606,7 @@ public CSVFormat withEscape(final char escape) { } /** - * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. + * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character. 
* * @param escape the escape character, use {@code null} to disable * @return A new CSVFormat that is equal to this but with the specified character as the escape character @@ -2408,7 +2619,7 @@ public CSVFormat withEscape(final Character escape) { } /** - * Returns a new {@code CSVFormat} using the first record as header. + * Builds a new {@code CSVFormat} using the first record as header. * *

    * Calling this method is equivalent to calling: @@ -2435,7 +2646,7 @@ public CSVFormat withFirstRecordAsHeader() { } /** - * Returns a new {@code CSVFormat} with the header of the format defined by the enum class. + * Builds a new {@code CSVFormat} with the header of the format defined by the enum class. * *

    * Example: @@ -2465,7 +2676,7 @@ public CSVFormat withHeader(final Class> headerEnum) { } /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the + * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the * input file with: * *

    @@ -2481,7 +2692,7 @@ public CSVFormat withHeader(final Class> headerEnum) {
          * The header is also used by the {@link CSVPrinter}.
          * 

    * - * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. * @return A new CSVFormat that is equal to this but with the specified header * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. * @since 1.1 @@ -2493,7 +2704,7 @@ public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { } /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the + * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the * input file with: * *
    @@ -2521,7 +2732,7 @@ public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQ
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file
    +     * Builds a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file
          * with:
          *
          * 
    @@ -2537,7 +2748,7 @@ public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQ
          * The header is also used by the {@link CSVPrinter}.
          * 

    * - * @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. + * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. * @return A new CSVFormat that is equal to this but with the specified header * @see Builder#setSkipHeaderRecord(boolean) * @deprecated Use {@link Builder#setHeader(String...)} @@ -2548,7 +2759,7 @@ public CSVFormat withHeader(final String... header) { } /** - * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers. + * Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers. * This setting is ignored by the parser. * *
    @@ -2567,10 +2778,10 @@ public CSVFormat withHeaderComments(final Object... headerComments) {
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
    +     * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
          *
          * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
    -     * @since {@link Builder#setIgnoreEmptyLines(boolean)}
    +     * @see Builder#setIgnoreEmptyLines(boolean)
          * @since 1.1
          * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)}
          */
    @@ -2580,7 +2791,7 @@ public CSVFormat withIgnoreEmptyLines() {
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
    +     * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
          *
          * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty
          *                         lines to empty records.
    @@ -2593,9 +2804,9 @@ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
    +     * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
          *
    -     * @return A new CSVFormat that will ignore case header name.
    +     * @return A new CSVFormat that will ignore the case of header names.
          * @see Builder#setIgnoreHeaderCase(boolean)
          * @since 1.3
          * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}
    @@ -2606,7 +2817,7 @@ public CSVFormat withIgnoreHeaderCase() {
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case.
    +     * Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case.
          *
          * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
          * @return A new CSVFormat that will ignore case header name if specified as {@code true}
    @@ -2619,7 +2830,7 @@ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}.
    +     * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}.
          *
          * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior.
          * @see Builder#setIgnoreSurroundingSpaces(boolean)
    @@ -2632,7 +2843,7 @@ public CSVFormat withIgnoreSurroundingSpaces() {
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value.
    +     * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value.
          *
          * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
          * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
    @@ -2644,7 +2855,7 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
         }
     
         /**
    -     * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output.
    +     * Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output.
          * <ul>
          * <li>Reading: Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
          * <li>Writing: Writes {@code null} as the given {@code nullString} when writing records.</li>
    • @@ -2660,7 +2871,7 @@ public CSVFormat withNullString(final String nullString) { } /** - * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. + * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. * * @param quoteChar the quote character * @return A new CSVFormat that is equal to this but with the specified character as quoteChar @@ -2673,7 +2884,7 @@ public CSVFormat withQuote(final char quoteChar) { } /** - * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. + * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. * * @param quoteChar the quote character, use {@code null} to disable. * @return A new CSVFormat that is equal to this but with the specified character as quoteChar @@ -2686,7 +2897,7 @@ public CSVFormat withQuote(final Character quoteChar) { } /** - * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value. + * Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value. * * @param quoteMode the quote policy to use for output. * @@ -2699,7 +2910,7 @@ public CSVFormat withQuoteMode(final QuoteMode quoteMode) { } /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character. + * Builds a new {@code CSVFormat} with the record separator of the format set to the specified character. * *

      * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and @@ -2716,7 +2927,7 @@ public CSVFormat withRecordSeparator(final char recordSeparator) { } /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. + * Builds a new {@code CSVFormat} with the record separator of the format set to the specified String. * *

      * Note: This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and @@ -2734,7 +2945,7 @@ public CSVFormat withRecordSeparator(final String recordSeparator) { } /** - * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}. + * Builds a new {@code CSVFormat} with skipping the header record set to {@code true}. * * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. * @see Builder#setSkipHeaderRecord(boolean) @@ -2748,7 +2959,7 @@ public CSVFormat withSkipHeaderRecord() { } /** - * Returns a new {@code CSVFormat} with whether to skip the header record. + * Builds a new {@code CSVFormat} with whether to skip the header record. * * @param skipHeaderRecord whether to skip the header record. * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. @@ -2761,7 +2972,7 @@ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { } /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows + * Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows * and LF on Linux. * *

      @@ -2779,7 +2990,7 @@ public CSVFormat withSystemRecordSeparator() { } /** - * Returns a new {@code CSVFormat} to add a trailing delimiter. + * Builds a new {@code CSVFormat} to add a trailing delimiter. * * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. * @since 1.3 @@ -2791,7 +3002,7 @@ public CSVFormat withTrailingDelimiter() { } /** - * Returns a new {@code CSVFormat} with whether to add a trailing delimiter. + * Builds a new {@code CSVFormat} with whether to add a trailing delimiter. * * @param trailingDelimiter whether to add a trailing delimiter. * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. @@ -2804,7 +3015,7 @@ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { } /** - * Returns a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. + * Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. * * @return A new CSVFormat that is equal to this but with the trim setting on. * @since 1.3 @@ -2816,7 +3027,7 @@ public CSVFormat withTrim() { } /** - * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. + * Builds a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. * * @param trim whether to trim leading and trailing blanks. * @return A new CSVFormat that is equal to this but with the specified trim setting. 
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 58cdb146b2..055e2a292b 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -26,6 +26,7 @@ import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; +import java.io.UncheckedIOException; import java.net.URL; import java.nio.charset.Charset; import java.nio.file.Files; @@ -42,16 +43,19 @@ import java.util.Spliterator; import java.util.Spliterators; import java.util.TreeMap; +import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; +import org.apache.commons.io.function.Uncheck; + /** * Parses CSV files according to the specified format. * * Because CSV appears in many different dialects, the parser supports many formats by allowing the * specification of a {@link CSVFormat}. * - * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream. + * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream. * *

      Creating instances

      *

      @@ -103,13 +107,13 @@ *

    * *

    - * If the predefined formats don't match the format at hands, custom formats can be defined. More information about - * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. + * If the predefined formats don't match the format at hand, custom formats can be defined. More information about + * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. *

    * *

    Parsing into memory

    *

    - * If parsing record wise is not desired, the contents of the input can be read completely into memory. + * If parsing record-wise is not desired, the contents of the input can be read completely into memory. *

    * *
    @@ -124,30 +128,25 @@
      *
      * 
      *
    1. Parsing into memory starts at the current position of the parser. If you have already parsed records from - * the input, those records will not end up in the in memory representation of your CSV data.
    2. - *
    3. Parsing into memory may consume a lot of system resources depending on the input. For example if you're + * the input, those records will not end up in the in-memory representation of your CSV data.
    4. + *
    5. Parsing into memory may consume a lot of system resources depending on the input. For example, if you're * parsing a 150MB file of CSV data the contents will be read completely into memory.
    6. *
    * *

    Notes

    *

    - * Internal parser state is completely covered by the format and the reader-state. + * The internal parser state is completely covered by the format and the reader state. *

    * * @see package documentation for more details */ public final class CSVParser implements Iterable, Closeable { - class CSVRecordIterator implements Iterator { + final class CSVRecordIterator implements Iterator { private CSVRecord current; private CSVRecord getNextRecord() { - try { - return CSVParser.this.nextRecord(); - } catch (final IOException e) { - throw new IllegalStateException( - e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); - } + return Uncheck.get(CSVParser.this::nextRecord); } @Override @@ -191,6 +190,7 @@ public void remove() { * Header information based on name and position. */ private static final class Headers { + /** * Header column positions (0-based) */ @@ -302,8 +302,6 @@ public static CSVParser parse(final Reader reader, final CSVFormat format) throw return new CSVParser(reader, format); } - // the following objects are shared to reduce garbage - /** * Creates a parser for the given {@link String}. * @@ -353,6 +351,10 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor return new CSVParser(new InputStreamReader(url.openStream(), charset), format); } + private String headerComment; + + private String trailerComment; + private final CSVFormat format; private final Headers headers; @@ -378,7 +380,7 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor private final Token reusableToken = new Token(); /** - * Customized CSV parser using the given {@link CSVFormat} + * Constructs a new instance using the given {@link CSVFormat} * *

    * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, @@ -399,7 +401,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException } /** - * Customized CSV parser using the given {@link CSVFormat} + * Constructs a new instance using the given {@link CSVFormat} * *

    * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, @@ -415,7 +417,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException * @param recordNumber * The next record number to assign * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. + * If the parameters of the format are inconsistent or if either the reader or format is null. * @throws IOException * If there is a problem reading the header or skipping the first record * @since 1.1 @@ -435,12 +437,11 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact } private void addRecordValue(final boolean lastRecord) { - final String input = this.reusableToken.content.toString(); - final String inputClean = this.format.getTrim() ? input.trim() : input; - if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) { + final String input = this.format.trim(this.reusableToken.content.toString()); + if (lastRecord && input.isEmpty() && this.format.getTrailingDelimiter()) { return; } - this.recordList.add(handleNull(inputClean)); + this.recordList.add(handleNull(input)); } /** @@ -480,35 +481,42 @@ private Headers createHeaders() throws IOException { final CSVRecord nextRecord = this.nextRecord(); if (nextRecord != null) { headerRecord = nextRecord.values(); + headerComment = nextRecord.getComment(); } } else { if (this.format.getSkipHeaderRecord()) { - this.nextRecord(); + final CSVRecord nextRecord = this.nextRecord(); + if (nextRecord != null) { + headerComment = nextRecord.getComment(); + } } headerRecord = formatHeader; } // build the name to index mappings if (headerRecord != null) { + // Track an occurrence of a null, empty or blank header. 
+ boolean observedMissing = false; for (int i = 0; i < headerRecord.length; i++) { final String header = headerRecord[i]; - final boolean emptyHeader = header == null || header.trim().isEmpty(); - if (emptyHeader && !this.format.getAllowMissingColumnNames()) { + final boolean blankHeader = CSVFormat.isBlank(header); + if (blankHeader && !this.format.getAllowMissingColumnNames()) { throw new IllegalArgumentException( "A header name is missing in " + Arrays.toString(headerRecord)); } - final boolean containsHeader = header != null && hdrMap.containsKey(header); + final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header); final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode(); final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL; final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY; - if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) { + if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) { throw new IllegalArgumentException( String.format( "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, Arrays.toString(headerRecord))); } + observedMissing |= blankHeader; if (header != null) { hdrMap.put(header, Integer.valueOf(i)); if (headerNames == null) { @@ -520,7 +528,7 @@ private Headers createHeaders() throws IOException { } } if (headerNames == null) { - headerNames = Collections.emptyList(); //immutable + headerNames = Collections.emptyList(); // immutable } else { headerNames = Collections.unmodifiableList(headerNames); } @@ -528,7 +536,7 @@ private Headers createHeaders() throws IOException { } /** - * Returns the current line number in the input stream. + * Gets the current line number in the input stream. * *

    * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to @@ -552,7 +560,18 @@ public String getFirstEndOfLine() { } /** - * Returns a copy of the header map. + * Gets the header comment, if any. + * The header comment appears before the header record. + * + * @return the header comment for this stream, or null if no comment is available. + * @since 1.10.0 + */ + public String getHeaderComment() { + return headerComment; + } + + /** + * Gets a copy of the header map as defined in the CSVFormat's header. *

    * The map keys are column names. The map values are 0-based indices. *

    @@ -573,16 +592,16 @@ public Map getHeaderMap() { } /** - * Returns the header map. + * Gets the underlying header map. * - * @return the header map. + * @return the underlying header map. */ Map getHeaderMapRaw() { return this.headers.headerMap; } /** - * Returns a read-only list of header names that iterates in column order. + * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header. *

    * Note: The list provides strings that can be used as keys in the header map. * The list will not contain null column names if they were present in the input @@ -598,7 +617,7 @@ public List getHeaderNames() { } /** - * Returns the current record number in the input stream. + * Gets the current record number in the input stream. * *

    * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to @@ -620,24 +639,30 @@ public long getRecordNumber() { *

    * * @return list of {@link CSVRecord CSVRecords}, may be empty - * @throws IOException + * @throws UncheckedIOException * on parse error or input read-failure */ - public List getRecords() throws IOException { - CSVRecord rec; - final List records = new ArrayList<>(); - while ((rec = this.nextRecord()) != null) { - records.add(rec); - } - return records; + public List getRecords() { + return stream().collect(Collectors.toList()); } /** - * Handle whether input is parsed as null + * Gets the trailer comment, if any. + * Trailer comments are located between the last record and EOF + * + * @return the trailer comment for this stream, or null if no comment is available. + * @since 1.10.0 + */ + public String getTrailerComment() { + return trailerComment; + } + + /** + * Handles whether the input is parsed as null * * @param input * the cell data to further processed - * @return null if input is parsed as null, or input itself if input isn't parsed as null + * @return null if input is parsed as null, or input itself if the input isn't parsed as null */ private String handleNull(final String input) { final boolean isQuoted = this.reusableToken.isQuoted; @@ -651,6 +676,35 @@ private String handleNull(final String input) { return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input; } + /** + * Checks whether there is a header comment. + * The header comment appears before the header record. + * Note that if the parser's format has been given an explicit header + * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload) + * and the header record is not being skipped + * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments + * will be associated with the first record, not the header. 
+ * + * @return true if this parser has seen a header comment, false otherwise + * @since 1.10.0 + */ + public boolean hasHeaderComment() { + return headerComment != null; + } + + /** + * Checks whether there is a trailer comment. + * Trailer comments are located between the last record and EOF. + * The trailer comments will only be available after the parser has + * finished processing this stream. + * + * @return true if this parser has seen a trailer comment, false otherwise + * @since 1.10.0 + */ + public boolean hasTrailerComment() { + return trailerComment != null; + } + /** * Tests whether this parser is closed. * @@ -675,13 +729,20 @@ private boolean isStrictQuoteMode() { * Returns the record iterator. * *

    - * An {@link IOException} caught during the iteration are re-thrown as an + * An {@link IOException} caught during the iteration is re-thrown as an * {@link UncheckedIOException}. *

    *

    - * If the parser is closed a call to {@link Iterator#next()} will throw a + * If the parser is closed, the iterator will not yield any more records. + * A call to {@link Iterator#hasNext()} will return {@code false} and + * a call to {@link Iterator#next()} will throw a * {@link NoSuchElementException}. *

    + *

    + * If it is necessary to construct an iterator which is usable after the + * parser is closed, one option is to extract all records as a list with + * {@link #getRecords()}, and return an iterator to that list. + *

    */ @Override public Iterator iterator() { @@ -713,6 +774,8 @@ CSVRecord nextRecord() throws IOException { case EOF: if (this.reusableToken.isReady) { this.addRecordValue(true); + } else if (sb != null) { + trailerComment = sb.toString(); } break; case INVALID: @@ -742,7 +805,10 @@ CSVRecord nextRecord() throws IOException { /** * Returns a sequential {@code Stream} with this collection as its source. - * + *

    + * If the parser is closed, the stream will not produce any more values. + * See the comments in {@link #iterator()}. + *

    * @return a sequential {@code Stream} with this collection as its source. * @since 1.9.0 */ diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java index 9dcb95cd3a..6d8f8af840 100644 --- a/src/main/java/org/apache/commons/csv/CSVPrinter.java +++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java @@ -1,412 +1,487 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.SP; - -import java.io.Closeable; -import java.io.Flushable; -import java.io.IOException; -import java.sql.Clob; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Objects; - -/** - * Prints values in a {@link CSVFormat CSV format}. - * - *

    Values can be appended to the output by calling the {@link #print(Object)} method. - * Values are printed according to {@link String#valueOf(Object)}. - * To complete a record the {@link #println()} method has to be called. - * Comments can be appended by calling {@link #printComment(String)}. - * However a comment will only be written to the output if the {@link CSVFormat} supports comments. - *

    - * - *

    The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)} - * or {@link #printRecord(Iterable)}. - * Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)} - * methods can be used to print several records at once. - *

    - * - *

    Example:

    - * - *
    - * try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
    - *     printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
    - *     printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
    - *     printer.println();
    - *     printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
    - * } catch (IOException ex) {
    - *     ex.printStackTrace();
    - * }
    - * 
    - * - *

    This code will write the following to csv.txt:

    - *
    - * id,userName,firstName,lastName,birthday
    - * 1,john73,John,Doe,1973-09-15
    - *
    - * 2,mary,Mary,Meyer,1985-03-29
    - * 
    - */ -public final class CSVPrinter implements Flushable, Closeable { - - /** The place that the values get written. */ - private final Appendable appendable; - private final CSVFormat format; - - /** True if we just began a new record. */ - private boolean newRecord = true; - - /** - * Creates a printer that will print values to the given stream following the CSVFormat. - *

    - * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation - * and escaping with a different character) are not supported. - *

    - * - * @param appendable - * stream to which to print. Must not be null. - * @param format - * the CSV format. Must not be null. - * @throws IOException - * thrown if the optional header cannot be printed. - * @throws IllegalArgumentException - * thrown if the parameters of the format are inconsistent or if either out or format are null. - */ - public CSVPrinter(final Appendable appendable, final CSVFormat format) throws IOException { - Objects.requireNonNull(appendable, "appendable"); - Objects.requireNonNull(format, "format"); - - this.appendable = appendable; - this.format = format.copy(); - // TODO: Is it a good idea to do this here instead of on the first call to a print method? - // It seems a pain to have to track whether the header has already been printed or not. - if (format.getHeaderComments() != null) { - for (final String line : format.getHeaderComments()) { - this.printComment(line); - } - } - if (format.getHeader() != null && !format.getSkipHeaderRecord()) { - this.printRecord((Object[]) format.getHeader()); - } - } - - @Override - public void close() throws IOException { - close(false); - } - - /** - * Closes the underlying stream with an optional flush first. - * @param flush whether to flush before the actual close. - * - * @throws IOException - * If an I/O error occurs - * @since 1.6 - */ - public void close(final boolean flush) throws IOException { - if (flush || format.getAutoFlush()) { - flush(); - } - if (appendable instanceof Closeable) { - ((Closeable) appendable).close(); - } - } - - /** - * Flushes the underlying stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void flush() throws IOException { - if (appendable instanceof Flushable) { - ((Flushable) appendable).flush(); - } - } - - /** - * Gets the target Appendable. - * - * @return the target Appendable. - */ - public Appendable getOut() { - return this.appendable; - } - - /** - * Prints the string as the next value on the line. 
The value will be escaped or encapsulated as needed. - * - * @param value - * value to be output. - * @throws IOException - * If an I/O error occurs - */ - public synchronized void print(final Object value) throws IOException { - format.print(value, appendable, newRecord); - newRecord = false; - } - - /** - * Prints a comment on a new line among the delimiter separated values. - * - *

    - * Comments will always begin on a new line and occupy at least one full line. The character specified to start - * comments and a space will be inserted at the beginning of each new line in the comment. - *

    - * - *

    - * If comments are disabled in the current CSV format this method does nothing. - *

    - * - *

    This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()} - * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use - * line breaks as record separator.

    - * - * @param comment - * the comment to output - * @throws IOException - * If an I/O error occurs - */ - public synchronized void printComment(final String comment) throws IOException { - if (comment == null || !format.isCommentMarkerSet()) { - return; - } - if (!newRecord) { - println(); - } - appendable.append(format.getCommentMarker().charValue()); - appendable.append(SP); - for (int i = 0; i < comment.length(); i++) { - final char c = comment.charAt(i); - switch (c) { - case CR: - if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) { - i++; - } - //$FALL-THROUGH$ break intentionally excluded. - case LF: - println(); - appendable.append(format.getCommentMarker().charValue()); - appendable.append(SP); - break; - default: - appendable.append(c); - break; - } - } - println(); - } - - /** - * Prints headers for a result set based on its metadata. - * - * @param resultSet The result set to query for metadata. - * @throws IOException If an I/O error occurs. - * @throws SQLException If a database access error occurs or this method is called on a closed result set. - * @since 1.9.0 - */ - public synchronized void printHeaders(final ResultSet resultSet) throws IOException, SQLException { - printRecord((Object[]) format.builder().setHeader(resultSet).build().getHeader()); - } - - /** - * Outputs the record separator. - * - * @throws IOException - * If an I/O error occurs - */ - public synchronized void println() throws IOException { - format.println(appendable); - newRecord = true; - } - - /** - * Prints the given values a single record of delimiter separated values followed by the record separator. - * - *

    - * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *

    - * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public synchronized void printRecord(final Iterable values) throws IOException { - for (final Object value : values) { - print(value); - } - println(); - } - - /** - * Prints the given values a single record of delimiter separated values followed by the record separator. - * - *

    - * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *

    - * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Object... values) throws IOException { - printRecord(Arrays.asList(values)); - } - - /** - * Prints all the objects in the given collection handling nested collections/arrays as records. - * - *

    - * If the given collection only contains simple objects, this method will print a single record like - * {@link #printRecord(Iterable)}. If the given collections contains nested collections/arrays those nested elements - * will each be printed as records using {@link #printRecord(Object...)}. - *

    - * - *

    - * Given the following data structure: - *

    - * - *
    -     * 
    -     * List<String[]> data = ...
    -     * data.add(new String[]{ "A", "B", "C" });
    -     * data.add(new String[]{ "1", "2", "3" });
    -     * data.add(new String[]{ "A1", "B2", "C3" });
    -     * 
    -     * 
    - * - *

    - * Calling this method will print: - *

    - * - *
    -     * 
    -     * A, B, C
    -     * 1, 2, 3
    -     * A1, B2, C3
    -     * 
    -     * 
    - * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Iterable values) throws IOException { - for (final Object value : values) { - if (value instanceof Object[]) { - this.printRecord((Object[]) value); - } else if (value instanceof Iterable) { - this.printRecord((Iterable) value); - } else { - this.printRecord(value); - } - } - } - - /** - * Prints all the objects in the given array handling nested collections/arrays as records. - * - *

    - * If the given array only contains simple objects, this method will print a single record like - * {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested - * elements will each be printed as records using {@link #printRecord(Object...)}. - *

    - * - *

    - * Given the following data structure: - *

    - * - *
    -     * 
    -     * String[][] data = new String[3][]
    -     * data[0] = String[]{ "A", "B", "C" };
    -     * data[1] = new String[]{ "1", "2", "3" };
    -     * data[2] = new String[]{ "A1", "B2", "C3" };
    -     * 
    -     * 
    - * - *

    - * Calling this method will print: - *

    - * - *
    -     * 
    -     * A, B, C
    -     * 1, 2, 3
    -     * A1, B2, C3
    -     * 
    -     * 
    - * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Object... values) throws IOException { - printRecords(Arrays.asList(values)); - } - - /** - * Prints all the objects in the given JDBC result set. - * - * @param resultSet - * result set the values to print. - * @throws IOException - * If an I/O error occurs - * @throws SQLException - * if a database access error occurs - */ - public void printRecords(final ResultSet resultSet) throws SQLException, IOException { - final int columnCount = resultSet.getMetaData().getColumnCount(); - while (resultSet.next()) { - for (int i = 1; i <= columnCount; i++) { - final Object object = resultSet.getObject(i); - // TODO Who manages the Clob? The JDBC driver or must we close it? Is it driver-dependent? - print(object instanceof Clob ? ((Clob) object).getCharacterStream() : object); - } - println(); - } - } - - /** - * Prints all the objects with metadata in the given JDBC result set based on the header boolean. - * - * @param resultSet source of row data. - * @param printHeader whether to print headers. - * @throws IOException If an I/O error occurs - * @throws SQLException if a database access error occurs - * @since 1.9.0 - */ - public void printRecords(final ResultSet resultSet, final boolean printHeader) throws SQLException, IOException { - if (printHeader) { - printHeaders(resultSet); - } - printRecords(resultSet); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.Constants.CR; +import static org.apache.commons.csv.Constants.LF; +import static org.apache.commons.csv.Constants.SP; + +import java.io.Closeable; +import java.io.Flushable; +import java.io.IOException; +import java.sql.Clob; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.Stream; + +import org.apache.commons.io.function.IOStream; + +/** + * Prints values in a {@link CSVFormat CSV format}. + * + *

    Values can be appended to the output by calling the {@link #print(Object)} method. + * Values are printed according to {@link String#valueOf(Object)}. + * To complete a record the {@link #println()} method has to be called. + * Comments can be appended by calling {@link #printComment(String)}. + * However a comment will only be written to the output if the {@link CSVFormat} supports comments. + *

    + * + *

    The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)} + * or {@link #printRecord(Iterable)}. + * Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)} + * methods can be used to print several records at once. + *

    + * + *

    Example:

    + * + *
    + * try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
    + *     printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
    + *     printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
    + *     printer.println();
    + *     printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
    + * } catch (IOException ex) {
    + *     ex.printStackTrace();
    + * }
    + * 
    + * + *

    This code will write the following to csv.txt:

    + *
    + * id,userName,firstName,lastName,birthday
    + * 1,john73,John,Doe,1973-09-15
    + *
    + * 2,mary,Mary,Meyer,1985-03-29
    + * 
    + */ +public final class CSVPrinter implements Flushable, Closeable { + + /** The place that the values get written. */ + private final Appendable appendable; + + private final CSVFormat format; + + /** True if we just began a new record. */ + private boolean newRecord = true; + + /** + * Creates a printer that will print values to the given stream following the CSVFormat. + *

    + * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation + * and escaping with a different character) are not supported. + *

    + * + * @param appendable + * stream to which to print. Must not be null. + * @param format + * the CSV format. Must not be null. + * @throws IOException + * thrown if the optional header cannot be printed. + * @throws IllegalArgumentException + * thrown if the parameters of the format are inconsistent or if either out or format are null. + */ + public CSVPrinter(final Appendable appendable, final CSVFormat format) throws IOException { + Objects.requireNonNull(appendable, "appendable"); + Objects.requireNonNull(format, "format"); + + this.appendable = appendable; + this.format = format.copy(); + // TODO: Is it a good idea to do this here instead of on the first call to a print method? + // It seems a pain to have to track whether the header has already been printed or not. + final String[] headerComments = format.getHeaderComments(); + if (headerComments != null) { + for (final String line : headerComments) { + this.printComment(line); + } + } + if (format.getHeader() != null && !format.getSkipHeaderRecord()) { + this.printRecord((Object[]) format.getHeader()); + } + } + + @Override + public void close() throws IOException { + close(false); + } + + /** + * Closes the underlying stream with an optional flush first. + * @param flush whether to flush before the actual close. + * + * @throws IOException + * If an I/O error occurs + * @since 1.6 + */ + public void close(final boolean flush) throws IOException { + if (flush || format.getAutoFlush()) { + flush(); + } + if (appendable instanceof Closeable) { + ((Closeable) appendable).close(); + } + } + + /** + * Flushes the underlying stream. + * + * @throws IOException + * If an I/O error occurs + */ + @Override + public void flush() throws IOException { + if (appendable instanceof Flushable) { + ((Flushable) appendable).flush(); + } + } + + /** + * Gets the target Appendable. + * + * @return the target Appendable. 
+ */ + public Appendable getOut() { + return this.appendable; + } + + /** + * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. + * + * @param value + * value to be output. + * @throws IOException + * If an I/O error occurs + */ + public synchronized void print(final Object value) throws IOException { + format.print(value, appendable, newRecord); + newRecord = false; + } + + /** + * Prints a comment on a new line among the delimiter-separated values. + * + *

    + * Comments will always begin on a new line and occupy at least one full line. The character specified to start + * comments and a space will be inserted at the beginning of each new line in the comment. + *

    + * + *

    + * If comments are disabled in the current CSV format this method does nothing. + *

    + * + *

    This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()} + * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use + * line breaks as record separators.

    + * + * @param comment + * the comment to output + * @throws IOException + * If an I/O error occurs + */ + public synchronized void printComment(final String comment) throws IOException { + if (comment == null || !format.isCommentMarkerSet()) { + return; + } + if (!newRecord) { + println(); + } + appendable.append(format.getCommentMarker().charValue()); + appendable.append(SP); + for (int i = 0; i < comment.length(); i++) { + final char c = comment.charAt(i); + switch (c) { + case CR: + if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) { + i++; + } + //$FALL-THROUGH$ break intentionally excluded. + case LF: + println(); + appendable.append(format.getCommentMarker().charValue()); + appendable.append(SP); + break; + default: + appendable.append(c); + break; + } + } + println(); + } + + /** + * Prints headers for a result set based on its metadata. + * + * @param resultSet The ResultSet to query for metadata. + * @throws IOException If an I/O error occurs. + * @throws SQLException If a database access error occurs or this method is called on a closed result set. + * @since 1.9.0 + */ + public synchronized void printHeaders(final ResultSet resultSet) throws IOException, SQLException { + printRecord((Object[]) format.builder().setHeader(resultSet).build().getHeader()); + } + + /** + * Outputs the record separator. + * + * @throws IOException + * If an I/O error occurs + */ + public synchronized void println() throws IOException { + format.println(appendable); + newRecord = true; + } + + /** + * Prints the given values as a single record of delimiter-separated values followed by the record separator. + * + *

    + * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record + * separator to the output after printing the record, so there is no need to call {@link #println()}. + *

    + * + * @param values + * values to output. + * @throws IOException + * If an I/O error occurs + */ + public synchronized void printRecord(final Iterable values) throws IOException { + for (final Object value : values) { + print(value); + } + println(); + } + + /** + * Prints the given values as a single record of delimiter-separated values followed by the record separator. + * + *

    + * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record + * separator to the output after printing the record, so there is no need to call {@link #println()}. + *

    + * + * @param values + * values to output. + * @throws IOException + * If an I/O error occurs + */ + public void printRecord(final Object... values) throws IOException { + printRecord(Arrays.asList(values)); + } + + /** + * Prints the given values as a single record of delimiter-separated values followed by the record separator. + * + *

    + * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record + * separator to the output after printing the record, so there is no need to call {@link #println()}. + *

    + * + * @param values + * values to output. + * @throws IOException + * If an I/O error occurs + * @since 1.10.0 + */ + @SuppressWarnings("resource") // caller closes. + public synchronized void printRecord(final Stream values) throws IOException { + IOStream.adapt(values).forEachOrdered(this::print); + println(); + } + + private void printRecordObject(final Object value) throws IOException { + if (value instanceof Object[]) { + this.printRecord((Object[]) value); + } else if (value instanceof Iterable) { + this.printRecord((Iterable) value); + } else { + this.printRecord(value); + } + } + + /** + * Prints all the objects in the given {@link Iterable} handling nested collections/arrays as records. + * + *

    + * If the given Iterable only contains simple objects, this method will print a single record like + * {@link #printRecord(Iterable)}. If the given Iterable contains nested collections/arrays those nested elements + * will each be printed as records using {@link #printRecord(Object...)}. + *

    + * + *

    + * Given the following data structure: + *

    + * + *
    +     * 
    +     * List<String[]> data = new ArrayList<>();
    +     * data.add(new String[]{ "A", "B", "C" });
    +     * data.add(new String[]{ "1", "2", "3" });
    +     * data.add(new String[]{ "A1", "B2", "C3" });
    +     * 
    +     * 
    + * + *

    + * Calling this method will print: + *

    + * + *
    +     * 
    +     * A, B, C
    +     * 1, 2, 3
    +     * A1, B2, C3
    +     * 
    +     * 
    + * + * @param values + * the values to print. + * @throws IOException + * If an I/O error occurs + */ + public void printRecords(final Iterable values) throws IOException { + for (final Object value : values) { + printRecordObject(value); + } + } + + /** + * Prints all the objects in the given array handling nested collections/arrays as records. + * + *

    + * If the given array only contains simple objects, this method will print a single record like + * {@link #printRecord(Object...)}. If the given collections contain nested collections or arrays, those nested + * elements will each be printed as records using {@link #printRecord(Object...)}. + *

    + * + *

    + * Given the following data structure: + *

    + * + *
    +     * 
    +     * String[][] data = new String[3][];
    +     * data[0] = new String[]{ "A", "B", "C" };
    +     * data[1] = new String[]{ "1", "2", "3" };
    +     * data[2] = new String[]{ "A1", "B2", "C3" };
    +     * 
    +     * 
    + * + *

    + * Calling this method will print: + *

    + * + *
    +     * 
    +     * A, B, C
    +     * 1, 2, 3
    +     * A1, B2, C3
    +     * 
    +     * 
    + * + * @param values + * the values to print. + * @throws IOException + * If an I/O error occurs + */ + public void printRecords(final Object... values) throws IOException { + printRecords(Arrays.asList(values)); + } + + /** + * Prints all the objects in the given JDBC result set. + * + * @param resultSet + * The values to print. + * @throws IOException + * If an I/O error occurs. + * @throws SQLException + * Thrown when a database access error occurs. + */ + public void printRecords(final ResultSet resultSet) throws SQLException, IOException { + final int columnCount = resultSet.getMetaData().getColumnCount(); + while (resultSet.next()) { + for (int i = 1; i <= columnCount; i++) { + final Object object = resultSet.getObject(i); + // TODO Who manages the Clob? The JDBC driver or must we close it? Is it driver-dependent? + print(object instanceof Clob ? ((Clob) object).getCharacterStream() : object); + } + println(); + } + } + + /** + * Prints all the objects with metadata in the given JDBC result set based on the header boolean. + * + * @param resultSet source of row data. + * @param printHeader whether to print headers. + * @throws IOException If an I/O error occurs + * @throws SQLException if a database access error occurs + * @since 1.9.0 + */ + public void printRecords(final ResultSet resultSet, final boolean printHeader) throws SQLException, IOException { + if (printHeader) { + printHeaders(resultSet); + } + printRecords(resultSet); + } + + /** + * Prints all the objects in the given {@link Stream} handling nested collections/arrays as records. + * + *

    + * If the given Stream only contains simple objects, this method will print a single record like + * {@link #printRecord(Iterable)}. If the given Stream contains nested collections/arrays those nested elements + * will each be printed as records using {@link #printRecord(Object...)}. + *

    + * + *

    + * Given the following data structure: + *

    + * + *
    +     * 
    +     * List<String[]> data = new ArrayList<>();
    +     * data.add(new String[]{ "A", "B", "C" });
    +     * data.add(new String[]{ "1", "2", "3" });
    +     * data.add(new String[]{ "A1", "B2", "C3" });
    +     * Stream<String[]> stream = data.stream();
    +     * 
    +     * 
    + * + *

    + * Calling this method will print: + *

    + * + *
    +     * 
    +     * A, B, C
    +     * 1, 2, 3
    +     * A1, B2, C3
    +     * 
    +     * 
    + * + * @param values + * the values to print. + * @throws IOException + * If an I/O error occurs + * @since 1.10.0 + */ + @SuppressWarnings({ "resource" }) // Caller closes. + public void printRecords(final Stream values) throws IOException { + IOStream.adapt(values).forEachOrdered(this::printRecordObject); + } +} diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index b4925d74f6..189cc07e58 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -23,6 +23,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -31,10 +32,10 @@ *

    * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0. * In version 1.8 the mapping between the column header and the column index was - * removed from the serialised state. The class maintains serialization compatibility + * removed from the serialized state. The class maintains serialization compatibility * with versions pre-1.8 for the record values; these must be accessed by index - * following deserialization. There will be loss of any functionally linked to the header - * mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa. + * following deserialization. There will be a loss of any functionality linked to the header + * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa. *

    */ public final class CSVRecord implements Serializable, Iterable { @@ -87,7 +88,7 @@ public String get(final int i) { } /** - * Returns a value by name. + * Returns a value by name. If multiple instances of the header name exist, only the last occurrence is returned. * *

    * Note: This requires a field mapping obtained from the original parser. @@ -142,8 +143,8 @@ public long getCharacterPosition() { /** * Returns the comment for this record, if any. * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF) - * the comment will be ignored. + * If there is no following record (i.e. the comment is at EOF), + * then the comment will be ignored. * * @return the comment for this record, or null if no comment for this record is available. */ @@ -188,8 +189,8 @@ public long getRecordNumber() { /** * Checks whether this record has a comment, false otherwise. * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF) - * the comment will be ignored. + * If there is no following record (i.e. the comment is at EOF), + * then the comment will be ignored. * * @return true if this record has a comment, false otherwise * @since 1.3 @@ -226,22 +227,22 @@ public boolean isMapped(final String name) { } /** - * Checks whether a column with given index has a value. + * Checks whether a column with a given index has a value. * * @param index * a column index (0-based) - * @return whether a column with given index has a value + * @return whether a column with a given index has a value */ public boolean isSet(final int index) { return 0 <= index && index < values.length; } /** - * Checks whether a given columns is mapped and has a value. + * Checks whether a given column is mapped and has a value. * * @param name * the name of the column to be retrieved. 
- * @return whether a given columns is mapped and has a value + * @return whether a given column is mapped and has a value */ public boolean isSet(final String name) { return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; @@ -270,9 +271,8 @@ public > M putIn(final M map) { return map; } getHeaderMapRaw().forEach((key, value) -> { - final int col = value; - if (col < values.length) { - map.put(key, values[col]); + if (value < values.length) { + map.put(key, values[value]); } }); return map; @@ -298,17 +298,25 @@ public Stream stream() { } /** - * Converts the values to a List. + * Converts the values to a new List. + *

    + * Editing the list does not update this instance. + *

    * * @return a new List * @since 1.9.0 */ public List toList() { - return Arrays.asList(values); + return stream().collect(Collectors.toList()); } /** - * Copies this record into a new Map of header name to record value. + * Copies this record into a new Map of header name to record value. If multiple instances of a header name exist, + * then only the last occurrence is mapped. + * + *

    + * Editing the map does not update this instance. + *

    * * @return A new Map. The map is empty if the record has no headers. */ diff --git a/src/main/java/org/apache/commons/csv/Constants.java b/src/main/java/org/apache/commons/csv/Constants.java index e8f0106ea1..9b9e2d417a 100644 --- a/src/main/java/org/apache/commons/csv/Constants.java +++ b/src/main/java/org/apache/commons/csv/Constants.java @@ -1,84 +1,91 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -/** - * Constants for this package. - */ -final class Constants { - - static final char BACKSLASH = '\\'; - - static final char BACKSPACE = '\b'; - - static final String COMMA = ","; - - /** - * Starts a comment, the remainder of the line is the comment. - */ - static final char COMMENT = '#'; - - static final char CR = '\r'; - - /** RFC 4180 defines line breaks as CRLF */ - static final String CRLF = "\r\n"; - - static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); - - static final String EMPTY = ""; - - /** The end of stream symbol */ - static final int END_OF_STREAM = -1; - - static final char FF = '\f'; - - static final char LF = '\n'; - - /** - * Unicode line separator. - */ - static final String LINE_SEPARATOR = "\u2028"; - - /** - * Unicode next line. 
- */ - static final String NEXT_LINE = "\u0085"; - - /** - * Unicode paragraph separator. - */ - static final String PARAGRAPH_SEPARATOR = "\u2029"; - - static final char PIPE = '|'; - - /** ASCII record separator */ - static final char RS = 30; - - static final char SP = ' '; - - static final char TAB = '\t'; - - /** Undefined state for the lookahead char */ - static final int UNDEFINED = -2; - - /** ASCII unit separator */ - static final char US = 31; - - static final String[] EMPTY_STRING_ARRAY = {}; - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv; + +/** + * Constants for this package. + */ +final class Constants { + + static final char BACKSLASH = '\\'; + + static final char BACKSPACE = '\b'; + + static final String COMMA = ","; + + /** + * Starts a comment, the remainder of the line is the comment. 
+ */ + static final char COMMENT = '#'; + + static final char CR = '\r'; + + /** RFC 4180 defines line breaks as CRLF */ + static final String CRLF = "\r\n"; + + static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); + + static final String EMPTY = ""; + + static final String[] EMPTY_STRING_ARRAY = {}; + + /** The end of stream symbol */ + static final int END_OF_STREAM = -1; + + static final char FF = '\f'; + + static final char LF = '\n'; + + /** + * Unicode line separator. + */ + static final String LINE_SEPARATOR = "\u2028"; + + /** + * Unicode next line. + */ + static final String NEXT_LINE = "\u0085"; + + /** + * Unicode paragraph separator. + */ + static final String PARAGRAPH_SEPARATOR = "\u2029"; + + static final char PIPE = '|'; + + /** ASCII record separator */ + static final char RS = 30; + + static final char SP = ' '; + + static final String SQL_NULL_STRING = "\\N"; + + static final char TAB = '\t'; + + /** Undefined state for the lookahead char */ + static final int UNDEFINED = -2; + + /** ASCII unit separator */ + static final char US = 31; + + /** No instances. */ + private Constants() { + // noop + } + +} diff --git a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java index 28ce071987..92f44d30ba 100644 --- a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java +++ b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java @@ -21,7 +21,7 @@ * Determines how duplicate header fields should be handled * if {@link CSVFormat.Builder#setHeader(Class)} is not null. * - * @since 1.9.0 + * @since 1.10.0 */ public enum DuplicateHeaderMode { @@ -31,7 +31,7 @@ public enum DuplicateHeaderMode { ALLOW_ALL, /** - * Allows duplicate headers only if they're empty strings or null. + * Allows duplicate headers only if they're empty, blank, or null strings. 
*/ ALLOW_EMPTY, diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 89e63e3a8b..429b07cb1f 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -41,13 +41,13 @@ final class ExtendedBufferedReader extends BufferedReader { /** The count of EOLs (CR/LF/CRLF) seen so far */ private long eolCounter; - /** The position, which is number of characters read so far */ + /** The position, which is the number of characters read so far */ private long position; private boolean closed; /** - * Created extended buffered reader using default buffer-size + * Constructs a new instance using the default buffer size. */ ExtendedBufferedReader(final Reader reader) { super(reader); @@ -107,7 +107,7 @@ public boolean isClosed() { /** * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will - * still return this value. Does not affect line number or last character. + * still return this value. Does not affect the line number or the last character. * * @return the next character * @@ -125,7 +125,7 @@ int lookAhead() throws IOException { /** * Populates the buffer with the next {@code buf.length} characters in the * current reader without consuming them. The next call to {@link #read()} will - * still return the next value. This doesn't affect line number or last + * still return the next value. This doesn't affect the line number or the last * character. * * @param buf the buffer to fill for the look ahead. @@ -199,7 +199,7 @@ public int read(final char[] buf, final int offset, final int length) throws IOE /** * Gets the next line, dropping the line terminator(s). This method should only be called when processing a - * comment, otherwise information can be lost. + * comment, otherwise, information can be lost. *

    * Increments {@link #eolCounter} and updates {@link #position}. *

    diff --git a/src/main/java/org/apache/commons/csv/IOUtils.java b/src/main/java/org/apache/commons/csv/IOUtils.java deleted file mode 100644 index 882bc8bf3e..0000000000 --- a/src/main/java/org/apache/commons/csv/IOUtils.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv; - -import java.io.IOException; -import java.io.Reader; -import java.io.Writer; -import java.nio.Buffer; -import java.nio.CharBuffer; - -/** Copied from Apache Commons IO. */ -class IOUtils { - - /** - *

    - * Copied from Apache Commons IO. - *

    - * The default buffer size ({@value}). - */ - static final int DEFAULT_BUFFER_SIZE = 1024 * 4; - - /** - *

    - * Copied from Apache Commons IO. - *

    - * Represents the end-of-file (or stream). - * @since 2.5 (made public) - */ - private static final int EOF = -1; - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *

    - * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *

    - * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to append to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output) throws IOException { - return copy(input, output, CharBuffer.allocate(DEFAULT_BUFFER_SIZE)); - } - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *

    - * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *

    - * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output, final CharBuffer buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - ((Buffer) buffer).flip(); - output.append(buffer, 0, n); - count += n; - } - return count; - } - - /** - *

    - * Copied from Apache Commons IO. - *

    - * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *

    - * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *

    - * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 1.3 - */ - static long copyLarge(final Reader input, final Writer output) throws IOException { - return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]); - } - - /** - *

    - * Copied from Apache Commons IO. - *

    - * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *

    - * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *

    - * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.2 - */ - static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - output.write(buffer, 0, n); - count += n; - } - return count; - } - -} diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index f424039b1f..50aa176d3f 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -42,7 +42,7 @@ final class Lexer implements Closeable { private static final String LF_STRING = Character.toString(LF); /** - * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it + * Constant char to use for disabling comments, escapes, and encapsulation. The value -2 is used because it * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two * chars (using surrogates) and thus there should never be a collision with a real text char. */ @@ -146,9 +146,9 @@ boolean isDelimiter(final int ch) throws IOException { } /** - * Tests if the given character indicates end of file. + * Tests if the given character indicates the end of the file. * - * @return true if the given character indicates end of file. + * @return true if the given character indicates the end of the file. */ boolean isEndOfFile(final int ch) { return ch == END_OF_STREAM; @@ -168,7 +168,7 @@ boolean isEscape(final int ch) { * * For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]". 
* - * @return true if the next characters constitute a escape delimiter. + * @return true if the next characters constitute an escape delimiter. * @throws IOException If an I/O error occurs. */ boolean isEscapeDelimiter() throws IOException { @@ -194,7 +194,7 @@ boolean isQuoteChar(final int ch) { } /** - * Tests if the current character represents the start of a line: a CR, LF or is at the start of the file. + * Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file. * * @param ch the character to check * @return true if the character is at the start of a line. @@ -214,13 +214,13 @@ private char mapNullToDisabled(final Character c) { *

    * * @param token - * an existing Token object to reuse. The caller is responsible to initialize the Token. + * an existing Token object to reuse. The caller is responsible for initializing the Token. * @return the next token found. - * @throws java.io.IOException on stream access error. + * @throws IOException on stream access error. */ Token nextToken(final Token token) throws IOException { - // get the last read char (required for empty line detection) + // Get the last read char (required for empty line detection) int lastChar = reader.getLastChar(); // read the next char and set eol @@ -234,11 +234,11 @@ Token nextToken(final Token token) throws IOException { // empty line detection: eol AND (last char was EOL or beginning) if (ignoreEmptyLines) { while (eol && isStartOfLine(lastChar)) { - // go on char ahead ... + // Go on char ahead ... lastChar = c; c = reader.read(); eol = readEndOfLine(c); - // reached end of file without any content (empty line at the end) + // reached the end of the file without any content (empty line at the end) if (isEndOfFile(c)) { token.type = EOF; // don't set token.isReady here because no content @@ -247,7 +247,7 @@ Token nextToken(final Token token) throws IOException { } } - // did we reach eof during the last iteration already ? EOF + // Did we reach EOF during the last iteration already? EOF if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) { token.type = EOF; // don't set token.isReady here because no content @@ -267,7 +267,7 @@ Token nextToken(final Token token) throws IOException { return token; } - // important: make sure a new char gets consumed in each iteration + // Important: make sure a new char gets consumed in each iteration while (token.type == INVALID) { // ignore whitespaces at beginning of a token if (ignoreSurroundingSpaces) { @@ -305,12 +305,12 @@ Token nextToken(final Token token) throws IOException { /** * Parses an encapsulated token. *

    - * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included + * Encapsulated tokens are surrounded by the given encapsulating string. The encapsulator itself might be included * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after - * an encapsulated token are ignored. The token is finished when one of the following conditions become true: + * an encapsulated token are ignored. The token is finished when one of the following conditions becomes true: *

    *
      - *
    • an unescaped encapsulator has been reached, and is followed by optional whitespace then:
    • + *
    • An unescaped encapsulator has been reached and is followed by optional whitespace then:
    • *
        *
      • delimiter (TOKEN)
      • *
      • end of line (EORECORD)
      • @@ -321,11 +321,12 @@ Token nextToken(final Token token) throws IOException { * the current token * @return a valid token object * @throws IOException - * on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL + * Thrown when in an invalid state: EOF before closing encapsulator or invalid character before + * delimiter or EOL. */ private Token parseEncapsulatedToken(final Token token) throws IOException { token.isQuoted = true; - // save current line number in case needed for IOE + // Save current line number in case needed for IOE final long startLineNumber = getCurrentLineNumber(); int c; while (true) { @@ -366,8 +367,8 @@ private Token parseEncapsulatedToken(final Token token) throws IOException { } if (!Character.isWhitespace((char)c)) { // error invalid char between token and next delimiter - throw new IOException("(line " + getCurrentLineNumber() + - ") invalid char between encapsulated token and delimiter"); + throw new IOException("Invalid char between encapsulated token and delimiter at line: " + + getCurrentLineNumber() + ", position: " + getCharacterPosition()); } } } @@ -385,13 +386,13 @@ private Token parseEncapsulatedToken(final Token token) throws IOException { /** * Parses a simple token. *

        - * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped - * delimiters (as \, or \;). The token is finished when one of the following conditions become true: + * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped + * delimiters (as \, or \;). The token is finished when one of the following conditions becomes true: *

        *
          - *
        • end of line has been reached (EORECORD)
        • - *
        • end of stream has been reached (EOF)
        • - *
        • an unescaped delimiter has been reached (TOKEN)
        • + *
        • The end of line has been reached (EORECORD)
        • + *
        • The end of stream has been reached (EOF)
        • + *
        • An unescaped delimiter has been reached (TOKEN)
        • *
        * * @param token diff --git a/src/main/java/org/apache/commons/csv/QuoteMode.java b/src/main/java/org/apache/commons/csv/QuoteMode.java index a9b33a10fa..f2fb1f9474 100644 --- a/src/main/java/org/apache/commons/csv/QuoteMode.java +++ b/src/main/java/org/apache/commons/csv/QuoteMode.java @@ -32,7 +32,7 @@ public enum QuoteMode { ALL_NON_NULL, /** - * Quotes fields which contain special characters such as a the field delimiter, quote character or any of the + * Quotes fields that contain special characters such as a field delimiter, quote character, or any of the * characters in the line separator string. */ MINIMAL, diff --git a/src/main/java/org/apache/commons/csv/Token.java b/src/main/java/org/apache/commons/csv/Token.java index 2dedc58a5e..b9f1094783 100644 --- a/src/main/java/org/apache/commons/csv/Token.java +++ b/src/main/java/org/apache/commons/csv/Token.java @@ -21,8 +21,9 @@ /** * Internal token representation. - *

        - * It is used as contract between the lexer and the parser. + *

        + * It is used as a contract between the lexer and the parser. + *

        */ final class Token { @@ -30,7 +31,7 @@ enum Type { /** Token has no valid content, i.e. is in its initialized state. */ INVALID, - /** Token with content, at beginning or in the middle of a line. */ + /** Token with content, at the beginning or in the middle of a line. */ TOKEN, /** Token (which can have content) when the end of file is reached. */ @@ -43,7 +44,7 @@ enum Type { COMMENT } - /** length of the initial token (content-)buffer */ + /** Length of the initial token (content-)buffer */ private static final int INITIAL_TOKEN_LENGTH = 50; /** Token type */ diff --git a/src/main/java/org/apache/commons/csv/package-info.java b/src/main/java/org/apache/commons/csv/package-info.java index 29e7fef612..023096f60e 100644 --- a/src/main/java/org/apache/commons/csv/package-info.java +++ b/src/main/java/org/apache/commons/csv/package-info.java @@ -18,14 +18,14 @@ /** * Apache Commons CSV Format Support. * - *

        CSV are widely used as interfaces to legacy systems or manual data-imports. + *

        CSV files are widely used as interfaces to legacy systems or manual data imports. * CSV stands for "Comma Separated Values" (or sometimes "Character Separated * Values"). The CSV data format is defined in * RFC 4180 * but many dialects exist.

        * *

        Common to all file dialects is its basic structure: The CSV data-format - * is record oriented, whereas each record starts on a new textual line. A + * is record-oriented, whereas each record starts on a new textual line. A * record is build of a list of values. Keep in mind that not all records * must have an equal number of values:

        *
        @@ -36,28 +36,28 @@
          * 

        The following list contains the CSV aspects the Commons CSV parser supports:

        *
        *
        Separators (for lines)
        - *
        The record separators are hardcoded and cannot be changed. The must be '\r', '\n' or '\r\n'.
        + *
        The record separators are hardcoded and cannot be changed. They must be '\r', '\n', or '\r\n'.
        * *
        Delimiter (for values)
        *
        The delimiter for values is freely configurable (default ',').
        * *
        Comments
        - *
        Some CSV-dialects support a simple comment syntax. A comment is a record + *
        Some CSV dialects support a simple comment syntax. A comment is a record * which must start with a designated character (the commentStarter). A record - * of this kind is treated as comment and gets removed from the input (default none)
        + * of this kind is treated as a comment and gets removed from the input (default none) * *
        Encapsulator
        *
        Two encapsulator characters (default '"') are used to enclose -> complex values.
        * *
        Simple values
        - *
        A simple value consist of all characters (except the delimiter) until - * (but not including) the next delimiter or a record-terminator. Optionally + *
        A simple value consists of all characters (except the delimiter) until + * (but not including) the next delimiter or a record terminator. Optionally * all surrounding whitespaces of a simple value can be ignored (default: true).
        * *
        Complex values
        *
        Complex values are encapsulated within a pair of the defined encapsulator characters. * The encapsulator itself must be escaped or doubled when used inside complex values. - * Complex values preserve all kind of formatting (including newlines -> multiline-values)
        + * Complex values preserve all kinds of formatting (including newlines -> multiline-values) * *
        Empty line skipping
        *
        Optionally empty lines in CSV files can be skipped. diff --git a/src/site/resources/pmd/pmd-ruleset.xml b/src/site/resources/pmd/pmd-ruleset.xml index 17a8fab27a..86f3a412d4 100644 --- a/src/site/resources/pmd/pmd-ruleset.xml +++ b/src/site/resources/pmd/pmd-ruleset.xml @@ -26,13 +26,10 @@ - - - @@ -54,16 +51,7 @@ - - - - - - - - - @@ -76,7 +64,6 @@ - diff --git a/src/site/xdoc/download_csv.xml b/src/site/xdoc/download_csv.xml index 3e00f95c54..21d27b6785 100644 --- a/src/site/xdoc/download_csv.xml +++ b/src/site/xdoc/download_csv.xml @@ -1,144 +1,144 @@ - - - - - - Download Apache Commons CSV - Apache Commons Documentation Team - - -
        - -

        - We recommend you use a mirror to download our release - builds, but you must verify the integrity of - the downloaded files using signatures downloaded from our main - distribution directories. Recent releases (48 hours) may not yet - be available from all the mirrors. -

        - -

        - You are currently using [preferred]. If you - encounter a problem with this mirror, please select another - mirror. If all mirrors are failing, there are backup - mirrors (at the end of the mirrors list) that should be - available. -

        - [if-any logo][end] -

        - -
        -

        - Other mirrors: - - -

        -
        - -

        - It is essential that you - verify the integrity - of downloaded files, preferably using the PGP signature (*.asc files); - failing that using the SHA512 hash (*.sha512 checksum files). -

        -

        - The KEYS - file contains the public PGP keys used by Apache Commons developers - to sign releases. -

        -
        -
        -
        + + + + + + Download Apache Commons CSV + Apache Commons Documentation Team + + +
        + +

        + We recommend you use a mirror to download our release + builds, but you must verify the integrity of + the downloaded files using signatures downloaded from our main + distribution directories. Recent releases (48 hours) may not yet + be available from all the mirrors. +

        + +

        + You are currently using [preferred]. If you + encounter a problem with this mirror, please select another + mirror. If all mirrors are failing, there are backup + mirrors (at the end of the mirrors list) that should be + available. +

        + [if-any logo][end] +

        + +
        +

        + Other mirrors: + + +

        +
        + +

        + It is essential that you + verify the integrity + of downloaded files, preferably using the PGP signature (*.asc files); + failing that using the SHA512 hash (*.sha512 checksum files). +

        +

        + The KEYS + file contains the public PGP keys used by Apache Commons developers + to sign releases. +

        +
        +
        +
        - - - + + + - - - + + +
        commons-csv-1.9.0-bin.tar.gzsha512pgpcommons-csv-1.10.0-bin.tar.gzsha512pgp
        commons-csv-1.9.0-bin.zipsha512pgpcommons-csv-1.10.0-bin.zipsha512pgp
        - - - + + + - - - + + +
        commons-csv-1.9.0-src.tar.gzsha512pgpcommons-csv-1.10.0-src.tar.gzsha512pgp
        commons-csv-1.9.0-src.zipsha512pgpcommons-csv-1.10.0-src.zipsha512pgp
        diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml index 9d69cf2246..ec139c8668 100644 --- a/src/site/xdoc/index.xml +++ b/src/site/xdoc/index.xml @@ -29,12 +29,12 @@ limitations under the License.
      • Microsoft Excel
      • Informix UNLOAD
      • Informix UNLOAD CSV
      • -
      • MySQL
      • -
      • Oracle
      • +
      • MySQL
      • +
      • Oracle
      • PostgreSQL CSV
      • PostgreSQL Text
      • -
      • RFC 4180
      • -
      • TDF
      • +
      • RFC 4180
      • +
      • TDF

      Custom formats can be created using a fluent style API.

      @@ -61,7 +61,7 @@ The git repository can be

      @@ -76,15 +76,7 @@ for the latest releases.
      For previous releases, see the Apache Archive

      - Alternatively, you can pull it from a Maven repository: -

      <dependency>
      -    <groupId>org.apache.commons</groupId>
      -    <artifactId>commons-csv</artifactId>
      -    <version>1.9.0</version>
      -</dependency>
      -

      -

      - For other dependency access methods, see Dependency Information + For dependency access methods, see Dependency Information

      @@ -97,7 +89,7 @@ For previous releases, see the

      The commons developer mailing list is the main channel of communication for contributors. Please remember that the lists are shared between all commons components, so prefix your email by [csv].

      -

      You can also visit the #apache-commons IRC channel on irc.freenode.net or peruse JIRA. Specific links of interest for JIRA are:

      +

      You can also peruse JIRA. Specific links of interest for JIRA are:

      In addition to the code from Netcetera (org.apache.commons.csv), Martin van den Bemt has added an additional writer API.

      Other CSV implementations:

      diff --git a/src/site/xdoc/issue-tracking.xml b/src/site/xdoc/issue-tracking.xml index c7edd21b1b..3564ef4fdd 100644 --- a/src/site/xdoc/issue-tracking.xml +++ b/src/site/xdoc/issue-tracking.xml @@ -85,7 +85,7 @@ limitations under the License.

      - For more information on subversion and creating patches see the + For more information on creating patches see the Apache Contributors Guide.

      diff --git a/src/site/xdoc/mail-lists.xml b/src/site/xdoc/mail-lists.xml index 111faf1212..8cbe145704 100644 --- a/src/site/xdoc/mail-lists.xml +++ b/src/site/xdoc/mail-lists.xml @@ -158,7 +158,7 @@ limitations under the License. Commons Commits List

      - Only for e-mails automatically generated by the source control sytem. + Only for e-mails automatically generated by the source control system.

      Subscribe diff --git a/src/site/xdoc/user-guide.xml b/src/site/xdoc/user-guide.xml index 254b461729..3a433faca4 100644 --- a/src/site/xdoc/user-guide.xml +++ b/src/site/xdoc/user-guide.xml @@ -44,7 +44,7 @@ limitations under the License.
      ORACLE1.6
      Default Oracle format used by the SQL*Loader utility.
      POSTGRESSQL_CSV1.5
      Default PostgreSQL CSV format used by the COPY operation.
      POSTGRESSQL_TEXT1.5
      Default PostgreSQL text format used by the COPY operation.
      -
      RFC-4180
      The RFC-4180 format defined by RFC-4180.
      +
      RFC-4180
      The RFC-4180 format defined by RFC-4180.
      TDF
      A tab delimited format.
      @@ -71,24 +71,23 @@ for (CSVRecord record : records) { for example:

      final URL url = ...; -final Reader reader = new InputStreamReader(new BOMInputStream(url.openStream()), "UTF-8"); -final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader()); -try { +try (final Reader reader = new InputStreamReader(new BOMInputStream(url.openStream()), "UTF-8"); + final CSVParser parser = CSVFormat.EXCEL.builder() + .setHeader() + .build() + .parse(reader)) { for (final CSVRecord record : parser) { final String string = record.get("SomeColumn"); ... } -} finally { - parser.close(); - reader.close(); }

      You might find it handy to create something like this:

      /** -* Creates a reader capable of handling BOMs. -*/ + * Creates a reader capable of handling BOMs. + */ public InputStreamReader newReader(final InputStream inputStream) { return new InputStreamReader(new BOMInputStream(inputStream), StandardCharsets.UTF_8); } @@ -101,7 +100,7 @@ public InputStreamReader newReader(final InputStream inputStream) { Apache Commons CSV provides several ways to access record values. The simplest way is to access values by their index in the record. However, columns in CSV files often have a name, for example: ID, CustomerNo, Birthday, etc. - The CSVFormat class provides an API for specifing these header names and CSVRecord on + The CSVFormat class provides an API for specifying these header names and CSVRecord on the other hand has methods to access values by their corresponding header name. @@ -118,7 +117,10 @@ for (CSVRecord record : records) { Indices may not be the most intuitive way to access record values. For this reason it is possible to assign names to each column in the file: Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.withHeader("ID", "CustomerNo", "Name").parse(in); +Iterable<CSVRecord> records = CSVFormat.RFC4180.builder() + .setHeader("ID", "CustomerNo", "Name") + .build() + .parse(in); for (CSVRecord record : records) { String id = record.get("ID"); String customerNo = record.get("CustomerNo"); @@ -136,7 +138,10 @@ for (CSVRecord record : records) { ID, CustomerNo, Name } Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.withHeader(Headers.class).parse(in); +Iterable<CSVRecord> records = CSVFormat.RFC4180.builder() + .setHeader(Headers.class) + .build() + .parse(in); for (CSVRecord record : records) { String id = record.get(Headers.ID); String customerNo = record.get(Headers.CustomerNo); @@ -149,7 +154,11 @@ for (CSVRecord record : records) { Some CSV files define header names in their first record. 
If configured, Apache Commons CSV can parse the header names from the first record: Reader in = new FileReader("path/to/file.csv"); -Iterable<CSVRecord> records = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(in); +Iterable<CSVRecord> records = CSVFormat.RFC4180.builder() + .setHeader() + .setSkipHeaderRecord(true) + .build() + .parse(in); for (CSVRecord record : records) { String id = record.get("ID"); String customerNo = record.get("CustomerNo"); @@ -163,16 +172,22 @@ for (CSVRecord record : records) { To print a CSV file with headers, you specify the headers in the format:

      final Appendable out = ...; - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader("H1", "H2").print(out) +final CSVPrinter printer = CSVFormat.DEFAULT.builder() + .setHeader("H1", "H2") + .build() + .print(out);

      To print a CSV file with JDBC column labels, you specify the ResultSet in the format:

      - final ResultSet resultSet = ...; - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet).print(out) + try (final ResultSet resultSet = ...) { + final CSVPrinter printer = CSVFormat.DEFAULT.builder() + .setHeader(resultSet) + .build() + .print(out); +}
      - diff --git a/src/test/java/org/apache/commons/csv/CSVBenchmark.java b/src/test/java/org/apache/commons/csv/CSVBenchmark.java index 64d3f4980a..4a146a0a93 100644 --- a/src/test/java/org/apache/commons/csv/CSVBenchmark.java +++ b/src/test/java/org/apache/commons/csv/CSVBenchmark.java @@ -58,8 +58,21 @@ @State(Scope.Benchmark) public class CSVBenchmark { + private static final class CountingReaderCallback implements org.skife.csv.ReaderCallback { + public int count; + + @Override + public void onRow(final String[] fields) { + count++; + } + } + private String data; + private Reader getReader() { + return new StringReader(data); + } + /** * Load the data in memory before running the benchmarks, this takes out IO from the results. */ @@ -72,55 +85,6 @@ public void init() throws IOException { } } - private Reader getReader() { - return new StringReader(data); - } - - @Benchmark - public int read(final Blackhole bh) throws Exception { - int count = 0; - - try (BufferedReader reader = new BufferedReader(getReader())) { - while (reader.readLine() != null) { - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int scan(final Blackhole bh) throws Exception { - int count = 0; - - try (Scanner scanner = new Scanner(getReader())) { - while (scanner.hasNextLine()) { - scanner.nextLine(); - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int split(final Blackhole bh) throws Exception { - int count = 0; - - try (BufferedReader reader = new BufferedReader(getReader())) { - String line; - while ((line = reader.readLine()) != null) { - final String[] values = StringUtils.split(line, ','); - count += values.length; - } - } - - bh.consume(count); - return count; - } - @Benchmark public int parseCommonsCSV(final Blackhole bh) throws Exception { int count = 0; @@ -202,15 +166,6 @@ public int parseSkifeCSV(final Blackhole bh) throws Exception { return callback.count; } - private static class CountingReaderCallback 
implements org.skife.csv.ReaderCallback { - public int count; - - @Override - public void onRow(final String[] fields) { - count++; - } - } - @Benchmark public int parseSuperCSV(final Blackhole bh) throws Exception { int count = 0; @@ -224,4 +179,49 @@ public int parseSuperCSV(final Blackhole bh) throws Exception { bh.consume(count); return count; } + + @Benchmark + public int read(final Blackhole bh) throws Exception { + int count = 0; + + try (BufferedReader reader = new BufferedReader(getReader())) { + while (reader.readLine() != null) { + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int scan(final Blackhole bh) throws Exception { + int count = 0; + + try (Scanner scanner = new Scanner(getReader())) { + while (scanner.hasNextLine()) { + scanner.nextLine(); + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int split(final Blackhole bh) throws Exception { + int count = 0; + + try (BufferedReader reader = new BufferedReader(getReader())) { + String line; + while ((line = reader.readLine()) != null) { + final String[] values = StringUtils.split(line, ','); + count += values.length; + } + } + + bh.consume(count); + return count; + } } diff --git a/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java b/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java new file mode 100644 index 0000000000..9eae51b055 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/CSVDuplicateHeaderTest.java @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Tests parsing of duplicate column names in a CSV header. + * The test verifies that headers are consistently handled by CSVFormat and CSVParser. + */ +public class CSVDuplicateHeaderTest { + + /** + * Return test cases for duplicate header data for use in CSVFormat. + *

      + * This filters the parsing test data to all cases where the allow missing column + * names flag is true and ignore header case is false: these flags are exclusively for parsing. + * CSVFormat validation applies to both parsing and writing and thus validation + * is less strict and behaves as if the allow missing column names constraint and + * the ignore header case behavior are absent. + * The filtered data is then returned with the parser flags set to both true and false + * for each test case. + *

      + * + * @return the stream of arguments + */ + static Stream duplicateHeaderAllowsMissingColumnsNamesData() { + return duplicateHeaderData() + .filter(arg -> Boolean.TRUE.equals(arg.get()[1]) && Boolean.FALSE.equals(arg.get()[2])) + .flatMap(arg -> { + // Return test case with flags as all true/false combinations + final Object[][] data = new Object[4][]; + final Boolean[] flags = {Boolean.TRUE, Boolean.FALSE}; + int i = 0; + for (final Boolean a : flags) { + for (final Boolean b : flags) { + data[i] = arg.get().clone(); + data[i][1] = a; + data[i][2] = b; + i++; + } + } + return Arrays.stream(data).map(Arguments::of); + }); + } + + /** + * Return test cases for duplicate header data for use in parsing (CSVParser). Uses the order: + *
      +     * DuplicateHeaderMode duplicateHeaderMode
      +     * boolean allowMissingColumnNames
      +     * boolean ignoreHeaderCase
      +     * String[] headers
      +     * boolean valid
      +     * 
      + * + * @return the stream of arguments + */ + static Stream duplicateHeaderData() { + return Stream.of( + // Any combination with a valid header + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "B"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "B"}, true), + + // Any combination with a valid header including empty + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", ""}, true), + + // Any combination with a valid header including blank (1 space) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", " "}, true), + + // Any combination with a valid header including null + 
Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", null}, true), + + // Duplicate non-empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A"}, true), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A"}, true), + + // Duplicate empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"", ""}, true), + + // Duplicate blank names (1 space) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, 
false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", " "}, true), + + // Duplicate blank names (3 spaces) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", " "}, true), + + // Duplicate null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {null, null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {null, null}, true), + + // Duplicate blank names (1+3 spaces) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", " 
"}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", " "}, true), + + // Duplicate blank names and null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {" ", null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {" ", null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {" ", null}, true), + + // Duplicate non-empty and empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A", "", ""}, true), + + // Non-duplicate non-empty and duplicate empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "B", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "B", "", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "B", 
"", ""}, true), + + // Duplicate non-empty and blank names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A", " ", " "}, true), + + // Duplicate non-empty and null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "A", null, null}, true), + + // Duplicate blank names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", "", ""}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", "", ""}, true), + + // Duplicate null names + 
Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", null, null}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", null, null}, true), + + // Duplicate blank names (1+3 spaces) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, false, new String[] {"A", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, false, new String[] {"A", " ", " "}, true), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, false, new String[] {"A", " ", " "}, true), + + // Duplicate names (case insensitive) + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true , new String[] {"A", "a"}, true), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true , new String[] {"A", "a"}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true , new String[] {"A", "a"}, true), + + // Duplicate non-empty (case insensitive) and empty names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true, new String[] {"A", "a", "", ""}, false), + 
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true, new String[] {"A", "a", "", ""}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true, new String[] {"A", "a", "", ""}, true), + + // Duplicate non-empty (case insensitive) and blank names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true, new String[] {"A", "a", " ", " "}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true, new String[] {"A", "a", " ", " "}, true), + + // Duplicate non-empty (case insensitive) and null names + Arguments.of(DuplicateHeaderMode.DISALLOW, false, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.DISALLOW, true, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, true, new String[] {"A", "a", null, null}, false), + Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, true, new String[] {"A", "a", null, null}, true) + ); + } + + /** + * Tests duplicate headers with the CSVFormat. 
+ * + * @param duplicateHeaderMode the duplicate header mode + * @param allowMissingColumnNames the allow missing column names flag (only used for parsing) + * @param ignoreHeaderCase the ignore header case flag (only used for parsing) + * @param headers the headers + * @param valid true if the settings are expected to be valid, otherwise expect an IllegalArgumentException + */ + @ParameterizedTest + @MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"}) + public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode, + final boolean allowMissingColumnNames, + final boolean ignoreHeaderCase, + final String[] headers, + final boolean valid) { + final CSVFormat.Builder builder = + CSVFormat.DEFAULT.builder() + .setDuplicateHeaderMode(duplicateHeaderMode) + .setAllowMissingColumnNames(allowMissingColumnNames) + .setIgnoreHeaderCase(ignoreHeaderCase) + .setHeader(headers); + if (valid) { + final CSVFormat format = builder.build(); + Assertions.assertEquals(duplicateHeaderMode, format.getDuplicateHeaderMode(), "DuplicateHeaderMode"); + Assertions.assertEquals(allowMissingColumnNames, format.getAllowMissingColumnNames(), "AllowMissingColumnNames"); + Assertions.assertArrayEquals(headers, format.getHeader(), "Header"); + } else { + Assertions.assertThrows(IllegalArgumentException.class, builder::build); + } + } + + /** + * Tests duplicate headers with the CSVParser. + * + * @param duplicateHeaderMode the duplicate header mode + * @param allowMissingColumnNames the allow missing column names flag (only used for parsing) + * @param ignoreHeaderCase the ignore header case flag (only used for parsing) + * @param headers the headers (joined with the CSVFormat delimiter to create a string input) + * @param valid true if the settings are expected to be valid, otherwise expect an IllegalArgumentException + * @throws IOException Signals that an I/O exception has occurred.
+ */ + @ParameterizedTest + @MethodSource(value = {"duplicateHeaderData"}) + public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode, + final boolean allowMissingColumnNames, + final boolean ignoreHeaderCase, + final String[] headers, + final boolean valid) throws IOException { + final CSVFormat format = + CSVFormat.DEFAULT.builder() + .setDuplicateHeaderMode(duplicateHeaderMode) + .setAllowMissingColumnNames(allowMissingColumnNames) + .setIgnoreHeaderCase(ignoreHeaderCase) + .setNullString("NULL") + .setHeader() + .build(); + final String input = Arrays.stream(headers) + .map(s -> s == null ? format.getNullString() : s) + .collect(Collectors.joining(format.getDelimiterString())); + if (valid) { + try(CSVParser parser = CSVParser.parse(input, format)) { + // Parser ignores null headers + final List expected = + Arrays.stream(headers) + .filter(s -> s != null) + .collect(Collectors.toList()); + Assertions.assertEquals(expected, parser.getHeaderNames(), "HeaderNames"); + } + } else { + Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format)); + } + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java index 90417926c9..e3a284f76d 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java @@ -1,1572 +1,1493 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.CSVFormat.RFC4180; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.LF; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Reader; -import java.io.StringReader; -import java.lang.reflect.Method; -import java.lang.reflect.Modifier; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Objects; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -/** - * Tests {@link CSVFormat}. - */ -public class CSVFormatTest { - - public enum EmptyEnum { - // empty enum. 
- } - - public enum Header { - Name, Email, Phone - } - - private static void assertNotEquals(final Object right, final Object left) { - Assertions.assertNotEquals(right, left); - Assertions.assertNotEquals(left, right); - } - - private static CSVFormat copy(final CSVFormat format) { - return format.builder().setDelimiter(format.getDelimiter()).build(); - } - - private void assertNotEquals(final String name, final String type, final Object left, final Object right) { - if (left.equals(right) || right.equals(left)) { - fail("Objects must not compare equal for " + name + "(" + type + ")"); - } - if (left.hashCode() == right.hashCode()) { - fail("Hash code should not be equal for " + name + "(" + type + ")"); - } - } - - @SuppressWarnings("deprecation") - @Test - public void testDelimiterSameAsCommentStartThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withCommentMarker('!')); - } - - @Test - public void testDelimiterSameAsCommentStartThrowsException1() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testDelimiterSameAsEscapeThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withEscape('!')); - } - - @Test - public void testDelimiterSameAsEscapeThrowsException1() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setEscape('!').build()); - } - - @Test - public void testDelimiterSameAsRecordSeparatorThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat(CR)); - } - - @Test - public void testDuplicateHeaderElements() { - final String[] header = { "A", "A" }; - final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader(header).build(); - assertEquals(2, format.getHeader().length); - 
assertArrayEquals(header, format.getHeader()); - } - - @SuppressWarnings("deprecation") - @Test - public void testDuplicateHeaderElements_Deprecated() { - final String[] header = { "A", "A" }; - final CSVFormat format = CSVFormat.DEFAULT.withHeader(header); - assertEquals(2, format.getHeader().length); - assertArrayEquals(header, format.getHeader()); - } - - @Test - public void testDuplicateHeaderElementsFalse() { - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "A").build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testDuplicateHeaderElementsFalse_Deprecated() { - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(false).withHeader("A", "A")); - } - - public void testDuplicateHeaderElementsTrue() { - CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("A", "A").build(); - } - - @SuppressWarnings("deprecation") - public void testDuplicateHeaderElementsTrue_Deprecated() { - CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(true).withHeader("A", "A"); - } - - @Test - public void testEquals() { - final CSVFormat right = CSVFormat.DEFAULT; - final CSVFormat left = copy(right); - - Assertions.assertNotEquals(null, right); - Assertions.assertNotEquals("A String Instance", right); - - assertEquals(right, right); - assertEquals(right, left); - assertEquals(left, right); - - assertEquals(right.hashCode(), right.hashCode()); - assertEquals(right.hashCode(), left.hashCode()); - } - - @Test - public void testEqualsCommentStart() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setQuote('"') - .setCommentMarker('#') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setCommentMarker('!') - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsCommentStart_Deprecated() { - final 
CSVFormat right = CSVFormat.newFormat('\'') - .withQuote('"') - .withCommentMarker('#') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withCommentMarker('!'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsDelimiter() { - final CSVFormat right = CSVFormat.newFormat('!'); - final CSVFormat left = CSVFormat.newFormat('?'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsEscape() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setQuote('"') - .setCommentMarker('#') - .setEscape('+') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setEscape('!') - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsEscape_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withQuote('"') - .withCommentMarker('#') - .withEscape('+') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withEscape('!'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsHash() throws Exception { - final Method[] methods = CSVFormat.class.getDeclaredMethods(); - for (final Method method : methods) { - if (Modifier.isPublic(method.getModifiers())) { - final String name = method.getName(); - if (name.startsWith("with")) { - for (final Class cls : method.getParameterTypes()) { - final String type = cls.getCanonicalName(); - if ("boolean".equals(type)) { - final Object defTrue = method.invoke(CSVFormat.DEFAULT, Boolean.TRUE); - final Object defFalse = method.invoke(CSVFormat.DEFAULT, Boolean.FALSE); - assertNotEquals(name, type ,defTrue, defFalse); - } else if ("char".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, 'a'); - final Object b = method.invoke(CSVFormat.DEFAULT, 'b'); - assertNotEquals(name, type, a, b); - } else if ("java.lang.Character".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {null}); - final Object 
b = method.invoke(CSVFormat.DEFAULT, Character.valueOf('d')); - assertNotEquals(name, type, a, b); - } else if ("java.lang.String".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {null}); - final Object b = method.invoke(CSVFormat.DEFAULT, "e"); - assertNotEquals(name, type, a, b); - } else if ("java.lang.String[]".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new String[] {null, null}}); - final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new String[] {"f", "g"}}); - assertNotEquals(name, type, a, b); - } else if ("org.apache.commons.csv.QuoteMode".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL); - final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL); - assertNotEquals(name, type, a, b); - } else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) { - final Object a = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.ALLOW_ALL); - final Object b = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.DISALLOW); - assertNotEquals(name, type, a, b); - } else if ("java.lang.Object[]".equals(type)){ - final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}}); - final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}}); - assertNotEquals(name, type, a, b); - } else if ("withHeader".equals(name)){ // covered above by String[] - // ignored - } else { - fail("Unhandled method: "+name + "(" + type + ")"); - } - } - } - } - } - } - - @Test - public void testEqualsHeader() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setHeader("One", "Two", "Three") - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setHeader("Three", "Two", "One") - .build(); - 
- assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsHeader_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withHeader("One", "Two", "Three") - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withHeader("Three", "Two", "One"); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsIgnoreEmptyLines() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setIgnoreEmptyLines(false) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsIgnoreEmptyLines_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withIgnoreEmptyLines(false); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsIgnoreSurroundingSpaces() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setCommentMarker('#') - .setEscape('+') - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setIgnoreSurroundingSpaces(false) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsIgnoreSurroundingSpaces_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withCommentMarker('#') - .withEscape('+') - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); 
- final CSVFormat left = right - .withIgnoreSurroundingSpaces(false); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsLeftNoQuoteRightQuote() { - final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).build(); - final CSVFormat right = left.builder().setQuote('#').build(); - - assertNotEquals(left, right); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsLeftNoQuoteRightQuote_Deprecated() { - final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); - final CSVFormat right = left.withQuote('#'); - - assertNotEquals(left, right); - } - - @Test - public void testEqualsNoQuotes() { - final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).build(); - final CSVFormat right = left.builder().setQuote(null).build(); - - assertEquals(left, right); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsNoQuotes_Deprecated() { - final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); - final CSVFormat right = left.withQuote(null); - - assertEquals(left, right); - } - - @Test - public void testEqualsNullString() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .setNullString("null") - .build(); - final CSVFormat left = right.builder() - .setNullString("---") - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsNullString_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL) - .withNullString("null"); - final CSVFormat left = right - .withNullString("---"); - - assertNotEquals(right, left); - } - - @Test - public 
void testEqualsOne() { - - final CSVFormat csvFormatOne = CSVFormat.INFORMIX_UNLOAD; - final CSVFormat csvFormatTwo = CSVFormat.MYSQL; - - - assertEquals('\\', (char)csvFormatOne.getEscapeCharacter()); - assertNull(csvFormatOne.getQuoteMode()); - - assertTrue(csvFormatOne.getIgnoreEmptyLines()); - assertFalse(csvFormatOne.getSkipHeaderRecord()); - - assertFalse(csvFormatOne.getIgnoreHeaderCase()); - assertNull(csvFormatOne.getCommentMarker()); - - assertFalse(csvFormatOne.isCommentMarkerSet()); - assertTrue(csvFormatOne.isQuoteCharacterSet()); - - assertEquals('|', csvFormatOne.getDelimiter()); - assertFalse(csvFormatOne.getAllowMissingColumnNames()); - - assertTrue(csvFormatOne.isEscapeCharacterSet()); - assertEquals("\n", csvFormatOne.getRecordSeparator()); - - assertEquals('\"', (char)csvFormatOne.getQuoteCharacter()); - assertFalse(csvFormatOne.getTrailingDelimiter()); - - assertFalse(csvFormatOne.getTrim()); - assertFalse(csvFormatOne.isNullStringSet()); - - assertNull(csvFormatOne.getNullString()); - assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); - - - assertTrue(csvFormatTwo.isEscapeCharacterSet()); - assertNull(csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); - - assertEquals('\t', csvFormatTwo.getDelimiter()); - assertEquals("\n", csvFormatTwo.getRecordSeparator()); - - assertFalse(csvFormatTwo.isQuoteCharacterSet()); - assertTrue(csvFormatTwo.isNullStringSet()); - - assertEquals('\\', (char)csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertFalse(csvFormatTwo.getIgnoreEmptyLines()); - - assertEquals("\\N", csvFormatTwo.getNullString()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormatTwo.getTrailingDelimiter()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - assertNull(csvFormatTwo.getCommentMarker()); - 
assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormatTwo, csvFormatOne); - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); - - assertEquals('\\', (char)csvFormatOne.getEscapeCharacter()); - assertNull(csvFormatOne.getQuoteMode()); - - assertTrue(csvFormatOne.getIgnoreEmptyLines()); - assertFalse(csvFormatOne.getSkipHeaderRecord()); - - assertFalse(csvFormatOne.getIgnoreHeaderCase()); - assertNull(csvFormatOne.getCommentMarker()); - - assertFalse(csvFormatOne.isCommentMarkerSet()); - assertTrue(csvFormatOne.isQuoteCharacterSet()); - - assertEquals('|', csvFormatOne.getDelimiter()); - assertFalse(csvFormatOne.getAllowMissingColumnNames()); - - assertTrue(csvFormatOne.isEscapeCharacterSet()); - assertEquals("\n", csvFormatOne.getRecordSeparator()); - - assertEquals('\"', (char)csvFormatOne.getQuoteCharacter()); - assertFalse(csvFormatOne.getTrailingDelimiter()); - - assertFalse(csvFormatOne.getTrim()); - assertFalse(csvFormatOne.isNullStringSet()); - - assertNull(csvFormatOne.getNullString()); - assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); - - assertTrue(csvFormatTwo.isEscapeCharacterSet()); - assertNull(csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); - - assertEquals('\t', csvFormatTwo.getDelimiter()); - assertEquals("\n", csvFormatTwo.getRecordSeparator()); - - assertFalse(csvFormatTwo.isQuoteCharacterSet()); - assertTrue(csvFormatTwo.isNullStringSet()); - - assertEquals('\\', (char)csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertFalse(csvFormatTwo.getIgnoreEmptyLines()); - - assertEquals("\\N", csvFormatTwo.getNullString()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormatTwo.getTrailingDelimiter()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - 
assertNull(csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormatOne, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormatOne); - - Assertions.assertNotEquals(csvFormatOne, csvFormatTwo); - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); - - Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); - - } - - @Test - public void testEqualsQuoteChar() { - final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').build(); - final CSVFormat left = right.builder().setQuote('!').build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsQuoteChar_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"'); - final CSVFormat left = right.withQuote('!'); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsQuotePolicy() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setQuoteMode(QuoteMode.MINIMAL) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsQuotePolicy_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withQuoteMode(QuoteMode.MINIMAL); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsRecordSeparator() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .build(); - final CSVFormat left = right.builder() - .setRecordSeparator(LF) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsRecordSeparator_Deprecated() { - final 
CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL); - final CSVFormat left = right - .withRecordSeparator(LF); - - assertNotEquals(right, left); - } - - public void testEqualsSkipHeaderRecord() { - final CSVFormat right = CSVFormat.newFormat('\'').builder() - .setRecordSeparator(CR) - .setCommentMarker('#') - .setEscape('+') - .setIgnoreEmptyLines(true) - .setIgnoreSurroundingSpaces(true) - .setQuote('"') - .setQuoteMode(QuoteMode.ALL) - .setNullString("null") - .setSkipHeaderRecord(true) - .build(); - final CSVFormat left = right.builder() - .setSkipHeaderRecord(false) - .build(); - - assertNotEquals(right, left); - } - - @SuppressWarnings("deprecation") - @Test - public void testEqualsSkipHeaderRecord_Deprecated() { - final CSVFormat right = CSVFormat.newFormat('\'') - .withRecordSeparator(CR) - .withCommentMarker('#') - .withEscape('+') - .withIgnoreEmptyLines() - .withIgnoreSurroundingSpaces() - .withQuote('"') - .withQuoteMode(QuoteMode.ALL) - .withNullString("null") - .withSkipHeaderRecord(); - final CSVFormat left = right - .withSkipHeaderRecord(false); - - assertNotEquals(right, left); - } - - @Test - public void testEqualsWithNull() { - - final CSVFormat csvFormat = CSVFormat.POSTGRESQL_TEXT; - - assertEquals('\\', (char)csvFormat.getEscapeCharacter()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertFalse(csvFormat.getTrim()); - - assertTrue(csvFormat.isQuoteCharacterSet()); - assertEquals("\\N", csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertTrue(csvFormat.isEscapeCharacterSet()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, 
csvFormat.getQuoteMode()); - - assertEquals('\t', csvFormat.getDelimiter()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertEquals("\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertTrue(csvFormat.isNullStringSet()); - - assertEquals('\\', (char)csvFormat.getEscapeCharacter()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertFalse(csvFormat.getTrim()); - - assertTrue(csvFormat.isQuoteCharacterSet()); - assertEquals("\\N", csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertTrue(csvFormat.isEscapeCharacterSet()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertEquals(QuoteMode.ALL_NON_NULL, csvFormat.getQuoteMode()); - - assertEquals('\t', csvFormat.getDelimiter()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertEquals("\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertTrue(csvFormat.isNullStringSet()); - - Assertions.assertNotEquals(null, csvFormat); - - } - - @Test - public void testEscapeSameAsCommentStartThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setEscape('!').setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testEscapeSameAsCommentStartThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape('!').withCommentMarker('!')); - } - - @Test - public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> 
CSVFormat.DEFAULT.builder().setEscape(Character.valueOf('!')).setCommentMarker(Character.valueOf('!')).build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.withEscape(Character.valueOf('!')).withCommentMarker(Character.valueOf('!'))); - } - - @Test - public void testFormat() { - final CSVFormat format = CSVFormat.DEFAULT; - - assertEquals("", format.format()); - assertEquals("a,b,c", format.format("a", "b", "c")); - assertEquals("\"x,y\",z", format.format("x,y", "z")); - } - - @Test //I assume this to be a defect. - public void testFormatThrowsNullPointerException() { - - final CSVFormat csvFormat = CSVFormat.MYSQL; - - final NullPointerException e = assertThrows(NullPointerException.class, () -> csvFormat.format((Object[]) null)); - assertEquals(Objects.class.getName(), e.getStackTrace()[0].getClassName()); - } - - @Test - public void testFormatToString() { - final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',') - .withQuoteMode(QuoteMode.MINIMAL).withRecordSeparator(CRLF).withQuote('"') - .withNullString("").withIgnoreHeaderCase(true) - .withHeaderComments("This is HeaderComments").withHeader("col1","col2","col3"); - assertEquals("Delimiter=<,> Escape= QuoteChar=<\"> QuoteMode= NullString=<> RecordSeparator=<" +CRLF+ - "> IgnoreHeaderCase:ignored SkipHeaderRecord:false HeaderComments:[This is HeaderComments] Header:[col1, col2, col3]", format.toString()); - } - - @Test - public void testGetHeader() { - final String[] header = {"one", "two", "three"}; - final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); - // getHeader() makes a copy of the header array. 
- final String[] headerCopy = formatWithHeader.getHeader(); - headerCopy[0] = "A"; - headerCopy[1] = "B"; - headerCopy[2] = "C"; - assertFalse(Arrays.equals(formatWithHeader.getHeader(), headerCopy)); - assertNotSame(formatWithHeader.getHeader(), headerCopy); - } - - @Test - public void testHashCodeAndWithIgnoreHeaderCase() { - - final CSVFormat csvFormat = CSVFormat.INFORMIX_UNLOAD_CSV; - final CSVFormat csvFormatTwo = csvFormat.withIgnoreHeaderCase(); - csvFormatTwo.hashCode(); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertTrue(csvFormatTwo.getIgnoreHeaderCase()); // now different - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - - assertFalse(csvFormatTwo.getTrim()); - - } - - @Test - public void testJiraCsv236() { - CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("CC","VV","VV").build(); - } - - @SuppressWarnings("deprecation") - @Test - public void testJiraCsv236__Deprecated() { - CSVFormat.DEFAULT.withAllowDuplicateHeaderNames().withHeader("CC","VV","VV"); - } - - @Test - public void testNewFormat() { - - final CSVFormat csvFormat = CSVFormat.newFormat('X'); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertFalse(csvFormat.isEscapeCharacterSet()); - - assertNull(csvFormat.getRecordSeparator()); - assertNull(csvFormat.getQuoteMode()); - - assertNull(csvFormat.getCommentMarker()); - assertFalse(csvFormat.getIgnoreHeaderCase()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertEquals('X', csvFormat.getDelimiter()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.isQuoteCharacterSet()); - 
assertFalse(csvFormat.isCommentMarkerSet()); - - assertNull(csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertFalse(csvFormat.isEscapeCharacterSet()); - - assertNull(csvFormat.getRecordSeparator()); - assertNull(csvFormat.getQuoteMode()); - - assertNull(csvFormat.getCommentMarker()); - assertFalse(csvFormat.getIgnoreHeaderCase()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertEquals('X', csvFormat.getDelimiter()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.isQuoteCharacterSet()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertNull(csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.getIgnoreEmptyLines()); - - } - - @Test - public void testNullRecordSeparatorCsv106() { - final CSVFormat format = CSVFormat.newFormat(';').builder().setSkipHeaderRecord(true).setHeader("H1", "H2").build(); - final String formatStr = format.format("A", "B"); - assertNotNull(formatStr); - assertFalse(formatStr.endsWith("null")); - } - - @SuppressWarnings("deprecation") - @Test - public void testNullRecordSeparatorCsv106__Deprecated() { - final CSVFormat format = CSVFormat.newFormat(';').withSkipHeaderRecord().withHeader("H1", "H2"); - final String formatStr = format.format("A", "B"); - assertNotNull(formatStr); - assertFalse(formatStr.endsWith("null")); - } - - @Test - public void testPrintWithEscapesEndWithCRLF() throws IOException { - final Reader in = new StringReader("x,y,x\r\na,?b,c\r\n"); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); - format.print(in,out,true); - 
assertEquals("x?,y?,x?r?na?,??b?,c?r?n", out.toString()); - } - - @Test - public void testPrintWithEscapesEndWithoutCRLF() throws IOException { - final Reader in = new StringReader("x,y,x"); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); - format.print(in,out,true); - assertEquals("x?,y?,x", out.toString()); - } - - @Test - public void testPrintWithoutQuotes() throws IOException { - final Reader in = new StringReader(""); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); - format.print(in, out, true); - assertEquals("\"\"", out.toString()); - } - - @Test - public void testPrintWithQuoteModeIsNONE() throws IOException { - final Reader in = new StringReader("a,b,c"); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NONE); - format.print(in, out, true); - assertEquals("a?,b?,c", out.toString()); - } - - @Test - public void testPrintWithQuotes() throws IOException { - final Reader in = new StringReader("\"a,b,c\r\nx,y,z"); - final Appendable out = new StringBuilder(); - final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); - format.print(in, out, true); - assertEquals("\"\"\"a,b,c\r\nx,y,z\"", out.toString()); - } - - @Test - public void testQuoteCharSameAsCommentStartThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testQuoteCharSameAsCommentStartThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> 
CSVFormat.DEFAULT.withQuote('!').withCommentMarker('!')); - } - - @Test - public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.builder().setQuote(Character.valueOf('!')).setCommentMarker('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { - // Cannot assume that callers won't use different Character objects - assertThrows( - IllegalArgumentException.class, - () -> CSVFormat.DEFAULT.withQuote(Character.valueOf('!')).withCommentMarker('!')); - } - - @Test - public void testQuoteCharSameAsDelimiterThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setDelimiter('!').build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testQuoteCharSameAsDelimiterThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withDelimiter('!')); - } - - @Test - public void testQuotePolicyNoneWithoutEscapeThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').builder().setQuoteMode(QuoteMode.NONE).build()); - } - - @SuppressWarnings("deprecation") - @Test - public void testQuotePolicyNoneWithoutEscapeThrowsException_Deprecated() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').withQuoteMode(QuoteMode.NONE)); - } - - @Test - public void testRFC4180() { - assertNull(RFC4180.getCommentMarker()); - assertEquals(',', RFC4180.getDelimiter()); - assertNull(RFC4180.getEscapeCharacter()); - assertFalse(RFC4180.getIgnoreEmptyLines()); - assertEquals(Character.valueOf('"'), RFC4180.getQuoteCharacter()); - assertNull(RFC4180.getQuoteMode()); - assertEquals("\r\n", RFC4180.getRecordSeparator()); - } - - 
@SuppressWarnings("boxing") // no need to worry about boxing here - @Test - public void testSerialization() throws Exception { - final ByteArrayOutputStream out = new ByteArrayOutputStream(); - - try (final ObjectOutputStream oos = new ObjectOutputStream(out)) { - oos.writeObject(CSVFormat.DEFAULT); - oos.flush(); - } - - final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(out.toByteArray())); - final CSVFormat format = (CSVFormat) in.readObject(); - - assertNotNull(format); - assertEquals(CSVFormat.DEFAULT.getDelimiter(), format.getDelimiter(), "delimiter"); - assertEquals(CSVFormat.DEFAULT.getQuoteCharacter(), format.getQuoteCharacter(), "encapsulator"); - assertEquals(CSVFormat.DEFAULT.getCommentMarker(), format.getCommentMarker(), "comment start"); - assertEquals(CSVFormat.DEFAULT.getRecordSeparator(), format.getRecordSeparator(), "record separator"); - assertEquals(CSVFormat.DEFAULT.getEscapeCharacter(), format.getEscapeCharacter(), "escape"); - assertEquals(CSVFormat.DEFAULT.getIgnoreSurroundingSpaces(), format.getIgnoreSurroundingSpaces(), "trim"); - assertEquals(CSVFormat.DEFAULT.getIgnoreEmptyLines(), format.getIgnoreEmptyLines(), "empty lines"); - } - - @Test - public void testToString() { - - final String string = CSVFormat.INFORMIX_UNLOAD.toString(); - - assertEquals("Delimiter=<|> Escape=<\\> QuoteChar=<\"> RecordSeparator=<\n> EmptyLines:ignored SkipHeaderRecord:false", string); - - } - - @Test - public void testToStringAndWithCommentMarkerTakingCharacter() { - - final CSVFormat.Predefined csvFormat_Predefined = CSVFormat.Predefined.Default; - final CSVFormat csvFormat = csvFormat_Predefined.getFormat(); - - assertNull(csvFormat.getEscapeCharacter()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getTrim()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertEquals(',', csvFormat.getDelimiter()); - - 
assertFalse(csvFormat.getIgnoreHeaderCase()); - assertEquals("\r\n", csvFormat.getRecordSeparator()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.isNullStringSet()); - assertFalse(csvFormat.getAllowMissingColumnNames()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertNull(csvFormat.getNullString()); - assertNull(csvFormat.getQuoteMode()); - - assertTrue(csvFormat.getIgnoreEmptyLines()); - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - - final Character character = Character.valueOf('n'); - - final CSVFormat csvFormatTwo = csvFormat.withCommentMarker(character); - - assertNull(csvFormat.getEscapeCharacter()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getTrim()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertEquals(',', csvFormat.getDelimiter()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertEquals("\r\n", csvFormat.getRecordSeparator()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.isNullStringSet()); - assertFalse(csvFormat.getAllowMissingColumnNames()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertNull(csvFormat.getNullString()); - assertNull(csvFormat.getQuoteMode()); - - assertTrue(csvFormat.getIgnoreEmptyLines()); - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - assertNull(csvFormatTwo.getNullString()); - - assertEquals(',', csvFormatTwo.getDelimiter()); - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertTrue(csvFormatTwo.isCommentMarkerSet()); - 
assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertNull(csvFormatTwo.getEscapeCharacter()); - - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertEquals('n', (char)csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); - - assertNull(csvFormat.getEscapeCharacter()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getTrim()); - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - - assertFalse(csvFormat.getTrailingDelimiter()); - assertEquals(',', csvFormat.getDelimiter()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertEquals("\r\n", csvFormat.getRecordSeparator()); - - assertFalse(csvFormat.isCommentMarkerSet()); - assertNull(csvFormat.getCommentMarker()); - - assertFalse(csvFormat.isNullStringSet()); - assertFalse(csvFormat.getAllowMissingColumnNames()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertFalse(csvFormat.getSkipHeaderRecord()); - - assertNull(csvFormat.getNullString()); - assertNull(csvFormat.getQuoteMode()); - - assertTrue(csvFormat.getIgnoreEmptyLines()); - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - assertNull(csvFormatTwo.getNullString()); - - assertEquals(',', csvFormatTwo.getDelimiter()); - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertTrue(csvFormatTwo.isCommentMarkerSet()); - 
assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - - assertFalse(csvFormatTwo.getTrim()); - assertNull(csvFormatTwo.getEscapeCharacter()); - - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertEquals('n', (char)csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - Assertions.assertNotEquals(csvFormat, csvFormatTwo); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); - assertEquals("Delimiter=<,> QuoteChar=<\"> CommentStart= " + - "RecordSeparator=<\r\n> EmptyLines:ignored SkipHeaderRecord:false" - , csvFormatTwo.toString()); - - } - - @Test - public void testTrim() throws IOException { - final CSVFormat formatWithTrim = CSVFormat.DEFAULT.withDelimiter(',').withTrim().withQuote(null).withRecordSeparator(CRLF); - - CharSequence in = "a,b,c"; - final StringBuilder out = new StringBuilder(); - formatWithTrim.print(in, out, true); - assertEquals("a,b,c", out.toString()); - - in = new StringBuilder(" x,y,z"); - out.setLength(0); - formatWithTrim.print(in, out, true); - assertEquals("x,y,z", out.toString()); - - in = new StringBuilder(""); - out.setLength(0); - formatWithTrim.print(in, out, true); - assertEquals("", out.toString()); - - in = new StringBuilder("header\r\n"); - out.setLength(0); - formatWithTrim.print(in, out, true); - assertEquals("header", out.toString()); - } - - @Test - public void testWithCommentStart() { - final CSVFormat formatWithCommentStart = CSVFormat.DEFAULT.withCommentMarker('#'); - assertEquals( Character.valueOf('#'), formatWithCommentStart.getCommentMarker()); - } - - - @Test - public void testWithCommentStartCRThrowsException() { - 
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withCommentMarker(CR)); - } - - - @Test - public void testWithDelimiter() { - final CSVFormat formatWithDelimiter = CSVFormat.DEFAULT.withDelimiter('!'); - assertEquals('!', formatWithDelimiter.getDelimiter()); - } - - - @Test - public void testWithDelimiterLFThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(LF)); - } - - - @Test - public void testWithEmptyEnum() { - final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(EmptyEnum.class); - assertEquals(0, formatWithHeader.getHeader().length); - } - - - @Test - public void testWithEscape() { - final CSVFormat formatWithEscape = CSVFormat.DEFAULT.withEscape('&'); - assertEquals(Character.valueOf('&'), formatWithEscape.getEscapeCharacter()); - } - - - @Test - public void testWithEmptyDuplicates() { - final CSVFormat formatWithEmptyDuplicates = - CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build(); - - assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode()); - assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames()); - } - - @Test - public void testWithEscapeCRThrowsExceptions() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR)); - } - - - @Test - public void testWithFirstRecordAsHeader() { - final CSVFormat formatWithFirstRecordAsHeader = CSVFormat.DEFAULT.withFirstRecordAsHeader(); - assertTrue(formatWithFirstRecordAsHeader.getSkipHeaderRecord()); - assertEquals(0, formatWithFirstRecordAsHeader.getHeader().length); - } - - @Test - public void testWithHeader() { - final String[] header = {"one", "two", "three"}; - // withHeader() makes a copy of the header array. 
- final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); - assertArrayEquals(header, formatWithHeader.getHeader()); - assertNotSame(header, formatWithHeader.getHeader()); - } - - @Test - public void testWithHeaderComments() { - - final CSVFormat csvFormat = CSVFormat.DEFAULT; - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertNull(csvFormat.getQuoteMode()); - - assertEquals(',', csvFormat.getDelimiter()); - assertTrue(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertNull(csvFormat.getCommentMarker()); - - assertEquals("\r\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertNull(csvFormat.getEscapeCharacter()); - - final Object[] objectArray = new Object[8]; - final CSVFormat csvFormatTwo = csvFormat.withHeaderComments(objectArray); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertNull(csvFormat.getQuoteMode()); - - assertEquals(',', csvFormat.getDelimiter()); - assertTrue(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertNull(csvFormat.getCommentMarker()); - - assertEquals("\r\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getTrailingDelimiter()); - - assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - 
assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertNull(csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getTrim()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertNull(csvFormatTwo.getNullString()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertEquals(',', csvFormatTwo.getDelimiter()); - - assertNull(csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal - - final String string = csvFormatTwo.format(objectArray); - - assertEquals('\"', (char)csvFormat.getQuoteCharacter()); - assertFalse(csvFormat.isCommentMarkerSet()); - - assertFalse(csvFormat.isEscapeCharacterSet()); - assertTrue(csvFormat.isQuoteCharacterSet()); - - assertFalse(csvFormat.getSkipHeaderRecord()); - assertNull(csvFormat.getQuoteMode()); - - assertEquals(',', csvFormat.getDelimiter()); - assertTrue(csvFormat.getIgnoreEmptyLines()); - - assertFalse(csvFormat.getIgnoreHeaderCase()); - assertNull(csvFormat.getCommentMarker()); - - assertEquals("\r\n", csvFormat.getRecordSeparator()); - assertFalse(csvFormat.getTrailingDelimiter()); - - 
assertFalse(csvFormat.getAllowMissingColumnNames()); - assertFalse(csvFormat.getTrim()); - - assertFalse(csvFormat.isNullStringSet()); - assertNull(csvFormat.getNullString()); - - assertFalse(csvFormat.getIgnoreSurroundingSpaces()); - assertNull(csvFormat.getEscapeCharacter()); - - assertFalse(csvFormatTwo.getIgnoreHeaderCase()); - assertNull(csvFormatTwo.getQuoteMode()); - - assertTrue(csvFormatTwo.getIgnoreEmptyLines()); - assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); - - assertNull(csvFormatTwo.getEscapeCharacter()); - assertFalse(csvFormatTwo.getTrim()); - - assertFalse(csvFormatTwo.isEscapeCharacterSet()); - assertTrue(csvFormatTwo.isQuoteCharacterSet()); - - assertFalse(csvFormatTwo.getSkipHeaderRecord()); - assertEquals('\"', (char)csvFormatTwo.getQuoteCharacter()); - - assertFalse(csvFormatTwo.getAllowMissingColumnNames()); - assertNull(csvFormatTwo.getNullString()); - - assertFalse(csvFormatTwo.isNullStringSet()); - assertFalse(csvFormatTwo.getTrailingDelimiter()); - - assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); - assertEquals(',', csvFormatTwo.getDelimiter()); - - assertNull(csvFormatTwo.getCommentMarker()); - assertFalse(csvFormatTwo.isCommentMarkerSet()); - - assertNotSame(csvFormat, csvFormatTwo); - assertNotSame(csvFormatTwo, csvFormat); - - assertNotNull(string); - Assertions.assertNotEquals(csvFormat, csvFormatTwo); // CSV-244 - should not be equal - - Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal - assertEquals(",,,,,,,", string); - - } - - @Test - public void testWithHeaderEnum() { - final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(Header.class); - assertArrayEquals(new String[]{ "Name", "Email", "Phone" }, formatWithHeader.getHeader()); - } - - @Test - public void testWithHeaderEnumNull() { - final CSVFormat format = CSVFormat.DEFAULT; - final Class> simpleName = null; - format.withHeader(simpleName); - } - - @Test - public void testWithHeaderResultSetNull() throws 
SQLException { - final CSVFormat format = CSVFormat.DEFAULT; - final ResultSet resultSet = null; - format.withHeader(resultSet); - } - - @Test - public void testWithIgnoreEmptyLines() { - assertFalse(CSVFormat.DEFAULT.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); - assertTrue(CSVFormat.DEFAULT.withIgnoreEmptyLines().getIgnoreEmptyLines()); - } - - @Test - public void testWithIgnoreSurround() { - assertFalse(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); - assertTrue(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces().getIgnoreSurroundingSpaces()); - } - - @Test - public void testWithNullString() { - final CSVFormat formatWithNullString = CSVFormat.DEFAULT.withNullString("null"); - assertEquals("null", formatWithNullString.getNullString()); - } - - @Test - public void testWithQuoteChar() { - final CSVFormat formatWithQuoteChar = CSVFormat.DEFAULT.withQuote('"'); - assertEquals(Character.valueOf('"'), formatWithQuoteChar.getQuoteCharacter()); - } - - @Test - public void testWithQuoteLFThrowsException() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(LF)); - } - - @Test - public void testWithQuotePolicy() { - final CSVFormat formatWithQuotePolicy = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL); - assertEquals(QuoteMode.ALL, formatWithQuotePolicy.getQuoteMode()); - } - - @Test - public void testWithRecordSeparatorCR() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CR); - assertEquals(String.valueOf(CR), formatWithRecordSeparator.getRecordSeparator()); - } - - @Test - public void testWithRecordSeparatorCRLF() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CRLF); - assertEquals(CRLF, formatWithRecordSeparator.getRecordSeparator()); - } - - @Test - public void testWithRecordSeparatorLF() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(LF); - assertEquals(String.valueOf(LF), 
formatWithRecordSeparator.getRecordSeparator()); - } - - @Test - public void testWithSystemRecordSeparator() { - final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withSystemRecordSeparator(); - assertEquals(System.getProperty("line.separator"), formatWithRecordSeparator.getRecordSeparator()); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.CSVFormat.RFC4180; +import static org.apache.commons.csv.Constants.CR; +import static org.apache.commons.csv.Constants.CRLF; +import static org.apache.commons.csv.Constants.LF; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Reader; +import java.io.StringReader; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Objects; + +import org.apache.commons.csv.CSVFormat.Builder; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Tests {@link CSVFormat}. + */ +public class CSVFormatTest { + + public enum EmptyEnum { + // empty enum. 
+ } + + public enum Header { + Name, Email, Phone + } + + private static void assertNotEquals(final Object right, final Object left) { + Assertions.assertNotEquals(right, left); + Assertions.assertNotEquals(left, right); + } + + private static CSVFormat copy(final CSVFormat format) { + return format.builder().setDelimiter(format.getDelimiter()).build(); + } + + private void assertNotEquals(final String name, final String type, final Object left, final Object right) { + if (left.equals(right) || right.equals(left)) { + fail("Objects must not compare equal for " + name + "(" + type + ")"); + } + if (left.hashCode() == right.hashCode()) { + fail("Hash code should not be equal for " + name + "(" + type + ")"); + } + } + + @Test + public void testDelimiterEmptyStringThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter("").build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testDelimiterSameAsCommentStartThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withCommentMarker('!')); + } + + @Test + public void testDelimiterSameAsCommentStartThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setCommentMarker('!').build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testDelimiterSameAsEscapeThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter('!').withEscape('!')); + } + + @Test + public void testDelimiterSameAsEscapeThrowsException1() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setDelimiter('!').setEscape('!').build()); + } + + @Test + public void testDelimiterSameAsRecordSeparatorThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat(CR)); + } + + @Test + public void testDuplicateHeaderElements() { + 
final String[] header = { "A", "A" }; + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader(header).build(); + assertEquals(2, format.getHeader().length); + assertArrayEquals(header, format.getHeader()); + } + + @SuppressWarnings("deprecation") + @Test + public void testDuplicateHeaderElements_Deprecated() { + final String[] header = { "A", "A" }; + final CSVFormat format = CSVFormat.DEFAULT.withHeader(header); + assertEquals(2, format.getHeader().length); + assertArrayEquals(header, format.getHeader()); + } + + @Test + public void testDuplicateHeaderElementsFalse() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "A").build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testDuplicateHeaderElementsFalse_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(false).withHeader("A", "A")); + } + + @Test + public void testDuplicateHeaderElementsTrue() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("A", "A").build(); + } + + @SuppressWarnings("deprecation") + @Test + public void testDuplicateHeaderElementsTrue_Deprecated() { + CSVFormat.DEFAULT.withAllowDuplicateHeaderNames(true).withHeader("A", "A"); + } + + @Test + public void testDuplicateHeaderElementsTrueContainsEmpty1() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setHeader("A", "", "B", "").build(); + } + + @Test + public void testDuplicateHeaderElementsTrueContainsEmpty2() { + CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).setHeader("A", "", "B", "").build(); + } + + @Test + public void testDuplicateHeaderElementsTrueContainsEmpty3() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(false).setAllowMissingColumnNames(true).setHeader("A", "", "B", "").build(); + } + + @Test + public void testEquals() { + final CSVFormat right = 
CSVFormat.DEFAULT; + final CSVFormat left = copy(right); + + Assertions.assertNotEquals(null, right); + Assertions.assertNotEquals("A String Instance", right); + + assertEquals(right, right); + assertEquals(right, left); + assertEquals(left, right); + + assertEquals(right.hashCode(), right.hashCode()); + assertEquals(right.hashCode(), left.hashCode()); + } + + @Test + public void testEqualsCommentStart() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').setCommentMarker('#').setQuoteMode(QuoteMode.ALL).build(); + final CSVFormat left = right.builder().setCommentMarker('!').build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsCommentStart_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withCommentMarker('#').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withCommentMarker('!'); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsDelimiter() { + final CSVFormat right = CSVFormat.newFormat('!'); + final CSVFormat left = CSVFormat.newFormat('?'); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsEscape() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').setCommentMarker('#').setEscape('+').setQuoteMode(QuoteMode.ALL).build(); + final CSVFormat left = right.builder().setEscape('!').build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsEscape_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withCommentMarker('#').withEscape('+').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withEscape('!'); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsHash() throws Exception { + final Method[] methods = CSVFormat.class.getDeclaredMethods(); + for (final Method method : methods) { + if (Modifier.isPublic(method.getModifiers())) { + final String name 
= method.getName(); + if (name.startsWith("with")) { + for (final Class cls : method.getParameterTypes()) { + final String type = cls.getCanonicalName(); + if ("boolean".equals(type)) { + final Object defTrue = method.invoke(CSVFormat.DEFAULT, Boolean.TRUE); + final Object defFalse = method.invoke(CSVFormat.DEFAULT, Boolean.FALSE); + assertNotEquals(name, type, defTrue, defFalse); + } else if ("char".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, 'a'); + final Object b = method.invoke(CSVFormat.DEFAULT, 'b'); + assertNotEquals(name, type, a, b); + } else if ("java.lang.Character".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { null }); + final Object b = method.invoke(CSVFormat.DEFAULT, Character.valueOf('d')); + assertNotEquals(name, type, a, b); + } else if ("java.lang.String".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { null }); + final Object b = method.invoke(CSVFormat.DEFAULT, "e"); + assertNotEquals(name, type, a, b); + } else if ("java.lang.String[]".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { new String[] { null, null } }); + final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] { new String[] { "f", "g" } }); + assertNotEquals(name, type, a, b); + } else if ("org.apache.commons.csv.QuoteMode".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL); + final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL); + assertNotEquals(name, type, a, b); + } else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.ALLOW_ALL); + final Object b = method.invoke(CSVFormat.DEFAULT, DuplicateHeaderMode.DISALLOW); + assertNotEquals(name, type, a, b); + } else if ("java.lang.Object[]".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] { new Object[] { null, null } }); + final Object 
b = method.invoke(CSVFormat.DEFAULT, new Object[] { new Object[] { new Object(), new Object() } }); + assertNotEquals(name, type, a, b); + } else if ("withHeader".equals(name)) { // covered above by String[] + // ignored + } else { + fail("Unhandled method: " + name + "(" + type + ")"); + } + } + } + } + } + } + + @Test + public void testEqualsHeader() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setHeader("One", "Two", "Three") + .setIgnoreEmptyLines(true).setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).build(); + final CSVFormat left = right.builder().setHeader("Three", "Two", "One").build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsHeader_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withHeader("One", "Two", "Three") + .withIgnoreEmptyLines().withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withHeader("Three", "Two", "One"); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsIgnoreEmptyLines() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).build(); + final CSVFormat left = right.builder().setIgnoreEmptyLines(false).build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsIgnoreEmptyLines_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withCommentMarker('#').withEscape('+').withIgnoreEmptyLines().withIgnoreSurroundingSpaces() + .withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withIgnoreEmptyLines(false); + + assertNotEquals(right, left); + } + + @Test + public void 
testEqualsIgnoreSurroundingSpaces() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setCommentMarker('#').setEscape('+').setIgnoreSurroundingSpaces(true).setQuote('"') + .setQuoteMode(QuoteMode.ALL).build(); + final CSVFormat left = right.builder().setIgnoreSurroundingSpaces(false).build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsIgnoreSurroundingSpaces_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withCommentMarker('#').withEscape('+').withIgnoreSurroundingSpaces().withQuote('"') + .withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withIgnoreSurroundingSpaces(false); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsLeftNoQuoteRightQuote() { + final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).build(); + final CSVFormat right = left.builder().setQuote('#').build(); + + assertNotEquals(left, right); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsLeftNoQuoteRightQuote_Deprecated() { + final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); + final CSVFormat right = left.withQuote('#'); + + assertNotEquals(left, right); + } + + @Test + public void testEqualsNoQuotes() { + final CSVFormat left = CSVFormat.newFormat(',').builder().setQuote(null).build(); + final CSVFormat right = left.builder().setQuote(null).build(); + + assertEquals(left, right); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsNoQuotes_Deprecated() { + final CSVFormat left = CSVFormat.newFormat(',').withQuote(null); + final CSVFormat right = left.withQuote(null); + + assertEquals(left, right); + } + + @Test + public void testEqualsNullString() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + 
.setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).setNullString("null").build(); + final CSVFormat left = right.builder().setNullString("---").build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsNullString_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() + .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL).withNullString("null"); + final CSVFormat left = right.withNullString("---"); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsOne() { + + final CSVFormat csvFormatOne = CSVFormat.INFORMIX_UNLOAD; + final CSVFormat csvFormatTwo = CSVFormat.MYSQL; + + assertEquals('\\', (char) csvFormatOne.getEscapeCharacter()); + assertNull(csvFormatOne.getQuoteMode()); + + assertTrue(csvFormatOne.getIgnoreEmptyLines()); + assertFalse(csvFormatOne.getSkipHeaderRecord()); + + assertFalse(csvFormatOne.getIgnoreHeaderCase()); + assertNull(csvFormatOne.getCommentMarker()); + + assertFalse(csvFormatOne.isCommentMarkerSet()); + assertTrue(csvFormatOne.isQuoteCharacterSet()); + + assertEquals('|', csvFormatOne.getDelimiter()); + assertFalse(csvFormatOne.getAllowMissingColumnNames()); + + assertTrue(csvFormatOne.isEscapeCharacterSet()); + assertEquals("\n", csvFormatOne.getRecordSeparator()); + + assertEquals('\"', (char) csvFormatOne.getQuoteCharacter()); + assertFalse(csvFormatOne.getTrailingDelimiter()); + + assertFalse(csvFormatOne.getTrim()); + assertFalse(csvFormatOne.isNullStringSet()); + + assertNull(csvFormatOne.getNullString()); + assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); + + assertTrue(csvFormatTwo.isEscapeCharacterSet()); + assertNull(csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); + + assertEquals('\t', 
csvFormatTwo.getDelimiter()); + assertEquals("\n", csvFormatTwo.getRecordSeparator()); + + assertFalse(csvFormatTwo.isQuoteCharacterSet()); + assertTrue(csvFormatTwo.isNullStringSet()); + + assertEquals('\\', (char) csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertFalse(csvFormatTwo.getIgnoreEmptyLines()); + + assertEquals("\\N", csvFormatTwo.getNullString()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormatTwo.getTrailingDelimiter()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormatTwo, csvFormatOne); + Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); + + assertEquals('\\', (char) csvFormatOne.getEscapeCharacter()); + assertNull(csvFormatOne.getQuoteMode()); + + assertTrue(csvFormatOne.getIgnoreEmptyLines()); + assertFalse(csvFormatOne.getSkipHeaderRecord()); + + assertFalse(csvFormatOne.getIgnoreHeaderCase()); + assertNull(csvFormatOne.getCommentMarker()); + + assertFalse(csvFormatOne.isCommentMarkerSet()); + assertTrue(csvFormatOne.isQuoteCharacterSet()); + + assertEquals('|', csvFormatOne.getDelimiter()); + assertFalse(csvFormatOne.getAllowMissingColumnNames()); + + assertTrue(csvFormatOne.isEscapeCharacterSet()); + assertEquals("\n", csvFormatOne.getRecordSeparator()); + + assertEquals('\"', (char) csvFormatOne.getQuoteCharacter()); + assertFalse(csvFormatOne.getTrailingDelimiter()); + + assertFalse(csvFormatOne.getTrim()); + assertFalse(csvFormatOne.isNullStringSet()); + + assertNull(csvFormatOne.getNullString()); + assertFalse(csvFormatOne.getIgnoreSurroundingSpaces()); + + assertTrue(csvFormatTwo.isEscapeCharacterSet()); + assertNull(csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); + + 
assertEquals('\t', csvFormatTwo.getDelimiter()); + assertEquals("\n", csvFormatTwo.getRecordSeparator()); + + assertFalse(csvFormatTwo.isQuoteCharacterSet()); + assertTrue(csvFormatTwo.isNullStringSet()); + + assertEquals('\\', (char) csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertFalse(csvFormatTwo.getIgnoreEmptyLines()); + + assertEquals("\\N", csvFormatTwo.getNullString()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormatTwo.getTrailingDelimiter()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormatOne, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormatOne); + + Assertions.assertNotEquals(csvFormatOne, csvFormatTwo); + Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); + + Assertions.assertNotEquals(csvFormatTwo, csvFormatOne); + + } + + @Test + public void testEqualsQuoteChar() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').build(); + final CSVFormat left = right.builder().setQuote('!').build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsQuoteChar_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"'); + final CSVFormat left = right.withQuote('!'); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsQuotePolicy() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setQuote('"').setQuoteMode(QuoteMode.ALL).build(); + final CSVFormat left = right.builder().setQuoteMode(QuoteMode.MINIMAL).build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsQuotePolicy_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = 
right.withQuoteMode(QuoteMode.MINIMAL); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsRecordSeparator() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).build(); + final CSVFormat left = right.builder().setRecordSeparator(LF).build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsRecordSeparator_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() + .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL); + final CSVFormat left = right.withRecordSeparator(LF); + + assertNotEquals(right, left); + } + + /** Two formats that differ only in the skip-header-record flag must not be equal. */ + @Test + public void testEqualsSkipHeaderRecord() { + final CSVFormat right = CSVFormat.newFormat('\'').builder().setRecordSeparator(CR).setCommentMarker('#').setEscape('+').setIgnoreEmptyLines(true) + .setIgnoreSurroundingSpaces(true).setQuote('"').setQuoteMode(QuoteMode.ALL).setNullString("null").setSkipHeaderRecord(true).build(); + final CSVFormat left = right.builder().setSkipHeaderRecord(false).build(); + + assertNotEquals(right, left); + } + + @SuppressWarnings("deprecation") + @Test + public void testEqualsSkipHeaderRecord_Deprecated() { + final CSVFormat right = CSVFormat.newFormat('\'').withRecordSeparator(CR).withCommentMarker('#').withEscape('+').withIgnoreEmptyLines() + .withIgnoreSurroundingSpaces().withQuote('"').withQuoteMode(QuoteMode.ALL).withNullString("null").withSkipHeaderRecord(); + final CSVFormat left = right.withSkipHeaderRecord(false); + + assertNotEquals(right, left); + } + + @Test + public void testEqualsWithNull() { + + final CSVFormat csvFormat = CSVFormat.POSTGRESQL_TEXT; + + assertEquals('\\', (char) csvFormat.getEscapeCharacter()); + 
assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertEquals("\\N", csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertTrue(csvFormat.isEscapeCharacterSet()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormat.getQuoteMode()); + + assertEquals('\t', csvFormat.getDelimiter()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertEquals("\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + assertNull(csvFormat.getQuoteCharacter()); + assertTrue(csvFormat.isNullStringSet()); + + assertEquals('\\', (char) csvFormat.getEscapeCharacter()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertEquals("\\N", csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertTrue(csvFormat.isEscapeCharacterSet()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertEquals(QuoteMode.ALL_NON_NULL, csvFormat.getQuoteMode()); + + assertEquals('\t', csvFormat.getDelimiter()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertEquals("\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + assertNull(csvFormat.getQuoteCharacter()); + assertTrue(csvFormat.isNullStringSet()); + + Assertions.assertNotEquals(null, csvFormat); + + } + + @Test + public void testEscapeSameAsCommentStartThrowsException() { + assertThrows(IllegalArgumentException.class, () -> 
CSVFormat.DEFAULT.builder().setEscape('!').setCommentMarker('!').build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testEscapeSameAsCommentStartThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape('!').withCommentMarker('!')); + } + + @Test + public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType() { + // Cannot assume that callers won't use different Character objects + assertThrows(IllegalArgumentException.class, + () -> CSVFormat.DEFAULT.builder().setEscape(Character.valueOf('!')).setCommentMarker(Character.valueOf('!')).build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testEscapeSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { + // Cannot assume that callers won't use different Character objects + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(Character.valueOf('!')).withCommentMarker(Character.valueOf('!'))); + } + + @Test + public void testFormat() { + final CSVFormat format = CSVFormat.DEFAULT; + + assertEquals("", format.format()); + assertEquals("a,b,c", format.format("a", "b", "c")); + assertEquals("\"x,y\",z", format.format("x,y", "z")); + } + + @Test // I assume this to be a defect. 
+ public void testFormatThrowsNullPointerException() { + + final CSVFormat csvFormat = CSVFormat.MYSQL; + + final NullPointerException e = assertThrows(NullPointerException.class, () -> csvFormat.format((Object[]) null)); + assertEquals(Objects.class.getName(), e.getStackTrace()[0].getClassName()); + } + + /** Checks the toString() rendering of a format with escape, quote mode, null string, header comments and header set. */ + @Test + public void testFormatToString() { + final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuoteMode(QuoteMode.MINIMAL).withRecordSeparator(CRLF).withQuote('"') + .withNullString("").withIgnoreHeaderCase(true).withHeaderComments("This is HeaderComments").withHeader("col1", "col2", "col3"); + assertEquals( + "Delimiter=<,> Escape=<?> QuoteChar=<\"> QuoteMode=<MINIMAL> NullString=<> RecordSeparator=<" + CRLF + + "> IgnoreHeaderCase:ignored SkipHeaderRecord:false HeaderComments:[This is HeaderComments] Header:[col1, col2, col3]", + format.toString()); + } + + @Test + public void testGetAllowDuplicateHeaderNames() { + final Builder builder = CSVFormat.DEFAULT.builder(); + assertTrue(builder.build().getAllowDuplicateHeaderNames()); + assertTrue(builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build().getAllowDuplicateHeaderNames()); + assertFalse(builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build().getAllowDuplicateHeaderNames()); + assertFalse(builder.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW).build().getAllowDuplicateHeaderNames()); + } + + @Test + public void testGetDuplicateHeaderMode() { + final Builder builder = CSVFormat.DEFAULT.builder(); + + assertEquals(DuplicateHeaderMode.ALLOW_ALL, builder.build().getDuplicateHeaderMode()); + assertEquals(DuplicateHeaderMode.ALLOW_ALL, builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build().getDuplicateHeaderMode()); + assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, builder.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build().getDuplicateHeaderMode()); + assertEquals(DuplicateHeaderMode.DISALLOW, 
builder.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW).build().getDuplicateHeaderMode()); + } + + @Test + public void testGetHeader() { + final String[] header = { "one", "two", "three" }; + final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); + // getHeader() makes a copy of the header array. + final String[] headerCopy = formatWithHeader.getHeader(); + headerCopy[0] = "A"; + headerCopy[1] = "B"; + headerCopy[2] = "C"; + assertFalse(Arrays.equals(formatWithHeader.getHeader(), headerCopy)); + assertNotSame(formatWithHeader.getHeader(), headerCopy); + } + + @Test + public void testHashCodeAndWithIgnoreHeaderCase() { + + final CSVFormat csvFormat = CSVFormat.INFORMIX_UNLOAD_CSV; + final CSVFormat csvFormatTwo = csvFormat.withIgnoreHeaderCase(); + csvFormatTwo.hashCode(); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertTrue(csvFormatTwo.getIgnoreHeaderCase()); // now different + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + + assertFalse(csvFormatTwo.getTrim()); + + } + + @Test + public void testJiraCsv236() { + CSVFormat.DEFAULT.builder().setAllowDuplicateHeaderNames(true).setHeader("CC", "VV", "VV").build(); + } + + @SuppressWarnings("deprecation") + @Test + public void testJiraCsv236__Deprecated() { + CSVFormat.DEFAULT.withAllowDuplicateHeaderNames().withHeader("CC", "VV", "VV"); + } + + @Test + public void testNewFormat() { + + final CSVFormat csvFormat = CSVFormat.newFormat('X'); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertFalse(csvFormat.isEscapeCharacterSet()); + + assertNull(csvFormat.getRecordSeparator()); + assertNull(csvFormat.getQuoteMode()); + + assertNull(csvFormat.getCommentMarker()); + assertFalse(csvFormat.getIgnoreHeaderCase()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + 
assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertEquals('X', csvFormat.getDelimiter()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertNull(csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertFalse(csvFormat.isEscapeCharacterSet()); + + assertNull(csvFormat.getRecordSeparator()); + assertNull(csvFormat.getQuoteMode()); + + assertNull(csvFormat.getCommentMarker()); + assertFalse(csvFormat.getIgnoreHeaderCase()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertEquals('X', csvFormat.getDelimiter()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.isQuoteCharacterSet()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertNull(csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.getIgnoreEmptyLines()); + + } + + @Test + public void testNullRecordSeparatorCsv106() { + final CSVFormat format = CSVFormat.newFormat(';').builder().setSkipHeaderRecord(true).setHeader("H1", "H2").build(); + final String formatStr = format.format("A", "B"); + assertNotNull(formatStr); + assertFalse(formatStr.endsWith("null")); + } + + @SuppressWarnings("deprecation") + @Test + public void testNullRecordSeparatorCsv106__Deprecated() { + final CSVFormat format = CSVFormat.newFormat(';').withSkipHeaderRecord().withHeader("H1", "H2"); + final String formatStr = format.format("A", "B"); + assertNotNull(formatStr); + assertFalse(formatStr.endsWith("null")); + } + + @Test + public void 
testPrintRecord() throws IOException { + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180; + format.printRecord(out, "a", "b", "c"); + assertEquals("a,b,c" + format.getRecordSeparator(), out.toString()); + } + + @Test + public void testPrintRecordEmpty() throws IOException { + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180; + format.printRecord(out); + assertEquals(format.getRecordSeparator(), out.toString()); + } + + @Test + public void testPrintWithEscapesEndWithCRLF() throws IOException { + final Reader in = new StringReader("x,y,x\r\na,?b,c\r\n"); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); + format.print(in, out, true); + assertEquals("x?,y?,x?r?na?,??b?,c?r?n", out.toString()); + } + + @Test + public void testPrintWithEscapesEndWithoutCRLF() throws IOException { + final Reader in = new StringReader("x,y,x"); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withEscape('?').withDelimiter(',').withQuote(null).withRecordSeparator(CRLF); + format.print(in, out, true); + assertEquals("x?,y?,x", out.toString()); + } + + @Test + public void testPrintWithoutQuotes() throws IOException { + final Reader in = new StringReader(""); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); + format.print(in, out, true); + assertEquals("\"\"", out.toString()); + } + + @Test + public void testPrintWithQuoteModeIsNONE() throws IOException { + final Reader in = new StringReader("a,b,c"); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NONE); + format.print(in, out, true); + assertEquals("a?,b?,c", 
out.toString()); + } + + @Test + public void testPrintWithQuotes() throws IOException { + final Reader in = new StringReader("\"a,b,c\r\nx,y,z"); + final Appendable out = new StringBuilder(); + final CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withQuote('"').withEscape('?').withQuoteMode(QuoteMode.NON_NUMERIC); + format.print(in, out, true); + assertEquals("\"\"\"a,b,c\r\nx,y,z\"", out.toString()); + } + + @Test + public void testQuoteCharSameAsCommentStartThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setCommentMarker('!').build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testQuoteCharSameAsCommentStartThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withCommentMarker('!')); + } + + @Test + public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType() { + // Cannot assume that callers won't use different Character objects + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote(Character.valueOf('!')).setCommentMarker('!').build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testQuoteCharSameAsCommentStartThrowsExceptionForWrapperType_Deprecated() { + // Cannot assume that callers won't use different Character objects + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(Character.valueOf('!')).withCommentMarker('!')); + } + + @Test + public void testQuoteCharSameAsDelimiterThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.builder().setQuote('!').setDelimiter('!').build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testQuoteCharSameAsDelimiterThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote('!').withDelimiter('!')); + } + + @Test + public void 
testQuoteModeNoneShouldReturnMeaningfulExceptionMessage() { + Exception exception = assertThrows(IllegalArgumentException.class, () -> { + CSVFormat.DEFAULT.builder() + .setHeader("Col1", "Col2", "Col3", "Col4") + .setQuoteMode(QuoteMode.NONE) + .build(); + }); + String actualMessage = exception.getMessage(); + String expectedMessage = "Quote mode set to NONE but no escape character is set"; + assertEquals(expectedMessage, actualMessage); + } + + @Test + public void testQuotePolicyNoneWithoutEscapeThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').builder().setQuoteMode(QuoteMode.NONE).build()); + } + + @SuppressWarnings("deprecation") + @Test + public void testQuotePolicyNoneWithoutEscapeThrowsException_Deprecated() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.newFormat('!').withQuoteMode(QuoteMode.NONE)); + } + + @Test + public void testRFC4180() { + assertNull(RFC4180.getCommentMarker()); + assertEquals(',', RFC4180.getDelimiter()); + assertNull(RFC4180.getEscapeCharacter()); + assertFalse(RFC4180.getIgnoreEmptyLines()); + assertEquals(Character.valueOf('"'), RFC4180.getQuoteCharacter()); + assertNull(RFC4180.getQuoteMode()); + assertEquals("\r\n", RFC4180.getRecordSeparator()); + } + + @SuppressWarnings("boxing") // no need to worry about boxing here + @Test + public void testSerialization() throws Exception { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + + try (final ObjectOutputStream oos = new ObjectOutputStream(out)) { + oos.writeObject(CSVFormat.DEFAULT); + oos.flush(); + } + + final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(out.toByteArray())); + final CSVFormat format = (CSVFormat) in.readObject(); + + assertNotNull(format); + assertEquals(CSVFormat.DEFAULT.getDelimiter(), format.getDelimiter(), "delimiter"); + assertEquals(CSVFormat.DEFAULT.getQuoteCharacter(), format.getQuoteCharacter(), "encapsulator"); + 
assertEquals(CSVFormat.DEFAULT.getCommentMarker(), format.getCommentMarker(), "comment start"); + assertEquals(CSVFormat.DEFAULT.getRecordSeparator(), format.getRecordSeparator(), "record separator"); + assertEquals(CSVFormat.DEFAULT.getEscapeCharacter(), format.getEscapeCharacter(), "escape"); + assertEquals(CSVFormat.DEFAULT.getIgnoreSurroundingSpaces(), format.getIgnoreSurroundingSpaces(), "trim"); + assertEquals(CSVFormat.DEFAULT.getIgnoreEmptyLines(), format.getIgnoreEmptyLines(), "empty lines"); + } + + @Test + public void testToString() { + + final String string = CSVFormat.INFORMIX_UNLOAD.toString(); + + assertEquals("Delimiter=<|> Escape=<\\> QuoteChar=<\"> RecordSeparator=<\n> EmptyLines:ignored SkipHeaderRecord:false", string); + + } + + @Test + public void testToStringAndWithCommentMarkerTakingCharacter() { + + final CSVFormat.Predefined csvFormat_Predefined = CSVFormat.Predefined.Default; + final CSVFormat csvFormat = csvFormat_Predefined.getFormat(); + + assertNull(csvFormat.getEscapeCharacter()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getTrim()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertEquals(',', csvFormat.getDelimiter()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertEquals("\r\n", csvFormat.getRecordSeparator()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.isNullStringSet()); + assertFalse(csvFormat.getAllowMissingColumnNames()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertNull(csvFormat.getNullString()); + assertNull(csvFormat.getQuoteMode()); + + assertTrue(csvFormat.getIgnoreEmptyLines()); + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + + final Character character = Character.valueOf('n'); + + final CSVFormat csvFormatTwo = csvFormat.withCommentMarker(character); + + 
assertNull(csvFormat.getEscapeCharacter()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getTrim()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertEquals(',', csvFormat.getDelimiter()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertEquals("\r\n", csvFormat.getRecordSeparator()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.isNullStringSet()); + assertFalse(csvFormat.getAllowMissingColumnNames()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertNull(csvFormat.getNullString()); + assertNull(csvFormat.getQuoteMode()); + + assertTrue(csvFormat.getIgnoreEmptyLines()); + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + assertNull(csvFormatTwo.getNullString()); + + assertEquals(',', csvFormatTwo.getDelimiter()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertTrue(csvFormatTwo.isCommentMarkerSet()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertNull(csvFormatTwo.getEscapeCharacter()); + + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertEquals('n', (char) csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + Assertions.assertNotEquals(csvFormatTwo, csvFormat); + + 
assertNull(csvFormat.getEscapeCharacter()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getTrim()); + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + + assertFalse(csvFormat.getTrailingDelimiter()); + assertEquals(',', csvFormat.getDelimiter()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertEquals("\r\n", csvFormat.getRecordSeparator()); + + assertFalse(csvFormat.isCommentMarkerSet()); + assertNull(csvFormat.getCommentMarker()); + + assertFalse(csvFormat.isNullStringSet()); + assertFalse(csvFormat.getAllowMissingColumnNames()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertFalse(csvFormat.getSkipHeaderRecord()); + + assertNull(csvFormat.getNullString()); + assertNull(csvFormat.getQuoteMode()); + + assertTrue(csvFormat.getIgnoreEmptyLines()); + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + assertNull(csvFormatTwo.getNullString()); + + assertEquals(',', csvFormatTwo.getDelimiter()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertTrue(csvFormatTwo.isCommentMarkerSet()); + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + + assertFalse(csvFormatTwo.getTrim()); + assertNull(csvFormatTwo.getEscapeCharacter()); + + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertEquals('n', (char) csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + Assertions.assertNotEquals(csvFormat, csvFormatTwo); + + 
Assertions.assertNotEquals(csvFormatTwo, csvFormat); + /* The comment marker set above is 'n', so it must be rendered as CommentStart=<n>. */ + assertEquals("Delimiter=<,> QuoteChar=<\"> CommentStart=<n> " + "RecordSeparator=<\r\n> EmptyLines:ignored SkipHeaderRecord:false", + csvFormatTwo.toString()); + + } + + @Test + public void testTrim() throws IOException { + final CSVFormat formatWithTrim = CSVFormat.DEFAULT.withDelimiter(',').withTrim().withQuote(null).withRecordSeparator(CRLF); + + CharSequence in = "a,b,c"; + final StringBuilder out = new StringBuilder(); + formatWithTrim.print(in, out, true); + assertEquals("a,b,c", out.toString()); + + in = new StringBuilder(" x,y,z"); + out.setLength(0); + formatWithTrim.print(in, out, true); + assertEquals("x,y,z", out.toString()); + + in = new StringBuilder(""); + out.setLength(0); + formatWithTrim.print(in, out, true); + assertEquals("", out.toString()); + + in = new StringBuilder("header\r\n"); + out.setLength(0); + formatWithTrim.print(in, out, true); + assertEquals("header", out.toString()); + } + + @Test + public void testWithCommentStart() { + final CSVFormat formatWithCommentStart = CSVFormat.DEFAULT.withCommentMarker('#'); + assertEquals(Character.valueOf('#'), formatWithCommentStart.getCommentMarker()); + } + + @Test + public void testWithCommentStartCRThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withCommentMarker(CR)); + } + + @Test + public void testWithDelimiter() { + final CSVFormat formatWithDelimiter = CSVFormat.DEFAULT.withDelimiter('!'); + assertEquals('!', formatWithDelimiter.getDelimiter()); + } + + @Test + public void testWithDelimiterLFThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(LF)); + } + + @Test + public void testWithEmptyDuplicates() { + final CSVFormat formatWithEmptyDuplicates = CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build(); + + assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode()); + 
assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames()); + } + + @Test + public void testWithEmptyEnum() { + final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(EmptyEnum.class); + assertEquals(0, formatWithHeader.getHeader().length); + } + + @Test + public void testWithEscape() { + final CSVFormat formatWithEscape = CSVFormat.DEFAULT.withEscape('&'); + assertEquals(Character.valueOf('&'), formatWithEscape.getEscapeCharacter()); + } + + @Test + public void testWithEscapeCRThrowsExceptions() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR)); + } + + @Test + public void testWithFirstRecordAsHeader() { + final CSVFormat formatWithFirstRecordAsHeader = CSVFormat.DEFAULT.withFirstRecordAsHeader(); + assertTrue(formatWithFirstRecordAsHeader.getSkipHeaderRecord()); + assertEquals(0, formatWithFirstRecordAsHeader.getHeader().length); + } + + @Test + public void testWithHeader() { + final String[] header = { "one", "two", "three" }; + // withHeader() makes a copy of the header array. 
+ final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(header); + assertArrayEquals(header, formatWithHeader.getHeader()); + assertNotSame(header, formatWithHeader.getHeader()); + } + + @Test + public void testWithHeaderComments() { + + final CSVFormat csvFormat = CSVFormat.DEFAULT; + + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertNull(csvFormat.getQuoteMode()); + + assertEquals(',', csvFormat.getDelimiter()); + assertTrue(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertNull(csvFormat.getCommentMarker()); + + assertEquals("\r\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertNull(csvFormat.getEscapeCharacter()); + + final Object[] objectArray = new Object[8]; + final CSVFormat csvFormatTwo = csvFormat.withHeaderComments(objectArray); + + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertNull(csvFormat.getQuoteMode()); + + assertEquals(',', csvFormat.getDelimiter()); + assertTrue(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertNull(csvFormat.getCommentMarker()); + + assertEquals("\r\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getTrailingDelimiter()); + + assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + 
assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertNull(csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getTrim()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertNull(csvFormatTwo.getNullString()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertEquals(',', csvFormatTwo.getDelimiter()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + + final String string = csvFormatTwo.format(objectArray); + + assertEquals('\"', (char) csvFormat.getQuoteCharacter()); + assertFalse(csvFormat.isCommentMarkerSet()); + + assertFalse(csvFormat.isEscapeCharacterSet()); + assertTrue(csvFormat.isQuoteCharacterSet()); + + assertFalse(csvFormat.getSkipHeaderRecord()); + assertNull(csvFormat.getQuoteMode()); + + assertEquals(',', csvFormat.getDelimiter()); + assertTrue(csvFormat.getIgnoreEmptyLines()); + + assertFalse(csvFormat.getIgnoreHeaderCase()); + assertNull(csvFormat.getCommentMarker()); + + assertEquals("\r\n", csvFormat.getRecordSeparator()); + assertFalse(csvFormat.getTrailingDelimiter()); + + 
assertFalse(csvFormat.getAllowMissingColumnNames()); + assertFalse(csvFormat.getTrim()); + + assertFalse(csvFormat.isNullStringSet()); + assertNull(csvFormat.getNullString()); + + assertFalse(csvFormat.getIgnoreSurroundingSpaces()); + assertNull(csvFormat.getEscapeCharacter()); + + assertFalse(csvFormatTwo.getIgnoreHeaderCase()); + assertNull(csvFormatTwo.getQuoteMode()); + + assertTrue(csvFormatTwo.getIgnoreEmptyLines()); + assertFalse(csvFormatTwo.getIgnoreSurroundingSpaces()); + + assertNull(csvFormatTwo.getEscapeCharacter()); + assertFalse(csvFormatTwo.getTrim()); + + assertFalse(csvFormatTwo.isEscapeCharacterSet()); + assertTrue(csvFormatTwo.isQuoteCharacterSet()); + + assertFalse(csvFormatTwo.getSkipHeaderRecord()); + assertEquals('\"', (char) csvFormatTwo.getQuoteCharacter()); + + assertFalse(csvFormatTwo.getAllowMissingColumnNames()); + assertNull(csvFormatTwo.getNullString()); + + assertFalse(csvFormatTwo.isNullStringSet()); + assertFalse(csvFormatTwo.getTrailingDelimiter()); + + assertEquals("\r\n", csvFormatTwo.getRecordSeparator()); + assertEquals(',', csvFormatTwo.getDelimiter()); + + assertNull(csvFormatTwo.getCommentMarker()); + assertFalse(csvFormatTwo.isCommentMarkerSet()); + + assertNotSame(csvFormat, csvFormatTwo); + assertNotSame(csvFormatTwo, csvFormat); + + assertNotNull(string); + Assertions.assertNotEquals(csvFormat, csvFormatTwo); // CSV-244 - should not be equal + + Assertions.assertNotEquals(csvFormatTwo, csvFormat); // CSV-244 - should not be equal + assertEquals(",,,,,,,", string); + + } + + @Test + public void testWithHeaderEnum() { + final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(Header.class); + assertArrayEquals(new String[] { "Name", "Email", "Phone" }, formatWithHeader.getHeader()); + } + + @Test + public void testWithHeaderEnumNull() { + final CSVFormat format = CSVFormat.DEFAULT; + final Class> simpleName = null; + format.withHeader(simpleName); + } + + @Test + public void testWithHeaderResultSetNull() throws 
SQLException { + final CSVFormat format = CSVFormat.DEFAULT; + final ResultSet resultSet = null; + format.withHeader(resultSet); + } + + @Test + public void testWithIgnoreEmptyLines() { + assertFalse(CSVFormat.DEFAULT.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); + assertTrue(CSVFormat.DEFAULT.withIgnoreEmptyLines().getIgnoreEmptyLines()); + } + + @Test + public void testWithIgnoreSurround() { + assertFalse(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); + assertTrue(CSVFormat.DEFAULT.withIgnoreSurroundingSpaces().getIgnoreSurroundingSpaces()); + } + + @Test + public void testWithNullString() { + final CSVFormat formatWithNullString = CSVFormat.DEFAULT.withNullString("null"); + assertEquals("null", formatWithNullString.getNullString()); + } + + @Test + public void testWithQuoteChar() { + final CSVFormat formatWithQuoteChar = CSVFormat.DEFAULT.withQuote('"'); + assertEquals(Character.valueOf('"'), formatWithQuoteChar.getQuoteCharacter()); + } + + @Test + public void testWithQuoteLFThrowsException() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withQuote(LF)); + } + + @Test + public void testWithQuotePolicy() { + final CSVFormat formatWithQuotePolicy = CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL); + assertEquals(QuoteMode.ALL, formatWithQuotePolicy.getQuoteMode()); + } + + @Test + public void testWithRecordSeparatorCR() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CR); + assertEquals(String.valueOf(CR), formatWithRecordSeparator.getRecordSeparator()); + } + + @Test + public void testWithRecordSeparatorCRLF() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(CRLF); + assertEquals(CRLF, formatWithRecordSeparator.getRecordSeparator()); + } + + @Test + public void testWithRecordSeparatorLF() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withRecordSeparator(LF); + assertEquals(String.valueOf(LF), 
formatWithRecordSeparator.getRecordSeparator()); + } + + @Test + public void testWithSystemRecordSeparator() { + final CSVFormat formatWithRecordSeparator = CSVFormat.DEFAULT.withSystemRecordSeparator(); + assertEquals(System.lineSeparator(), formatWithRecordSeparator.getRecordSeparator()); + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 1640780aa4..52173287f2 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -1,1378 +1,1660 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.LF; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PipedReader; -import java.io.PipedWriter; -import java.io.Reader; -import java.io.StringReader; -import java.io.StringWriter; -import java.net.URL; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.stream.Collectors; - -import org.apache.commons.io.input.BOMInputStream; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * CSVParserTest - * - * The test are organized in three different sections: The 'setter/getter' section, the lexer section and finally the - * parser section. In case a test fails, you should follow a top-down approach for fixing a potential bug (its likely - * that the parser itself fails if the lexer has problems...). 
- */ -public class CSVParserTest { - - private static final Charset UTF_8 = StandardCharsets.UTF_8; - - private static final String UTF_8_NAME = UTF_8.name(); - - private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" - // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; - + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping - - private static final String CSV_INPUT_1 = "a,b,c,d"; - - private static final String CSV_INPUT_2 = "a,b,1 2"; - - private static final String[][] RESULT = {{"a", "b", "c", "d"}, {"a", "b", "1 2"}, {"foo baar", "b", ""}, - {"foo\n,,\n\",,\n\"", "d", "e"}}; - - private BOMInputStream createBOMInputStream(final String resource) throws IOException { - final URL url = ClassLoader.getSystemClassLoader().getResource(resource); - return new BOMInputStream(url.openStream()); - } - - private void parseFully(final CSVParser parser) { - for (final CSVRecord csvRecord : parser) { - assertNotNull(csvRecord); - } - } - - @Test - public void testBackslashEscaping() throws IOException { - - // To avoid confusion over the need for escaping chars in java code, - // We will test with a forward slash as the escape char, and a single - // quote as the encapsulator. 
- - final String code = "one,two,three\n" // 0 - + "'',''\n" // 1) empty encapsulators - + "/',/'\n" // 2) single encapsulators - + "'/'','/''\n" // 3) single encapsulators encapsulated via escape - + "'''',''''\n" // 4) single encapsulators encapsulated via doubling - + "/,,/,\n" // 5) separator escaped - + "//,//\n" // 6) escape escaped - + "'//','//'\n" // 7) escape escaped in encapsulation - + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces - + "9, /\n \n" // escaped newline - + ""; - final String[][] res = {{"one", "two", "three"}, // 0 - {"", ""}, // 1 - {"'", "'"}, // 2 - {"'", "'"}, // 3 - {"'", "'"}, // 4 - {",", ","}, // 5 - {"/", "/"}, // 6 - {"/", "/"}, // 7 - {" 8 ", " \"quoted \"\" /\" / string\" "}, {"9", " \n "},}; - - final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/') - .withIgnoreEmptyLines(); - - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - - Utils.compare("Records do not match expected result", res, records); - } - } - - @Test - public void testBackslashEscaping2() throws IOException { - - // To avoid confusion over the need for escaping chars in java code, - // We will test with a forward slash as the escape char, and a single - // quote as the encapsulator. 
- - final String code = "" + " , , \n" // 1) - + " \t , , \n" // 2) - + " // , /, , /,\n" // 3) - + ""; - final String[][] res = {{" ", " ", " "}, // 1 - {" \t ", " ", " "}, // 2 - {" / ", " , ", " ,"}, // 3 - }; - - final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/') - .withIgnoreEmptyLines(); - - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - - Utils.compare("", res, records); - } - } - - @Test - @Disabled - public void testBackslashEscapingOld() throws IOException { - final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" - + "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\""; - final String[][] res = {{"one", "two", "three"}, {"on\\\"e", "two"}, {"on\"e", "two"}, {"one", "tw\"o"}, - {"one", "t\\,wo"}, // backslash in quotes only escapes a delimiter (",") - {"one", "two", "th,ree"}, {"a\\\\"}, // backslash in quotes only escapes a delimiter (",") - {"a\\", "b"}, // a backslash must be returned - {"a\\\\,b"} // backslash in quotes only escapes a delimiter (",") - }; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - - @Test - @Disabled("CSV-107") - public void testBOM() throws IOException { - final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv"); - try (final CSVParser parser = CSVParser.parse(url, Charset.forName(UTF_8_NAME), CSVFormat.EXCEL.withHeader())) { - for (final CSVRecord record : parser) { - final String string = record.get("Date"); - assertNotNull(string); - // System.out.println("date: " + record.get("Date")); - } - } - } - - @Test - 
public void testBOMInputStream_ParserWithInputStream() throws IOException { - try (final BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"); - final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) { - for (final CSVRecord record : parser) { - final String string = record.get("Date"); - assertNotNull(string); - // System.out.println("date: " + record.get("Date")); - } - } - } - - @Test - public void testBOMInputStream_ParserWithReader() throws IOException { - try ( - final Reader reader = new InputStreamReader( - createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); - final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) { - for (final CSVRecord record : parser) { - final String string = record.get("Date"); - assertNotNull(string); - // System.out.println("date: " + record.get("Date")); - } - } - } - - @Test - public void testBOMInputStream_parseWithReader() throws IOException { - try ( - final Reader reader = new InputStreamReader( - createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); - final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) { - for (final CSVRecord record : parser) { - final String string = record.get("Date"); - assertNotNull(string); - // System.out.println("date: " + record.get("Date")); - } - } - } - - @Test - public void testCarriageReturnEndings() throws IOException { - final String code = "foo\rbaar,\rhello,world\r,kanu"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - } - } - - @Test - public void testCarriageReturnLineFeedEndings() throws IOException { - final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - 
assertEquals(4, records.size()); - } - } - - @Test - public void testClose() throws Exception { - final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); - final Iterator records; - try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { - records = parser.iterator(); - assertTrue(records.hasNext()); - } - assertFalse(records.hasNext()); - assertThrows(NoSuchElementException.class, records::next); - } - - @Test - public void testCSV235() throws IOException { - final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\""; // "aaa","b""bb","ccc" - final Iterator records = CSVFormat.RFC4180.parse(new StringReader(dqString)).iterator(); - final CSVRecord record = records.next(); - assertFalse(records.hasNext()); - assertEquals(3, record.size()); - assertEquals("aaa", record.get(0)); - assertEquals("b\"bb", record.get(1)); - assertEquals("ccc", record.get(2)); - } - - @Test - public void testCSV57() throws Exception { - try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { - final List list = parser.getRecords(); - assertNotNull(list); - assertEquals(0, list.size()); - } - } - - @Test - public void testDefaultFormat() throws IOException { - final String code = "" + "a,b#\n" // 1) - + "\"\n\",\" \",#\n" // 2) - + "#,\"\"\n" // 3) - + "# Final comment\n"// 4) - ; - final String[][] res = {{"a", "b#"}, {"\n", " ", "#"}, {"#", ""}, {"# Final comment"}}; - - CSVFormat format = CSVFormat.DEFAULT; - assertFalse(format.isCommentMarkerSet()); - final String[][] res_comments = {{"a", "b#"}, {"\n", " ", "#"},}; - - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - - Utils.compare("Failed to parse without comments", res, records); - - format = CSVFormat.DEFAULT.withCommentMarker('#'); - } - try (final CSVParser parser = CSVParser.parse(code, format)) { - final List records = parser.getRecords(); - - Utils.compare("Failed to 
parse with comments", res_comments, records); - } - } - - @Test - public void testDuplicateHeadersAllowedByDefault() throws Exception { - CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader()); - } - - @Test - public void testDuplicateHeadersNotAllowed() { - assertThrows(IllegalArgumentException.class, () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", - CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false))); - } - - @Test - public void testEmptyFile() throws Exception { - try (final CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), - StandardCharsets.UTF_8, CSVFormat.DEFAULT)) { - assertNull(parser.nextRecord()); - } - } - - @Test - public void testEmptyFileHeaderParsing() throws Exception { - try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT.withFirstRecordAsHeader())) { - assertNull(parser.nextRecord()); - assertTrue(parser.getHeaderNames().isEmpty()); - } - } - - @Test - public void testEmptyLineBehaviorCSV() throws Exception { - final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; - final String[][] res = {{"hello", ""} // CSV format ignores empty lines - }; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testEmptyLineBehaviorExcel() throws Exception { - final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; - final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines - {""}}; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = 
parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testEmptyString() throws Exception { - try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { - assertNull(parser.nextRecord()); - } - } - - @Test - public void testEndOfFileBehaviorCSV() throws Exception { - final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", - "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", - "hello,\r\n\r\nworld,\"\""}; - final String[][] res = {{"hello", ""}, // CSV format ignores empty lines - {"world", ""}}; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testEndOfFileBehaviorExcel() throws Exception { - final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", - "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", - "hello,\r\n\r\nworld,\"\""}; - final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines - {"world", ""}}; - - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testExcelFormat1() throws IOException { - final 
String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," - + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; - final String[][] res = {{"value1", "value2", "value3", "value4"}, {"a", "b", "c", "d"}, {" x", "", "", ""}, - {""}, {"\"hello\"", " \"world\"", "abc\ndef", ""}}; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - - @Test - public void testExcelFormat2() throws Exception { - final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; - final String[][] res = {{"foo", "baar"}, {""}, {"hello", ""}, {""}, {"world", ""}}; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - - /** - * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers - * - * @throws Exception - */ - @Test - public void testExcelHeaderCountLessThanData() throws Exception { - final String code = "A,B,C,,\r\na,b,c,d,e\r\n"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) { - for (final CSVRecord record : parser.getRecords()) { - assertEquals("a", record.get("A")); - assertEquals("b", record.get("B")); - assertEquals("c", record.get("C")); - } - } - } - - @Test - public void testFirstEndOfLineCr() throws IOException { - final String data = "foo\rbaar,\rhello,world\r,kanu"; - try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - assertEquals("\r", parser.getFirstEndOfLine()); - } - } - - 
@Test - public void testFirstEndOfLineCrLf() throws IOException { - final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; - try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - assertEquals("\r\n", parser.getFirstEndOfLine()); - } - } - - @Test - public void testFirstEndOfLineLf() throws IOException { - final String data = "foo\nbaar,\nhello,world\n,kanu"; - try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - assertEquals("\n", parser.getFirstEndOfLine()); - } - } - - @Test - public void testForEach() throws Exception { - final List records = new ArrayList<>(); - try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z")) { - for (final CSVRecord record : CSVFormat.DEFAULT.parse(in)) { - records.add(record); - } - assertEquals(3, records.size()); - assertArrayEquals(new String[] {"a", "b", "c"}, records.get(0).values()); - assertArrayEquals(new String[] {"1", "2", "3"}, records.get(1).values()); - assertArrayEquals(new String[] {"x", "y", "z"}, records.get(2).values()); - } - } - - @Test - public void testGetHeaderMap() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", - CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { - final Map headerMap = parser.getHeaderMap(); - final Iterator columnNames = headerMap.keySet().iterator(); - // Headers are iterated in column order. - assertEquals("A", columnNames.next()); - assertEquals("B", columnNames.next()); - assertEquals("C", columnNames.next()); - final Iterator records = parser.iterator(); - - // Parse to make sure getHeaderMap did not have a side-effect. 
- for (int i = 0; i < 3; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertEquals(record.get(0), record.get("A")); - assertEquals(record.get(1), record.get("B")); - assertEquals(record.get(2), record.get("C")); - } - - assertFalse(records.hasNext()); - } - } - - @Test - public void testGetHeaderNames() throws IOException { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", - CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { - final Map nameIndexMap = parser.getHeaderMap(); - final List headerNames = parser.getHeaderNames(); - assertNotNull(headerNames); - assertEquals(nameIndexMap.size(), headerNames.size()); - for (int i = 0; i < headerNames.size(); i++) { - final String name = headerNames.get(i); - assertEquals(i, nameIndexMap.get(name).intValue()); - } - } - } - - @Test - public void testGetHeaderNamesReadOnly() throws IOException { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", - CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { - final List headerNames = parser.getHeaderNames(); - assertNotNull(headerNames); - assertThrows(UnsupportedOperationException.class, () -> headerNames.add("This is a read-only list.")); - } - } - - @Test - public void testGetLine() throws IOException { - try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - for (final String[] re : RESULT) { - assertArrayEquals(re, parser.nextRecord().values()); - } - - assertNull(parser.nextRecord()); - } - } - - @Test - public void testGetLineNumberWithCR() throws Exception { - this.validateLineNumbers(String.valueOf(CR)); - } - - @Test - public void testGetLineNumberWithCRLF() throws Exception { - this.validateLineNumbers(CRLF); - } - - @Test - public void testGetLineNumberWithLF() throws Exception { - this.validateLineNumbers(String.valueOf(LF)); - } - - @Test - public void testGetOneLine() throws IOException { - try (final CSVParser parser = 
CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) { - final CSVRecord record = parser.getRecords().get(0); - assertArrayEquals(RESULT[0], record.values()); - } - } - - /** - * Tests reusing a parser to process new string records one at a time as they are being discovered. See [CSV-110]. - * - * @throws IOException when an I/O error occurs. - */ - @Test - public void testGetOneLineOneParser() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT; - try (final PipedWriter writer = new PipedWriter(); - final CSVParser parser = new CSVParser(new PipedReader(writer), format)) { - writer.append(CSV_INPUT_1); - writer.append(format.getRecordSeparator()); - final CSVRecord record1 = parser.nextRecord(); - assertArrayEquals(RESULT[0], record1.values()); - writer.append(CSV_INPUT_2); - writer.append(format.getRecordSeparator()); - final CSVRecord record2 = parser.nextRecord(); - assertArrayEquals(RESULT[1], record2.values()); - } - } - - @Test - public void testGetRecordNumberWithCR() throws Exception { - this.validateRecordNumbers(String.valueOf(CR)); - } - - @Test - public void testGetRecordNumberWithCRLF() throws Exception { - this.validateRecordNumbers(CRLF); - } - - @Test - public void testGetRecordNumberWithLF() throws Exception { - this.validateRecordNumbers(String.valueOf(LF)); - } - - @Test - public void testGetRecordPositionWithCRLF() throws Exception { - this.validateRecordPosition(CRLF); - } - - @Test - public void testGetRecordPositionWithLF() throws Exception { - this.validateRecordPosition(String.valueOf(LF)); - } - - @Test - public void testGetRecords() throws IOException { - try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - final List records = parser.getRecords(); - assertEquals(RESULT.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < RESULT.length; i++) { - assertArrayEquals(RESULT[i], records.get(i).values()); - } - } - } - - @Test - public void 
testGetRecordWithMultiLineValues() throws Exception { - try (final CSVParser parser = CSVParser.parse( - "\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"", - CSVFormat.DEFAULT.withRecordSeparator(CRLF))) { - CSVRecord record; - assertEquals(0, parser.getRecordNumber()); - assertEquals(0, parser.getCurrentLineNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(3, parser.getCurrentLineNumber()); - assertEquals(1, record.getRecordNumber()); - assertEquals(1, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(6, parser.getCurrentLineNumber()); - assertEquals(2, record.getRecordNumber()); - assertEquals(2, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(9, parser.getCurrentLineNumber()); - assertEquals(3, record.getRecordNumber()); - assertEquals(3, parser.getRecordNumber()); - assertNull(record = parser.nextRecord()); - assertEquals(9, parser.getCurrentLineNumber()); - assertEquals(3, parser.getRecordNumber()); - } - } - - @Test - public void testHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - - final Iterator records = CSVFormat.DEFAULT.withHeader().parse(in).iterator(); - - for (int i = 0; i < 2; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(1), record.get("b")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - - @Test - public void testHeaderComment() throws Exception { - final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); - - final Iterator records = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in).iterator(); - - for (int i = 0; i < 2; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(1), 
record.get("b")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - - @Test - public void testHeaderMissing() throws Exception { - final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z"); - - final Iterator records = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in) - .iterator(); - - for (int i = 0; i < 2; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - - @Test - public void testHeaderMissingWithNull() throws Exception { - final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); - CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in).iterator(); - } - - @Test - public void testHeadersMissing() throws Exception { - final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); - CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in).iterator(); - } - - @Test - public void testHeadersMissingException() { - final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); - } - - @Test - public void testHeadersMissingOneColumnException() throws Exception { - final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); - } - - @Test - public void testHeadersWithNullColumnName() throws IOException { - final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6"); - final Iterator records = CSVFormat.DEFAULT.withHeader().withNullString("null") - .withAllowMissingColumnNames().parse(in).iterator(); - final CSVRecord record = records.next(); - // Expect the null header to be missing - assertEquals(Arrays.asList("header1", 
"header3"), record.getParser().getHeaderNames()); - assertEquals(2, record.getParser().getHeaderMap().size()); - } - - @Test - public void testIgnoreCaseHeaderMapping() throws Exception { - final Reader reader = new StringReader("1,2,3"); - final Iterator records = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase() - .parse(reader).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("one")); - assertEquals("2", record.get("two")); - assertEquals("3", record.get("THREE")); - } - - @Test - public void testIgnoreEmptyLines() throws IOException { - final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; - // String code = "world\r\n\n"; - // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(3, records.size()); - } - } - - @Test - public void testInvalidFormat() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); - } - - @Test - public void testIterator() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - - final Iterator iterator = CSVFormat.DEFAULT.parse(in).iterator(); - - assertTrue(iterator.hasNext()); - assertThrows(UnsupportedOperationException.class, iterator::remove); - assertArrayEquals(new String[] {"a", "b", "c"}, iterator.next().values()); - assertArrayEquals(new String[] {"1", "2", "3"}, iterator.next().values()); - assertTrue(iterator.hasNext()); - assertTrue(iterator.hasNext()); - assertTrue(iterator.hasNext()); - assertArrayEquals(new String[] {"x", "y", "z"}, iterator.next().values()); - assertFalse(iterator.hasNext()); - - assertThrows(NoSuchElementException.class, iterator::next); - } - - @Test - public void testIteratorSequenceBreaking() throws IOException { - final String fiveRows = "1\n2\n3\n4\n5\n"; - - // Iterator hasNext() shouldn't break sequence - try (CSVParser parser = 
CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { - - final Iterator iter = parser.iterator(); - int recordNumber = 0; - while (iter.hasNext()) { - final CSVRecord record = iter.next(); - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - if (recordNumber >= 2) { - break; - } - } - iter.hasNext(); - while (iter.hasNext()) { - final CSVRecord record = iter.next(); - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - } - } - - // Consecutive enhanced for loops shouldn't break sequence - try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { - int recordNumber = 0; - for (final CSVRecord record : parser) { - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - if (recordNumber >= 2) { - break; - } - } - for (final CSVRecord record : parser) { - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - } - } - - // Consecutive enhanced for loops with hasNext() peeking shouldn't break sequence - try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { - int recordNumber = 0; - for (final CSVRecord record : parser) { - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - if (recordNumber >= 2) { - break; - } - } - parser.iterator().hasNext(); - for (final CSVRecord record : parser) { - recordNumber++; - assertEquals(String.valueOf(recordNumber), record.get(0)); - } - } - } - - @Test - public void testLineFeedEndings() throws IOException { - final String code = "foo\nbaar,\nhello,world\n,kanu"; - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { - final List records = parser.getRecords(); - assertEquals(4, records.size()); - } - } - - @Test - public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("A", "B", 
"C").withSkipHeaderRecord().parse(in) - .iterator(); - CSVRecord record; - - // 1st record - record = records.next(); - assertTrue(record.isMapped("A")); - assertTrue(record.isMapped("B")); - assertTrue(record.isMapped("C")); - assertTrue(record.isSet("A")); - assertTrue(record.isSet("B")); - assertFalse(record.isSet("C")); - assertEquals("1", record.get("A")); - assertEquals("2", record.get("B")); - assertFalse(record.isConsistent()); - - // 2nd record - record = records.next(); - assertTrue(record.isMapped("A")); - assertTrue(record.isMapped("B")); - assertTrue(record.isMapped("C")); - assertTrue(record.isSet("A")); - assertTrue(record.isSet("B")); - assertTrue(record.isSet("C")); - assertEquals("x", record.get("A")); - assertEquals("y", record.get("B")); - assertEquals("z", record.get("C")); - assertTrue(record.isConsistent()); - - assertFalse(records.hasNext()); - } - - @Test - @Disabled - public void testMongoDbCsv() throws Exception { - try (final CSVParser parser = CSVParser.parse("\"a a\",b,c" + LF + "d,e,f", CSVFormat.MONGODB_CSV)) { - final Iterator itr1 = parser.iterator(); - final Iterator itr2 = parser.iterator(); - - final CSVRecord first = itr1.next(); - assertEquals("a a", first.get(0)); - assertEquals("b", first.get(1)); - assertEquals("c", first.get(2)); - - final CSVRecord second = itr2.next(); - assertEquals("d", second.get(0)); - assertEquals("e", second.get(1)); - assertEquals("f", second.get(2)); - } - } - - @Test - // TODO this may lead to strange behavior, throw an exception if iterator() has already been called? 
- public void testMultipleIterators() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b,c" + CRLF + "d,e,f", CSVFormat.DEFAULT)) { - final Iterator itr1 = parser.iterator(); - - final CSVRecord first = itr1.next(); - assertEquals("a", first.get(0)); - assertEquals("b", first.get(1)); - assertEquals("c", first.get(2)); - - final CSVRecord second = itr1.next(); - assertEquals("d", second.get(0)); - assertEquals("e", second.get(1)); - assertEquals("f", second.get(2)); - } - } - - @Test - public void testNewCSVParserNullReaderFormat() { - assertThrows(NullPointerException.class, () -> new CSVParser(null, CSVFormat.DEFAULT)); - } - - @Test - public void testNewCSVParserReaderNullFormat() { - assertThrows(NullPointerException.class, () -> new CSVParser(new StringReader(""), null)); - } - - @Test - public void testNoHeaderMap() throws Exception { - try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) { - assertNull(parser.getHeaderMap()); - } - } - - @Test - public void testNotValueCSV() throws IOException { - final String source = "#"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#'); - final CSVParser csvParser = csvFormat.parse(new StringReader(source)); - final CSVRecord csvRecord = csvParser.nextRecord(); - assertNull(csvRecord); - } - - @Test - public void testParse() throws Exception { - final ClassLoader loader = ClassLoader.getSystemClassLoader(); - final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv"); - final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D"); - final Charset charset = StandardCharsets.UTF_8; - - try (final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), - format)) { - parseFully(parser); - } - try (final CSVParser parser = 
CSVParser.parse(new File(url.toURI()), charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = CSVParser.parse(url, charset, format)) { - parseFully(parser); - } - try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) { - parseFully(parser); - } - try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, - /* characterOffset= */0, /* recordNumber= */1)) { - parseFully(parser); - } - } - - @Test - public void testParseFileNullFormat() { - assertThrows(NullPointerException.class, - () -> CSVParser.parse(new File("CSVFileParser/test.csv"), Charset.defaultCharset(), null)); - } - - @Test - public void testParseNullFileFormat() { - assertThrows(NullPointerException.class, - () -> CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); - } - - @Test - public void testParseNullPathFormat() { - assertThrows(NullPointerException.class, - () -> CSVParser.parse((Path) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); - } - - @Test - public void testParseNullStringFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse((String) null, CSVFormat.DEFAULT)); - } - - @Test - public void testParseNullUrlCharsetFormat() { - assertThrows(NullPointerException.class, - () -> CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); - } - - @Test - public void testParserUrlNullCharsetFormat() { - assertThrows(NullPointerException.class, - () -> CSVParser.parse(new URL("https://commons.apache.org"), null, CSVFormat.DEFAULT)); - } - - @Test - public void testParseStringNullFormat() { - assertThrows(NullPointerException.class, () -> CSVParser.parse("csv data", (CSVFormat) null)); - } - - @Test 
- public void testParseUrlCharsetNullFormat() { - assertThrows(NullPointerException.class, - () -> CSVParser.parse(new URL("https://commons.apache.org"), Charset.defaultCharset(), null)); - } - - @Test - public void testParseWithDelimiterStringWithEscape() throws IOException { - final String source = "a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').build(); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a[|]b![|]c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - csvRecord = csvParser.nextRecord(); - assertEquals("abc[abc]", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - - @Test - public void testParseWithDelimiterStringWithQuote() throws IOException { - final String source = "'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build(); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a[|]b[|]c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - csvRecord = csvParser.nextRecord(); - assertEquals("abc[abc]", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - - @Test - public void testParseWithDelimiterWithEscape() throws IOException { - final String source = "a!,b!,c,xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withEscape('!'); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - final CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a,b,c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - - @Test - public void testParseWithDelimiterWithQuote() throws IOException { - final String source = "'a,b,c',xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); - try (CSVParser csvParser = 
csvFormat.parse(new StringReader(source))) { - final CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a,b,c", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - - @Test - public void testParseWithQuoteThrowsException() { - final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); - assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c','")).nextRecord()); - assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c'abc,xyz")).nextRecord()); - assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'abc'a,b,c',xyz")).nextRecord()); - } - - @Test - public void testParseWithQuoteWithEscape() throws IOException { - final String source = "'a?,b?,c?d',xyz"; - final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'').withEscape('?'); - try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { - final CSVRecord csvRecord = csvParser.nextRecord(); - assertEquals("a,b,c?d", csvRecord.get(0)); - assertEquals("xyz", csvRecord.get(1)); - } - } - - @Test - public void testProvidedHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - - final Iterator records = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in).iterator(); - - for (int i = 0; i < 3; i++) { - assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertTrue(record.isMapped("A")); - assertTrue(record.isMapped("B")); - assertTrue(record.isMapped("C")); - assertFalse(record.isMapped("NOT MAPPED")); - assertEquals(record.get(0), record.get("A")); - assertEquals(record.get(1), record.get("B")); - assertEquals(record.get(2), record.get("C")); - } - - assertFalse(records.hasNext()); - } - - @Test - public void testProvidedHeaderAuto() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - - final Iterator records = CSVFormat.DEFAULT.withHeader().parse(in).iterator(); - - for (int i = 0; i < 2; i++) { - 
assertTrue(records.hasNext()); - final CSVRecord record = records.next(); - assertTrue(record.isMapped("a")); - assertTrue(record.isMapped("b")); - assertTrue(record.isMapped("c")); - assertFalse(record.isMapped("NOT MAPPED")); - assertEquals(record.get(0), record.get("a")); - assertEquals(record.get(1), record.get("b")); - assertEquals(record.get(2), record.get("c")); - } - - assertFalse(records.hasNext()); - } - - @Test - public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException { - final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6"); - final Iterator records = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals(Arrays.asList("header1", "header2", "header1"), record.getParser().getHeaderNames()); - } - - @Test - public void testRoundtrip() throws Exception { - final StringWriter out = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) { - final String input = "a,b,c\r\n1,2,3\r\nx,y,z\r\n"; - for (final CSVRecord record : CSVParser.parse(input, CSVFormat.DEFAULT)) { - printer.printRecord(record); - } - assertEquals(input, out.toString()); - } - } - - @Test - public void testSkipAutoHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("a")); - assertEquals("2", record.get("b")); - assertEquals("3", record.get("c")); - } - - @Test - public void testSkipHeaderOverrideDuplicateHeaders() throws Exception { - final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in) - .iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - 
assertEquals("3", record.get("Z")); - } - - @Test - public void testSkipSetAltHeaders() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in) - .iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - assertEquals("3", record.get("Z")); - } - - @Test - public void testSkipSetHeader() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in) - .iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("a")); - assertEquals("2", record.get("b")); - assertEquals("3", record.get("c")); - } - - @Test - @Disabled - public void testStartWithEmptyLinesThenHeaders() throws Exception { - final String[] codes = {"\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", - "hello,\"\"\n\n\n"}; - final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines - {""}}; - for (final String code : codes) { - try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { - final List records = parser.getRecords(); - assertEquals(res.length, records.size()); - assertFalse(records.isEmpty()); - for (int i = 0; i < res.length; i++) { - assertArrayEquals(res[i], records.get(i).values()); - } - } - } - } - - @Test - public void testStream() throws Exception { - final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); - final List list = CSVFormat.DEFAULT.parse(in).stream().collect(Collectors.toList()); - assertFalse(list.isEmpty()); - assertArrayEquals(new String[] {"a", "b", "c"}, list.get(0).values()); - assertArrayEquals(new String[] {"1", "2", "3"}, list.get(1).values()); - assertArrayEquals(new String[] {"x", "y", "z"}, list.get(2).values()); - } - - @Test - public void 
testTrailingDelimiter() throws Exception { - final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord() - .withTrailingDelimiter().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - assertEquals("3", record.get("Z")); - assertEquals(3, record.size()); - } - - @Test - public void testTrim() throws Exception { - final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z"); - final Iterator records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord() - .withTrim().parse(in).iterator(); - final CSVRecord record = records.next(); - assertEquals("1", record.get("X")); - assertEquals("2", record.get("Y")); - assertEquals("3", record.get("Z")); - assertEquals(3, record.size()); - } - - private void validateLineNumbers(final String lineSeparator) throws IOException { - try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", - CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { - assertEquals(0, parser.getCurrentLineNumber()); - assertNotNull(parser.nextRecord()); - assertEquals(1, parser.getCurrentLineNumber()); - assertNotNull(parser.nextRecord()); - assertEquals(2, parser.getCurrentLineNumber()); - assertNotNull(parser.nextRecord()); - // Read EOF without EOL should 3 - assertEquals(3, parser.getCurrentLineNumber()); - assertNull(parser.nextRecord()); - // Read EOF without EOL should 3 - assertEquals(3, parser.getCurrentLineNumber()); - } - } - - private void validateRecordNumbers(final String lineSeparator) throws IOException { - try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", - CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { - CSVRecord record; - assertEquals(0, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - 
assertEquals(1, record.getRecordNumber()); - assertEquals(1, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(2, record.getRecordNumber()); - assertEquals(2, parser.getRecordNumber()); - assertNotNull(record = parser.nextRecord()); - assertEquals(3, record.getRecordNumber()); - assertEquals(3, parser.getRecordNumber()); - assertNull(record = parser.nextRecord()); - assertEquals(3, parser.getRecordNumber()); - } - } - - private void validateRecordPosition(final String lineSeparator) throws IOException { - final String nl = lineSeparator; // used as linebreak in values for better distinction - - final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator + - // to see if recordPosition correctly points to the enclosing quote - "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator + - // unicode test... not very relevant while operating on strings instead of bytes, but for - // completeness... - "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF"; - - final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator); - CSVParser parser = CSVParser.parse(code, format); - - CSVRecord record; - assertEquals(0, parser.getRecordNumber()); - - assertNotNull(record = parser.nextRecord()); - assertEquals(1, record.getRecordNumber()); - assertEquals(code.indexOf('a'), record.getCharacterPosition()); - - assertNotNull(record = parser.nextRecord()); - assertEquals(2, record.getRecordNumber()); - assertEquals(code.indexOf('1'), record.getCharacterPosition()); - - assertNotNull(record = parser.nextRecord()); - final long positionRecord3 = record.getCharacterPosition(); - assertEquals(3, record.getRecordNumber()); - assertEquals(code.indexOf("'A"), record.getCharacterPosition()); - assertEquals("A" + lineSeparator + "A", record.get(0)); - assertEquals("B" + lineSeparator + "B", record.get(1)); - assertEquals("CC", record.get(2)); - - assertNotNull(record = parser.nextRecord()); - assertEquals(4, 
record.getRecordNumber()); - assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); - - assertNotNull(record = parser.nextRecord()); - assertEquals(5, record.getRecordNumber()); - assertEquals(code.indexOf("EOF"), record.getCharacterPosition()); - - parser.close(); - - // now try to read starting at record 3 - parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3); - - assertNotNull(record = parser.nextRecord()); - assertEquals(3, record.getRecordNumber()); - assertEquals(code.indexOf("'A"), record.getCharacterPosition()); - assertEquals("A" + lineSeparator + "A", record.get(0)); - assertEquals("B" + lineSeparator + "B", record.get(1)); - assertEquals("CC", record.get(2)); - - assertNotNull(record = parser.nextRecord()); - assertEquals(4, record.getRecordNumber()); - assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); - assertEquals("\u00c4", record.get(0)); - - parser.close(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.Constants.CR; +import static org.apache.commons.csv.Constants.CRLF; +import static org.apache.commons.csv.Constants.LF; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.PipedReader; +import java.io.PipedWriter; +import java.io.Reader; +import java.io.StringReader; +import java.io.StringWriter; +import java.io.UncheckedIOException; +import java.net.URL; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.stream.Collectors; + +import org.apache.commons.io.input.BOMInputStream; +import org.apache.commons.io.input.BrokenInputStream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +/** + * CSVParserTest + * + * The test are organized in three different sections: The 'setter/getter' section, the lexer section and finally the + * parser section. In case a test fails, you should follow a top-down approach for fixing a potential bug (its likely + * that the parser itself fails if the lexer has problems...). 
+ */ +public class CSVParserTest { + + private static final Charset UTF_8 = StandardCharsets.UTF_8; + + private static final String UTF_8_NAME = UTF_8.name(); + + private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" + // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; + + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping + + private static final String CSV_INPUT_1 = "a,b,c,d"; + + private static final String CSV_INPUT_2 = "a,b,1 2"; + + private static final String[][] RESULT = {{"a", "b", "c", "d"}, {"a", "b", "1 2"}, {"foo baar", "b", ""}, {"foo\n,,\n\",,\n\"", "d", "e"}}; + + // CSV with no header comments + static private final String CSV_INPUT_NO_COMMENT = "A,B"+CRLF+"1,2"+CRLF; + + // CSV with a header comment + static private final String CSV_INPUT_HEADER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF; + + // CSV with a single line header and trailer comment + static private final String CSV_INPUT_HEADER_TRAILER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment"; + + // CSV with a multi-line header and trailer comment + static private final String CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# multi-line" + CRLF + "# comment"; + + // Format with auto-detected header + static private final CSVFormat FORMAT_AUTO_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().build(); + + // Format with explicit header + static private final CSVFormat FORMAT_EXPLICIT_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT) + .setSkipHeaderRecord(true) + .setCommentMarker('#') + .setHeader("A", "B") + .build(); + + // Format with explicit header that does not skip the header line + CSVFormat FORMAT_EXPLICIT_HEADER_NOSKIP = CSVFormat.Builder.create(CSVFormat.DEFAULT) + .setCommentMarker('#') + .setHeader("A", "B") + .build(); + + @SuppressWarnings("resource") 
// caller releases + private BOMInputStream createBOMInputStream(final String resource) throws IOException { + return new BOMInputStream(ClassLoader.getSystemClassLoader().getResource(resource).openStream()); + } + + CSVRecord parse(final CSVParser parser, final int failParseRecordNo) throws IOException { + if (parser.getRecordNumber() + 1 == failParseRecordNo) { + assertThrows(IOException.class, () -> parser.nextRecord()); + return null; + } + return parser.nextRecord(); + } + + private void parseFully(final CSVParser parser) { + parser.forEach(Assertions::assertNotNull); + } + + @Test + public void testBackslashEscaping() throws IOException { + + // To avoid confusion over the need for escaping chars in java code, + // We will test with a forward slash as the escape char, and a single + // quote as the encapsulator. + + final String code = "one,two,three\n" // 0 + + "'',''\n" // 1) empty encapsulators + + "/',/'\n" // 2) single encapsulators + + "'/'','/''\n" // 3) single encapsulators encapsulated via escape + + "'''',''''\n" // 4) single encapsulators encapsulated via doubling + + "/,,/,\n" // 5) separator escaped + + "//,//\n" // 6) escape escaped + + "'//','//'\n" // 7) escape escaped in encapsulation + + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces + + "9, /\n \n" // escaped newline + + ""; + final String[][] res = {{"one", "two", "three"}, // 0 + {"", ""}, // 1 + {"'", "'"}, // 2 + {"'", "'"}, // 3 + {"'", "'"}, // 4 + {",", ","}, // 5 + {"/", "/"}, // 6 + {"/", "/"}, // 7 + {" 8 ", " \"quoted \"\" /\" / string\" "}, {"9", " \n "},}; + + final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(); + + try (final CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + + Utils.compare("Records do not match expected result", res, records); + } + } + + @Test + public void testBackslashEscaping2() 
throws IOException { + + // To avoid confusion over the need for escaping chars in java code, + // We will test with a forward slash as the escape char, and a single + // quote as the encapsulator. + + final String code = "" + " , , \n" // 1) + + " \t , , \n" // 2) + + " // , /, , /,\n" // 3) + + ""; + final String[][] res = {{" ", " ", " "}, // 1 + {" \t ", " ", " "}, // 2 + {" / ", " , ", " ,"}, // 3 + }; + + final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(); + + try (final CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + + Utils.compare("", res, records); + } + } + + @Test + @Disabled + public void testBackslashEscapingOld() throws IOException { + final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" + "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" + + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\""; + final String[][] res = {{"one", "two", "three"}, {"on\\\"e", "two"}, {"on\"e", "two"}, {"one", "tw\"o"}, {"one", "t\\,wo"}, // backslash in quotes only + // escapes a delimiter (",") + {"one", "two", "th,ree"}, {"a\\\\"}, // backslash in quotes only escapes a delimiter (",") + {"a\\", "b"}, // a backslash must be returned + {"a\\\\,b"} // backslash in quotes only escapes a delimiter (",") + }; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertArrayEquals(res[i], records.get(i).values()); + } + } + } + + @Test + @Disabled("CSV-107") + public void testBOM() throws IOException { + final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv"); + try (final CSVParser parser = CSVParser.parse(url, StandardCharsets.UTF_8, CSVFormat.EXCEL.withHeader())) { 
+ parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + public void testBOMInputStreamParserWithInputStream() throws IOException { + try (final BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"); + final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) { + parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + public void testBOMInputStreamParserWithReader() throws IOException { + try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); + final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) { + parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + public void testBOMInputStreamParseWithReader() throws IOException { + try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME); + final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) { + parser.forEach(record -> assertNotNull(record.get("Date"))); + } + } + + @Test + public void testCarriageReturnEndings() throws IOException { + final String code = "foo\rbaar,\rhello,world\r,kanu"; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + } + } + + @Test + public void testCarriageReturnLineFeedEndings() throws IOException { + final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + } + } + + @Test + public void testClose() throws Exception { + final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); + final Iterator records; + try (final CSVParser parser = 
CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { + records = parser.iterator(); + assertTrue(records.hasNext()); + } + assertFalse(records.hasNext()); + assertThrows(NoSuchElementException.class, records::next); + } + + @Test + public void testCSV141CSVFormat_DEFAULT() throws Exception { + testCSV141Failure(CSVFormat.DEFAULT, 3); + } + + @Test + public void testCSV141CSVFormat_INFORMIX_UNLOAD() throws Exception { + testCSV141Failure(CSVFormat.INFORMIX_UNLOAD, 1); + } + + @Test + public void testCSV141CSVFormat_INFORMIX_UNLOAD_CSV() throws Exception { + testCSV141Failure(CSVFormat.INFORMIX_UNLOAD_CSV, 3); + } + + @Test + public void testCSV141CSVFormat_ORACLE() throws Exception { + testCSV141Failure(CSVFormat.ORACLE, 2); + } + + @Test + public void testCSV141CSVFormat_POSTGRESQL_CSV() throws Exception { + testCSV141Failure(CSVFormat.POSTGRESQL_CSV, 3); + } + + @Test + @Disabled("PR 295 does not work") + public void testCSV141Excel() throws Exception { + testCSV141Ok(CSVFormat.EXCEL); + } + + private void testCSV141Failure(final CSVFormat format, final int failParseRecordNo) throws IOException { + final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv"); + try (final CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) { + // row 1 + CSVRecord record = parse(parser, failParseRecordNo); + if (record == null) { + return; // expected failure + } + assertEquals("1414770317901", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84*|*", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1 _8", record.get(4)); + assertEquals(5, record.size()); + // row 2 + record = parse(parser, failParseRecordNo); + if (record == null) { + return; // expected failure + } + assertEquals("1414770318470", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84:|", record.get(2)); + 
assertEquals("0", record.get(3)); + assertEquals("pass sem1 _84:\\", record.get(4)); + assertEquals(5, record.size()); + // row 3: Fail for certain + assertThrows(IOException.class, () -> parser.nextRecord()); + } + } + + private void testCSV141Ok(final CSVFormat format) throws IOException { + final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv"); + try (final CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) { + // row 1 + CSVRecord record = parser.nextRecord(); + assertEquals("1414770317901", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84*|*", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1 _8", record.get(4)); + assertEquals(5, record.size()); + // row 2 + record = parser.nextRecord(); + assertEquals("1414770318470", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84:|", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1 _84:\\", record.get(4)); + assertEquals(5, record.size()); + // row 3 + record = parser.nextRecord(); + assertEquals("1414770318327", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1", record.get(2)); + assertEquals(3, record.size()); + // row 4 + record = parser.nextRecord(); + assertEquals("1414770318628", record.get(0)); + assertEquals("android.widget.EditText", record.get(1)); + assertEquals("pass sem1 _84*|*", record.get(2)); + assertEquals("0", record.get(3)); + assertEquals("pass sem1", record.get(4)); + assertEquals(5, record.size()); + } + } + + @Test + public void testCSV141RFC4180() throws Exception { + testCSV141Failure(CSVFormat.RFC4180, 3); + } + + @Test + public void testCSV235() throws IOException { + final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\""; // "aaa","b""bb","ccc" + try (final CSVParser parser = CSVFormat.RFC4180.parse(new 
StringReader(dqString))) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertFalse(records.hasNext()); + assertEquals(3, record.size()); + assertEquals("aaa", record.get(0)); + assertEquals("b\"bb", record.get(1)); + assertEquals("ccc", record.get(2)); + } + } + + @Test + public void testCSV57() throws Exception { + try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { + final List list = parser.getRecords(); + assertNotNull(list); + assertEquals(0, list.size()); + } + } + + @Test + public void testDefaultFormat() throws IOException { + final String code = "" + "a,b#\n" // 1) + + "\"\n\",\" \",#\n" // 2) + + "#,\"\"\n" // 3) + + "# Final comment\n"// 4) + ; + final String[][] res = {{"a", "b#"}, {"\n", " ", "#"}, {"#", ""}, {"# Final comment"}}; + + CSVFormat format = CSVFormat.DEFAULT; + assertFalse(format.isCommentMarkerSet()); + final String[][] res_comments = {{"a", "b#"}, {"\n", " ", "#"},}; + + try (final CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + + Utils.compare("Failed to parse without comments", res, records); + + format = CSVFormat.DEFAULT.withCommentMarker('#'); + } + try (final CSVParser parser = CSVParser.parse(code, format)) { + final List records = parser.getRecords(); + + Utils.compare("Failed to parse with comments", res_comments, records); + } + } + + @Test + public void testDuplicateHeadersAllowedByDefault() throws Exception { + try (CSVParser parser = CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader())) { + // noop + } + } + + @Test + public void testDuplicateHeadersNotAllowed() { + assertThrows(IllegalArgumentException.class, + () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false))); + } + + @Test + public void testEmptyFile() throws Exception { + try (final CSVParser parser = 
CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8, + CSVFormat.DEFAULT)) { + assertNull(parser.nextRecord()); + } + } + + @Test + public void testEmptyFileHeaderParsing() throws Exception { + try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT.withFirstRecordAsHeader())) { + assertNull(parser.nextRecord()); + assertTrue(parser.getHeaderNames().isEmpty()); + } + } + + @Test + public void testEmptyLineBehaviorCSV() throws Exception { + final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; + final String[][] res = {{"hello", ""} // CSV format ignores empty lines + }; + for (final String code : codes) { + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertArrayEquals(res[i], records.get(i).values()); + } + } + } + } + + @Test + public void testEmptyLineBehaviorExcel() throws Exception { + final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; + final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines + {""}}; + for (final String code : codes) { + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertArrayEquals(res[i], records.get(i).values()); + } + } + } + } + + @Test + public void testEmptyString() throws Exception { + try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) { + assertNull(parser.nextRecord()); + } + } + + @Test + public void testEndOfFileBehaviorCSV() throws Exception { + final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", 
"hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", + "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\""}; + final String[][] res = {{"hello", ""}, // CSV format ignores empty lines + {"world", ""}}; + for (final String code : codes) { + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertArrayEquals(res[i], records.get(i).values()); + } + } + } + } + + @Test + public void testEndOfFileBehaviorExcel() throws Exception { + final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"", + "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\""}; + final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines + {"world", ""}}; + + for (final String code : codes) { + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertArrayEquals(res[i], records.get(i).values()); + } + } + } + } + + @Test + public void testExcelFormat1() throws IOException { + final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; + final String[][] res = {{"value1", "value2", "value3", "value4"}, {"a", "b", "c", "d"}, {" x", "", "", ""}, {""}, + {"\"hello\"", " \"world\"", "abc\ndef", ""}}; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + 
assertArrayEquals(res[i], records.get(i).values()); + } + } + } + + @Test + public void testExcelFormat2() throws Exception { + final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; + final String[][] res = {{"foo", "baar"}, {""}, {"hello", ""}, {""}, {"world", ""}}; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertArrayEquals(res[i], records.get(i).values()); + } + } + } + + /** + * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers + */ + @Test + public void testExcelHeaderCountLessThanData() throws Exception { + final String code = "A,B,C,,\r\na,b,c,d,e\r\n"; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) { + parser.getRecords().forEach(record -> { + assertEquals("a", record.get("A")); + assertEquals("b", record.get("B")); + assertEquals("c", record.get("C")); + }); + } + } + + @Test + public void testFirstEndOfLineCr() throws IOException { + final String data = "foo\rbaar,\rhello,world\r,kanu"; + try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\r", parser.getFirstEndOfLine()); + } + } + + @Test + public void testFirstEndOfLineCrLf() throws IOException { + final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; + try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\r\n", parser.getFirstEndOfLine()); + } + } + + @Test + public void testFirstEndOfLineLf() throws IOException { + final String data = "foo\nbaar,\nhello,world\n,kanu"; + try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List records = 
parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\n", parser.getFirstEndOfLine()); + } + } + + @Test + public void testForEach() throws Exception { + try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); final CSVParser parser = CSVFormat.DEFAULT.parse(in)) { + final List records = new ArrayList<>(); + for (final CSVRecord record : parser) { + records.add(record); + } + assertEquals(3, records.size()); + assertArrayEquals(new String[] {"a", "b", "c"}, records.get(0).values()); + assertArrayEquals(new String[] {"1", "2", "3"}, records.get(1).values()); + assertArrayEquals(new String[] {"x", "y", "z"}, records.get(2).values()); + } + } + + @Test + public void testGetHeaderComment_HeaderComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("header comment", parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_HeaderComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("header comment", parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_HeaderComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + // Expect no header comment - the text "comment" is attached to the first record + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_HeaderTrailerComment() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect a header comment + 
assertTrue(parser.hasHeaderComment()); + assertEquals("multi-line"+LF+"header comment", parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_NoComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_NoComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_NoComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderMap() throws Exception { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + final Map headerMap = parser.getHeaderMap(); + final Iterator columnNames = headerMap.keySet().iterator(); + // Headers are iterated in column order. + assertEquals("A", columnNames.next()); + assertEquals("B", columnNames.next()); + assertEquals("C", columnNames.next()); + final Iterator records = parser.iterator(); + + // Parse to make sure getHeaderMap did not have a side-effect. 
+ for (int i = 0; i < 3; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("A")); + assertEquals(record.get(1), record.get("B")); + assertEquals(record.get(2), record.get("C")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + public void testGetHeaderNames() throws IOException { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + final Map nameIndexMap = parser.getHeaderMap(); + final List headerNames = parser.getHeaderNames(); + assertNotNull(headerNames); + assertEquals(nameIndexMap.size(), headerNames.size()); + for (int i = 0; i < headerNames.size(); i++) { + final String name = headerNames.get(i); + assertEquals(i, nameIndexMap.get(name).intValue()); + } + } + } + + @Test + public void testGetHeaderNamesReadOnly() throws IOException { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + final List headerNames = parser.getHeaderNames(); + assertNotNull(headerNames); + assertThrows(UnsupportedOperationException.class, () -> headerNames.add("This is a read-only list.")); + } + } + + @Test + public void testGetLine() throws IOException { + try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + for (final String[] re : RESULT) { + assertArrayEquals(re, parser.nextRecord().values()); + } + + assertNull(parser.nextRecord()); + } + } + + @Test + public void testGetLineNumberWithCR() throws Exception { + this.validateLineNumbers(String.valueOf(CR)); + } + + @Test + public void testGetLineNumberWithCRLF() throws Exception { + this.validateLineNumbers(CRLF); + } + + @Test + public void testGetLineNumberWithLF() throws Exception { + this.validateLineNumbers(String.valueOf(LF)); + } + + @Test + public void testGetOneLine() throws IOException { + try (final CSVParser parser = 
CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) { + final CSVRecord record = parser.getRecords().get(0); + assertArrayEquals(RESULT[0], record.values()); + } + } + + /** + * Tests reusing a parser to process new string records one at a time as they are being discovered. See [CSV-110]. + * + * @throws IOException when an I/O error occurs. + */ + @Test + public void testGetOneLineOneParser() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT; + try (final PipedWriter writer = new PipedWriter(); final CSVParser parser = new CSVParser(new PipedReader(writer), format)) { + writer.append(CSV_INPUT_1); + writer.append(format.getRecordSeparator()); + final CSVRecord record1 = parser.nextRecord(); + assertArrayEquals(RESULT[0], record1.values()); + writer.append(CSV_INPUT_2); + writer.append(format.getRecordSeparator()); + final CSVRecord record2 = parser.nextRecord(); + assertArrayEquals(RESULT[1], record2.values()); + } + } + + @Test + public void testGetRecordNumberWithCR() throws Exception { + this.validateRecordNumbers(String.valueOf(CR)); + } + + @Test + public void testGetRecordNumberWithCRLF() throws Exception { + this.validateRecordNumbers(CRLF); + } + + @Test + public void testGetRecordNumberWithLF() throws Exception { + this.validateRecordNumbers(String.valueOf(LF)); + } + + @Test + public void testGetRecordPositionWithCRLF() throws Exception { + this.validateRecordPosition(CRLF); + } + + @Test + public void testGetRecordPositionWithLF() throws Exception { + this.validateRecordPosition(String.valueOf(LF)); + } + + @Test + public void testGetRecords() throws IOException { + try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + final List records = parser.getRecords(); + assertEquals(RESULT.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < RESULT.length; i++) { + assertArrayEquals(RESULT[i], records.get(i).values()); + } + } + } + + @Test + public void 
testGetRecordsFromBrokenInputStream() throws IOException { + @SuppressWarnings("resource") // We also get an exception on close, which is OK but can't assert in a try. + final CSVParser parser = CSVParser.parse(new BrokenInputStream(), UTF_8, CSVFormat.DEFAULT); + assertThrows(UncheckedIOException.class, parser::getRecords); + + } + + @Test + public void testGetRecordWithMultiLineValues() throws Exception { + try (final CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"", + CSVFormat.DEFAULT.withRecordSeparator(CRLF))) { + CSVRecord record; + assertEquals(0, parser.getRecordNumber()); + assertEquals(0, parser.getCurrentLineNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(3, parser.getCurrentLineNumber()); + assertEquals(1, record.getRecordNumber()); + assertEquals(1, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(6, parser.getCurrentLineNumber()); + assertEquals(2, record.getRecordNumber()); + assertEquals(2, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(9, parser.getCurrentLineNumber()); + assertEquals(3, record.getRecordNumber()); + assertEquals(3, parser.getRecordNumber()); + assertNull(record = parser.nextRecord()); + assertEquals(9, parser.getCurrentLineNumber()); + assertEquals(3, parser.getRecordNumber()); + } + } + + @Test + public void testGetTrailerComment_HeaderComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + 
assertNull(parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderTrailerComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderTrailerComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderTrailerComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_MultilineComment() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("multi-line"+LF+"comment", parser.getTrailerComment()); + } + } + + @Test + public void testHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { + final Iterator records = parser.iterator(); + + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = 
records.next(); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(1), record.get("b")); + assertEquals(record.get(2), record.get("c")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + public void testHeaderComment() throws Exception { + final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); + + try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) { + final Iterator records = parser.iterator(); + + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(1), record.get("b")); + assertEquals(record.get(2), record.get("c")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + public void testHeaderMissing() throws Exception { + final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z"); + + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { + final Iterator records = parser.iterator(); + + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(2), record.get("c")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + public void testHeaderMissingWithNull() throws Exception { + final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in)) { + parser.iterator(); + } + } + + @Test + public void testHeadersMissing() throws Exception { + try (final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); + final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) { + parser.iterator(); + } + } + + @Test + public void testHeadersMissingException() { + final Reader in = new 
StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z"); + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); + } + + @Test + public void testHeadersMissingOneColumnException() { + final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z"); + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator()); + } + + @Test + public void testHeadersWithNullColumnName() throws IOException { + final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("null").withAllowMissingColumnNames().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + // Expect the null header to be missing + @SuppressWarnings("resource") + final CSVParser recordParser = record.getParser(); + assertEquals(Arrays.asList("header1", "header3"), recordParser.getHeaderNames()); + assertEquals(2, recordParser.getHeaderMap().size()); + } + } + + @Test + public void testIgnoreCaseHeaderMapping() throws Exception { + final Reader reader = new StringReader("1,2,3"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase().parse(reader)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("one")); + assertEquals("2", record.get("two")); + assertEquals("3", record.get("THREE")); + }} + + @Test + public void testIgnoreEmptyLines() throws IOException { + final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; + // String code = "world\r\n\n"; + // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(3, records.size()); + } + } + + @Test + public void testInvalidFormat() { + assertThrows(IllegalArgumentException.class, () 
-> CSVFormat.DEFAULT.withDelimiter(CR)); + } + + @Test + public void testIterator() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + + try (final CSVParser parser = CSVFormat.DEFAULT.parse(in)) { + final Iterator iterator = parser.iterator(); + + assertTrue(iterator.hasNext()); + assertThrows(UnsupportedOperationException.class, iterator::remove); + assertArrayEquals(new String[] { "a", "b", "c" }, iterator.next().values()); + assertArrayEquals(new String[] { "1", "2", "3" }, iterator.next().values()); + assertTrue(iterator.hasNext()); + assertTrue(iterator.hasNext()); + assertTrue(iterator.hasNext()); + assertArrayEquals(new String[] { "x", "y", "z" }, iterator.next().values()); + assertFalse(iterator.hasNext()); + + assertThrows(NoSuchElementException.class, iterator::next); + }} + + @Test + public void testIteratorSequenceBreaking() throws IOException { + final String fiveRows = "1\n2\n3\n4\n5\n"; + + // Iterator hasNext() shouldn't break sequence + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { + + final Iterator iter = parser.iterator(); + int recordNumber = 0; + while (iter.hasNext()) { + final CSVRecord record = iter.next(); + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + iter.hasNext(); + while (iter.hasNext()) { + final CSVRecord record = iter.next(); + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + } + + // Consecutive enhanced for loops shouldn't break sequence + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { + int recordNumber = 0; + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + } + + // Consecutive enhanced 
for loops with hasNext() peeking shouldn't break sequence + try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) { + int recordNumber = 0; + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + parser.iterator().hasNext(); + for (final CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + } + } + + @Test + public void testLineFeedEndings() throws IOException { + final String code = "foo\nbaar,\nhello,world\n,kanu"; + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) { + final List records = parser.getRecords(); + assertEquals(4, records.size()); + } + } + + @Test + public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2\nx,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + CSVRecord record; + + // 1st record + record = records.next(); + assertTrue(record.isMapped("A")); + assertTrue(record.isMapped("B")); + assertTrue(record.isMapped("C")); + assertTrue(record.isSet("A")); + assertTrue(record.isSet("B")); + assertFalse(record.isSet("C")); + assertEquals("1", record.get("A")); + assertEquals("2", record.get("B")); + assertFalse(record.isConsistent()); + + // 2nd record + record = records.next(); + assertTrue(record.isMapped("A")); + assertTrue(record.isMapped("B")); + assertTrue(record.isMapped("C")); + assertTrue(record.isSet("A")); + assertTrue(record.isSet("B")); + assertTrue(record.isSet("C")); + assertEquals("x", record.get("A")); + assertEquals("y", record.get("B")); + assertEquals("z", record.get("C")); + assertTrue(record.isConsistent()); + + assertFalse(records.hasNext()); + } + } + + @Test + @Disabled + public void testMongoDbCsv() throws Exception { + try 
(final CSVParser parser = CSVParser.parse("\"a a\",b,c" + LF + "d,e,f", CSVFormat.MONGODB_CSV)) { + final Iterator itr1 = parser.iterator(); + final Iterator itr2 = parser.iterator(); + + final CSVRecord first = itr1.next(); + assertEquals("a a", first.get(0)); + assertEquals("b", first.get(1)); + assertEquals("c", first.get(2)); + + final CSVRecord second = itr2.next(); + assertEquals("d", second.get(0)); + assertEquals("e", second.get(1)); + assertEquals("f", second.get(2)); + } + } + + @Test + // TODO this may lead to strange behavior, throw an exception if iterator() has already been called? + public void testMultipleIterators() throws Exception { + try (final CSVParser parser = CSVParser.parse("a,b,c" + CRLF + "d,e,f", CSVFormat.DEFAULT)) { + final Iterator itr1 = parser.iterator(); + + final CSVRecord first = itr1.next(); + assertEquals("a", first.get(0)); + assertEquals("b", first.get(1)); + assertEquals("c", first.get(2)); + + final CSVRecord second = itr1.next(); + assertEquals("d", second.get(0)); + assertEquals("e", second.get(1)); + assertEquals("f", second.get(2)); + } + } + + @Test + public void testNewCSVParserNullReaderFormat() { + assertThrows(NullPointerException.class, () -> new CSVParser(null, CSVFormat.DEFAULT)); + } + + @Test + public void testNewCSVParserReaderNullFormat() { + assertThrows(NullPointerException.class, () -> new CSVParser(new StringReader(""), null)); + } + + @Test + public void testNoHeaderMap() throws Exception { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) { + assertNull(parser.getHeaderMap()); + } + } + + @Test + public void testNotValueCSV() throws IOException { + final String source = "#"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#'); + try (final CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertNull(csvRecord); + } + } + + @Test + public void testParse() throws Exception 
{ + final ClassLoader loader = ClassLoader.getSystemClassLoader(); + final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv"); + final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D"); + final Charset charset = StandardCharsets.UTF_8; + + try (@SuppressWarnings("resource") // CSVParser closes the input resource + final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) { + parseFully(parser); + } + try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), format)) { + parseFully(parser); + } + try (final CSVParser parser = CSVParser.parse(new File(url.toURI()), charset, format)) { + parseFully(parser); + } + try (@SuppressWarnings("resource") // CSVParser closes the input resource + final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) { + parseFully(parser); + } + try (final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) { + parseFully(parser); + } + try (final CSVParser parser = CSVParser.parse(url, charset, format)) { + parseFully(parser); + } + try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) { + parseFully(parser); + } + try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, /* characterOffset= */0, /* recordNumber= */1)) { + parseFully(parser); + } + } + + @Test + public void testParseFileNullFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse(new File("CSVFileParser/test.csv"), Charset.defaultCharset(), null)); + } + + @Test + public void testParseNullFileFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); + } + + @Test + public void testParseNullPathFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((Path) null, 
Charset.defaultCharset(), CSVFormat.DEFAULT)); + } + + @Test + public void testParseNullStringFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((String) null, CSVFormat.DEFAULT)); + } + + @Test + public void testParseNullUrlCharsetFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT)); + } + + @Test + public void testParserUrlNullCharsetFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), null, CSVFormat.DEFAULT)); + } + + @Test + public void testParseStringNullFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse("csv data", (CSVFormat) null)); + } + + @Test + public void testParseUrlCharsetNullFormat() { + assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), Charset.defaultCharset(), null)); + } + + @Test + public void testParseWithDelimiterStringWithEscape() throws IOException { + final String source = "a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').build(); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a[|]b![|]c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + csvRecord = csvParser.nextRecord(); + assertEquals("abc[abc]", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + @Test + public void testParseWithDelimiterStringWithQuote() throws IOException { + final String source = "'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build(); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a[|]b[|]c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); 
+ csvRecord = csvParser.nextRecord(); + assertEquals("abc[abc]", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + @Test + public void testParseWithDelimiterWithEscape() throws IOException { + final String source = "a!,b!,c,xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withEscape('!'); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a,b,c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + @Test + public void testParseWithDelimiterWithQuote() throws IOException { + final String source = "'a,b,c',xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a,b,c", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + @Test + public void testParseWithQuoteThrowsException() { + final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\''); + assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c','")).nextRecord()); + assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c'abc,xyz")).nextRecord()); + assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'abc'a,b,c',xyz")).nextRecord()); + } + @Test + public void testParseWithQuoteWithEscape() throws IOException { + final String source = "'a?,b?,c?d',xyz"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'').withEscape('?'); + try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) { + final CSVRecord csvRecord = csvParser.nextRecord(); + assertEquals("a,b,c?d", csvRecord.get(0)); + assertEquals("xyz", csvRecord.get(1)); + } + } + @Test + public void testProvidedHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", 
"C").parse(in)) { + final Iterator records = parser.iterator(); + + for (int i = 0; i < 3; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertTrue(record.isMapped("A")); + assertTrue(record.isMapped("B")); + assertTrue(record.isMapped("C")); + assertFalse(record.isMapped("NOT MAPPED")); + assertEquals(record.get(0), record.get("A")); + assertEquals(record.get(1), record.get("B")); + assertEquals(record.get(2), record.get("C")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + public void testProvidedHeaderAuto() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { + final Iterator records = parser.iterator(); + + for (int i = 0; i < 2; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertTrue(record.isMapped("a")); + assertTrue(record.isMapped("b")); + assertTrue(record.isMapped("c")); + assertFalse(record.isMapped("NOT MAPPED")); + assertEquals(record.get(0), record.get("a")); + assertEquals(record.get(1), record.get("b")); + assertEquals(record.get(2), record.get("c")); + } + + assertFalse(records.hasNext()); + } + } + + @Test + public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException { + final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6"); + try (final CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + @SuppressWarnings("resource") + final CSVParser recordParser = record.getParser(); + assertEquals(Arrays.asList("header1", "header2", "header1"), recordParser.getHeaderNames()); + }} + + @Test + public void testRoundtrip() throws Exception { + final StringWriter out = new StringWriter(); + final String data = "a,b,c\r\n1,2,3\r\nx,y,z\r\n"; + try (final CSVPrinter printer = new CSVPrinter(out, 
CSVFormat.DEFAULT); + final CSVParser parse = CSVParser.parse(data, CSVFormat.DEFAULT)) { + for (final CSVRecord record : parse) { + printer.printRecord(record); + } + assertEquals(data, out.toString()); + } + } + + @Test + public void testSkipAutoHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("a")); + assertEquals("2", record.get("b")); + assertEquals("3", record.get("c")); + } + } + + @Test + public void testSkipHeaderOverrideDuplicateHeaders() throws Exception { + final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + }} + + @Test + public void testSkipSetAltHeaders() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + } + } + + @Test + public void testSkipSetHeader() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("a")); + assertEquals("2", record.get("b")); + assertEquals("3", record.get("c")); + } + } + + @Test + 
@Disabled + public void testStartWithEmptyLinesThenHeaders() throws Exception { + final String[] codes = {"\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"}; + final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines + {""}}; + for (final String code : codes) { + try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) { + final List records = parser.getRecords(); + assertEquals(res.length, records.size()); + assertFalse(records.isEmpty()); + for (int i = 0; i < res.length; i++) { + assertArrayEquals(res[i], records.get(i).values()); + } + } + } + } + + @Test + public void testStream() throws Exception { + final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.parse(in)) { + final List list = parser.stream().collect(Collectors.toList()); + assertFalse(list.isEmpty()); + assertArrayEquals(new String[] { "a", "b", "c" }, list.get(0).values()); + assertArrayEquals(new String[] { "1", "2", "3" }, list.get(1).values()); + assertArrayEquals(new String[] { "x", "y", "z" }, list.get(2).values()); + }} + + @Test + public void testThrowExceptionWithLineAndPosition() throws IOException { + final String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\nrec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10"; + final StringReader stringReader = new StringReader(csvContent); + // @formatter:off + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder() + .setHeader() + .setSkipHeaderRecord(true) + .build(); + // @formatter:on + + try (CSVParser csvParser = csvFormat.parse(stringReader)) { + final Exception exception = assertThrows(UncheckedIOException.class, csvParser::getRecords); + assertTrue(exception.getMessage().contains("Invalid char between encapsulated token and delimiter at line: 2, position: 94")); + } + } + + @Test + public void testTrailingDelimiter() throws Exception { + final Reader in = new 
StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrailingDelimiter().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + assertEquals(3, record.size()); + } + } + + @Test + public void testTrim() throws Exception { + final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z"); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrim().parse(in)) { + final Iterator records = parser.iterator(); + final CSVRecord record = records.next(); + assertEquals("1", record.get("X")); + assertEquals("2", record.get("Y")); + assertEquals("3", record.get("Z")); + assertEquals(3, record.size()); + }} + + private void validateLineNumbers(final String lineSeparator) throws IOException { + try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { + assertEquals(0, parser.getCurrentLineNumber()); + assertNotNull(parser.nextRecord()); + assertEquals(1, parser.getCurrentLineNumber()); + assertNotNull(parser.nextRecord()); + assertEquals(2, parser.getCurrentLineNumber()); + assertNotNull(parser.nextRecord()); + // Read EOF without EOL should 3 + assertEquals(3, parser.getCurrentLineNumber()); + assertNull(parser.nextRecord()); + // Read EOF without EOL should 3 + assertEquals(3, parser.getCurrentLineNumber()); + } + } + + private void validateRecordNumbers(final String lineSeparator) throws IOException { + try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { + CSVRecord record; + assertEquals(0, parser.getRecordNumber()); + assertNotNull(record = 
parser.nextRecord()); + assertEquals(1, record.getRecordNumber()); + assertEquals(1, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(2, record.getRecordNumber()); + assertEquals(2, parser.getRecordNumber()); + assertNotNull(record = parser.nextRecord()); + assertEquals(3, record.getRecordNumber()); + assertEquals(3, parser.getRecordNumber()); + assertNull(record = parser.nextRecord()); + assertEquals(3, parser.getRecordNumber()); + } + } + + private void validateRecordPosition(final String lineSeparator) throws IOException { + final String nl = lineSeparator; // used as linebreak in values for better distinction + + final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator + + // to see if recordPosition correctly points to the enclosing quote + "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator + + // unicode test... not very relevant while operating on strings instead of bytes, but for + // completeness... + "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF"; + + final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator); + CSVParser parser = CSVParser.parse(code, format); + + CSVRecord record; + assertEquals(0, parser.getRecordNumber()); + + assertNotNull(record = parser.nextRecord()); + assertEquals(1, record.getRecordNumber()); + assertEquals(code.indexOf('a'), record.getCharacterPosition()); + + assertNotNull(record = parser.nextRecord()); + assertEquals(2, record.getRecordNumber()); + assertEquals(code.indexOf('1'), record.getCharacterPosition()); + + assertNotNull(record = parser.nextRecord()); + final long positionRecord3 = record.getCharacterPosition(); + assertEquals(3, record.getRecordNumber()); + assertEquals(code.indexOf("'A"), record.getCharacterPosition()); + assertEquals("A" + lineSeparator + "A", record.get(0)); + assertEquals("B" + lineSeparator + "B", record.get(1)); + assertEquals("CC", record.get(2)); + + assertNotNull(record = 
parser.nextRecord()); + assertEquals(4, record.getRecordNumber()); + assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); + + assertNotNull(record = parser.nextRecord()); + assertEquals(5, record.getRecordNumber()); + assertEquals(code.indexOf("EOF"), record.getCharacterPosition()); + + parser.close(); + + // now try to read starting at record 3 + parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3); + + assertNotNull(record = parser.nextRecord()); + assertEquals(3, record.getRecordNumber()); + assertEquals(code.indexOf("'A"), record.getCharacterPosition()); + assertEquals("A" + lineSeparator + "A", record.get(0)); + assertEquals("B" + lineSeparator + "B", record.get(1)); + assertEquals("CC", record.get(2)); + + assertNotNull(record = parser.nextRecord()); + assertEquals(4, record.getRecordNumber()); + assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition()); + assertEquals("\u00c4", record.get(0)); + + parser.close(); + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java index 6890bc26d6..0f9b298cae 100644 --- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java @@ -1,1710 +1,1751 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSLASH; -import static org.apache.commons.csv.Constants.CR; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; - -import java.io.CharArrayWriter; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.io.PrintStream; -import java.io.Reader; -import java.io.StringReader; -import java.io.StringWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.sql.BatchUpdateException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Arrays; -import java.util.Date; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Objects; -import java.util.Random; -import java.util.Vector; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.output.NullOutputStream; -import 
org.apache.commons.lang3.StringUtils; -import org.h2.tools.SimpleResultSet; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * Tests {@link CSVPrinter}. - */ -public class CSVPrinterTest { - - private static final char DQUOTE_CHAR = '"'; - private static final char EURO_CH = '\u20AC'; - private static final int ITERATIONS_FOR_RANDOM_TEST = 50000; - private static final char QUOTE_CH = '\''; - - private static String printable(final String s) { - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < s.length(); i++) { - final char ch = s.charAt(i); - if (ch <= ' ' || ch >= 128) { - sb.append("(").append((int) ch).append(")"); - } else { - sb.append(ch); - } - } - return sb.toString(); - } - - private String longText2; - - private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator(); - - private void doOneRandom(final CSVFormat format) throws Exception { - final Random r = new Random(); - - final int nLines = r.nextInt(4) + 1; - final int nCol = r.nextInt(3) + 1; - // nLines=1;nCol=2; - final String[][] lines = generateLines(nLines, nCol); - - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - - for (int i = 0; i < nLines; i++) { - // for (int j=0; j parseResult = parser.getRecords(); - - final String[][] expected = lines.clone(); - for (int i = 0; i < expected.length; i++) { - expected[i] = expectNulls(expected[i], format); - } - Utils.compare("Printer output :" + printable(result), expected, parseResult); - } - } - - private void doRandom(final CSVFormat format, final int iter) throws Exception { - for (int i = 0; i < iter; i++) { - doOneRandom(format); - } - } - - /** - * Converts an input CSV array into expected output values WRT NULLs. NULL strings are converted to null values - * because the parser will convert these strings to null. 
- */ - private T[] expectNulls(final T[] original, final CSVFormat csvFormat) { - final T[] fixed = original.clone(); - for (int i = 0; i < fixed.length; i++) { - if (Objects.equals(csvFormat.getNullString(), fixed[i])) { - fixed[i] = null; - } - } - return fixed; - } - - private String[][] generateLines(final int nLines, final int nCol) { - final String[][] lines = new String[nLines][]; - for (int i = 0; i < nLines; i++) { - final String[] line = new String[nCol]; - lines[i] = line; - for (int j = 0; j < nCol; j++) { - line[j] = randStr(); - } - } - return lines; - } - - private Connection getH2Connection() throws SQLException, ClassNotFoundException { - Class.forName("org.h2.Driver"); - return DriverManager.getConnection("jdbc:h2:mem:my_test;", "sa", ""); - } - - private CSVPrinter printWithHeaderComments(final StringWriter sw, final Date now, final CSVFormat baseFormat) - throws IOException { - // Use withHeaderComments first to test CSV-145 - // @formatter:off - final CSVFormat format = baseFormat.builder() - .setHeaderComments("Generated by Apache Commons CSV 1.1", now) - .setCommentMarker('#') - .setHeader("Col1", "Col2") - .build(); - // @formatter:on - final CSVPrinter csvPrinter = format.print(sw); - csvPrinter.printRecord("A", "B"); - csvPrinter.printRecord("C", "D"); - csvPrinter.close(); - return csvPrinter; - } - - private String randStr() { - final Random r = new Random(); - - final int sz = r.nextInt(20); - // sz = r.nextInt(3); - final char[] buf = new char[sz]; - for (int i = 0; i < sz; i++) { - // stick in special chars with greater frequency - final char ch; - final int what = r.nextInt(20); - switch (what) { - case 0: - ch = '\r'; - break; - case 1: - ch = '\n'; - break; - case 2: - ch = '\t'; - break; - case 3: - ch = '\f'; - break; - case 4: - ch = ' '; - break; - case 5: - ch = ','; - break; - case 6: - ch = DQUOTE_CHAR; - break; - case 7: - ch = '\''; - break; - case 8: - ch = BACKSLASH; - break; - default: - ch = (char) r.nextInt(300); - 
break; - // default: ch = 'a'; break; - } - buf[i] = ch; - } - return new String(buf); - } - - private void setUpTable(final Connection connection) throws SQLException { - try (final Statement statement = connection.createStatement()) { - statement.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR(255), TEXT CLOB)"); - statement.execute("insert into TEST values(1, 'r1', 'long text 1')"); - longText2 = StringUtils.repeat('a', IOUtils.DEFAULT_BUFFER_SIZE - 4); - longText2 += "\"\r\n\"a\""; - longText2 += StringUtils.repeat('a', IOUtils.DEFAULT_BUFFER_SIZE - 1); - statement.execute("insert into TEST values(2, 'r2', '" + longText2 + "')"); - longText2 = longText2.replace("\"","\"\""); - } - } - - @Test - public void testCloseBackwardCompatibility() throws IOException { - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT; - try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { - // empty - } - verify(writer, never()).flush(); - verify(writer, times(1)).close(); - }} - - @Test - public void testCloseWithCsvFormatAutoFlushOff() throws IOException { - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(false); - try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { - // empty - } - verify(writer, never()).flush(); - verify(writer, times(1)).close(); - } - } - - @Test - public void testCloseWithCsvFormatAutoFlushOn() throws IOException { - // System.out.println("start method"); - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(true); - try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { - // empty - } - verify(writer, times(1)).flush(); - verify(writer, times(1)).close(); - }} - - @Test - public void testCloseWithFlushOff() throws IOException { - try (final Writer writer = mock(Writer.class)) { - final CSVFormat csvFormat = CSVFormat.DEFAULT; - 
@SuppressWarnings("resource") - final CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat); - csvPrinter.close(false); - verify(writer, never()).flush(); - verify(writer, times(1)).close(); - } - } - - @Test - public void testCloseWithFlushOn() throws IOException { - try (final Writer writer = mock(Writer.class)) { - @SuppressWarnings("resource") - final CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT); - csvPrinter.close(true); - verify(writer, times(1)).flush(); - } - } - - @Test - public void testCRComment() throws IOException { - final StringWriter sw = new StringWriter(); - final Object value = "abc"; - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { - printer.print(value); - printer.printComment("This is a comment\r\non multiple lines\rthis is next comment\r"); - assertEquals("abc" + recordSeparator + "# This is a comment" + recordSeparator + "# on multiple lines" - + recordSeparator + "# this is next comment" + recordSeparator + "# " + recordSeparator, sw.toString()); - } - } - - @Test - public void testCSV135() throws IOException { - final List list = new LinkedList<>(); - list.add("\"\""); // "" - list.add("\\\\"); // \\ - list.add("\\\"\\"); // \"\ - // - // "",\\,\"\ (unchanged) - tryFormat(list, null, null, "\"\",\\\\,\\\"\\"); - // - // """""",\\,"\""\" (quoted, and embedded DQ doubled) - tryFormat(list, '"', null, "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); - // - // "",\\\\,\\"\\ (escapes escaped, not quoted) - tryFormat(list, null, '\\', "\"\",\\\\\\\\,\\\\\"\\\\"); - // - // "\"\"","\\\\","\\\"\\" (quoted, and embedded DQ & escape escaped) - tryFormat(list, '"', '\\', "\"\\\"\\\"\",\"\\\\\\\\\",\"\\\\\\\"\\\\\""); - // - // """""",\\,"\""\" (quoted, embedded DQ escaped) - tryFormat(list, '"', '"', "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); - } - - @Test - public void testCSV259() throws IOException { - final StringWriter sw = new StringWriter(); - try (final Reader reader = new 
FileReader("src/test/resources/org/apache/commons/csv/CSV-259/sample.txt"); - final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { - printer.print(reader); - assertEquals("x!,y!,z", sw.toString()); - } - } - - @Test - public void testDelimeterQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("'a,b,c',xyz", sw.toString()); - } - } - - @Test - public void testDelimeterQuoteNone() throws IOException { - final StringWriter sw = new StringWriter(); - final CSVFormat format = CSVFormat.DEFAULT.withEscape('!').withQuoteMode(QuoteMode.NONE); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("a!,b!,c,xyz", sw.toString()); - } - } - - @Test - public void testDelimeterStringQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build())) { - printer.print("a[|]b[|]c"); - printer.print("xyz"); - assertEquals("'a[|]b[|]c'[|]xyz", sw.toString()); - } - } - - @Test - public void testDelimeterStringQuoteNone() throws IOException { - final StringWriter sw = new StringWriter(); - final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').setQuoteMode(QuoteMode.NONE).build(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - printer.print("a[|]b[|]c"); - printer.print("xyz"); - printer.print("a[xy]bc[]"); - assertEquals("a![!|!]b![!|!]c[|]xyz[|]a[xy]bc[]", sw.toString()); - } - } - - @Test - public void testDelimiterEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { - 
printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("a!,b!,c,xyz", sw.toString()); - } - } - - @Test - public void testDelimiterPlain() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.print("a,b,c"); - printer.print("xyz"); - assertEquals("a,b,c,xyz", sw.toString()); - } - } - - @Test - public void testDelimiterStringEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("|||").setEscape('!').setQuote(null).build())) { - printer.print("a|||b|||c"); - printer.print("xyz"); - assertEquals("a!|!|!|b!|!|!|c|||xyz", sw.toString()); - } - } - - @Test - public void testDisabledComment() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printComment("This is a comment"); - assertEquals("", sw.toString()); - } - } - - @Test - public void testDontQuoteEuroFirstChar() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { - printer.printRecord(EURO_CH, "Deux"); - assertEquals(EURO_CH + ",Deux" + recordSeparator, sw.toString()); - } - } - - @Test - public void testEolEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { - printer.print("a\rb\nc"); - printer.print("x\fy\bz"); - assertEquals("a!rb!nc,x\fy\bz", sw.toString()); - } - } - - @Test - public void testEolPlain() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.print("a\rb\nc"); - printer.print("x\fy\bz"); - assertEquals("a\rb\nc,x\fy\bz", 
sw.toString()); - } - } - - @Test - public void testEolQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("a\rb\nc"); - printer.print("x\by\fz"); - assertEquals("'a\rb\nc',x\by\fz", sw.toString()); - } - } - - @Test - public void testEscapeBackslash1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\"); - } - assertEquals("\\", sw.toString()); - } - - @Test - public void testEscapeBackslash2() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\\r"); - } - assertEquals("'\\\r'", sw.toString()); - } - - @Test - public void testEscapeBackslash3() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("X\\\r"); - } - assertEquals("'X\\\r'", sw.toString()); - } - - @Test - public void testEscapeBackslash4() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - public void testEscapeBackslash5() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - public void testEscapeNull1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\"); - } - assertEquals("\\", 
sw.toString()); - } - - @Test - public void testEscapeNull2() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\\r"); - } - assertEquals("\"\\\r\"", sw.toString()); - } - - @Test - public void testEscapeNull3() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("X\\\r"); - } - assertEquals("\"X\\\r\"", sw.toString()); - } - - @Test - public void testEscapeNull4() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - public void testEscapeNull5() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { - printer.print("\\\\"); - } - assertEquals("\\\\", sw.toString()); - } - - @Test - public void testExcelPrintAllArrayOfArrays() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords((Object[]) new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } }); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrintAllArrayOfLists() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords( - (Object[]) new List[] { Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2") }); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrintAllIterableOfArrays() throws 
IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords(Arrays.asList(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrintAllIterableOfLists() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecords( - Arrays.asList(Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2"))); - assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrinter1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testExcelPrinter2() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { - printer.printRecord("a,b", "b"); - assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testHeader() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, - CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3"))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testHeaderCommentExcel() throws IOException { - final StringWriter sw = new StringWriter(); - final Date now = new Date(); - final CSVFormat format = CSVFormat.EXCEL; - try (final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { - assertEquals("# Generated by Apache 
Commons CSV 1.1\r\n# " + now + "\r\nCol1,Col2\r\nA,B\r\nC,D\r\n", - sw.toString()); - } - } - - @Test - public void testHeaderCommentTdf() throws IOException { - final StringWriter sw = new StringWriter(); - final Date now = new Date(); - final CSVFormat format = CSVFormat.TDF; - try (final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { - assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1\tCol2\r\nA\tB\r\nC\tD\r\n", - sw.toString()); - } - } - - @Test - public void testHeaderNotSet() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testInvalidFormat() { - assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); - } - - @Test - public void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT); - final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST");) { - printer.printRecords(resultSet); - } - } - assertEquals("1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 + "\"" + recordSeparator, sw.toString()); - } - - @Test - public void testJdbcPrinterWithResultSet() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - Class.forName("org.h2.Driver"); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT 
from TEST"); - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet).print(sw)) { - printer.printRecords(resultSet); - } - } - assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 - + "\"" + recordSeparator, sw.toString()); - } - - @Test - public void testJdbcPrinterWithResultSetHeader() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT);) { - try (final ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { - printer.printRecords(resultSet, true); - assertEquals("ID,NAME" + recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, - sw.toString()); - } - try (final ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { - printer.printRecords(resultSet, false); - assertNotEquals("ID,NAME" + recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, - sw.toString()); - } - } - } - } - - @Test - public void testJdbcPrinterWithResultSetMetaData() throws IOException, ClassNotFoundException, SQLException { - final StringWriter sw = new StringWriter(); - Class.forName("org.h2.Driver"); - try (final Connection connection = getH2Connection()) { - setUpTable(connection); - try (final Statement stmt = connection.createStatement(); - final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); - final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet.getMetaData()).print(sw)) { - printer.printRecords(resultSet); - assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" - + longText2 + "\"" + recordSeparator, sw.toString()); - } - } - } - - @Test - @Disabled - public void testJira135_part1() throws IOException { 
- final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\""); - printer.printRecord(list); - } - final String expected = "\"\\\"\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - @Disabled - public void testJira135_part2() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\n"); - printer.printRecord(list); - } - final String expected = "\"\\n\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - @Disabled - public void testJira135_part3() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\\"); - printer.printRecord(list); - } - final String expected = "\"\\\\\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - @Disabled - public void testJira135All() throws IOException { - final CSVFormat format = 
CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); - final StringWriter sw = new StringWriter(); - final List list = new LinkedList<>(); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - list.add("\""); - list.add("\n"); - list.add("\\"); - printer.printRecord(list); - } - final String expected = "\"\\\"\",\"\\n\",\"\\\"" + format.getRecordSeparator(); - assertEquals(expected, sw.toString()); - final String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(list.toArray(), format), record0); - } - - @Test - public void testMongoDbCsvBasic() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { - printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbCsvCommaInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { - printer.printRecord("a,b", "c"); - assertEquals("\"a,b\",c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbCsvDoubleQuoteInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { - printer.printRecord("a \"c\" b", "d"); - assertEquals("\"a \"\"c\"\" b\",d" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbCsvTabInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { - printer.printRecord("a\tb", "c"); - assertEquals("a\tb,c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbTsvBasic() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { 
- printer.printRecord("a", "b"); - assertEquals("a\tb" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbTsvCommaInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { - printer.printRecord("a,b", "c"); - assertEquals("a,b\tc" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMongoDbTsvTabInValue() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { - printer.printRecord("a\tb", "c"); - assertEquals("\"a\tb\"\tc" + recordSeparator, sw.toString()); - } - } - - @Test - public void testMultiLineComment() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { - printer.printComment("This is a comment\non multiple lines"); - - assertEquals("# This is a comment" + recordSeparator + "# on multiple lines" + recordSeparator, - sw.toString()); - } - } - - @Test - public void testMySqlNullOutput() throws IOException { - Object[] s = new String[] { "NULL", null }; - CSVFormat format = CSVFormat.MYSQL.withQuote(DQUOTE_CHAR).withNullString("NULL") - .withQuoteMode(QuoteMode.NON_NUMERIC); - StringWriter writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - String expected = "\"NULL\"\tNULL\n"; - assertEquals(expected, writer.toString()); - String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(s, record0); - - s = new String[] { "\\N", null }; - format = CSVFormat.MYSQL.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, 
format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "A" }; - format = CSVFormat.MYSQL.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\n", "A" }; - format = CSVFormat.MYSQL.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\n\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.MYSQL.withNullString("NULL"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\tNULL\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "", "\u000e,\\\r" }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\t\u000e,\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), 
record0); - - s = new String[] { "NULL", "\\\r" }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "NULL\t\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\\r" }; - format = CSVFormat.MYSQL; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - } - - @Test - public void testMySqlNullStringDefault() { - assertEquals("\\N", CSVFormat.MYSQL.getNullString()); - } - - @Test - public void testNewCsvPrinterAppendableNullFormat() { - assertThrows(NullPointerException.class, () -> new CSVPrinter(new StringWriter(), null)); - } - - @Test - public void testNewCsvPrinterNullAppendableFormat() { - assertThrows(NullPointerException.class, () -> new CSVPrinter(null, CSVFormat.DEFAULT)); - } - - @Test - public void testNotFlushable() throws IOException { - final Appendable out = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b", "c"); - assertEquals("a,b,c" + recordSeparator, out.toString()); - printer.flush(); - } - } - - @Test - public void testParseCustomNullValues() throws IOException { - final StringWriter sw = new StringWriter(); - final CSVFormat format = CSVFormat.DEFAULT.withNullString("NULL"); - try (final CSVPrinter printer = new CSVPrinter(sw, format)) { - printer.printRecord("a", null, "b"); - } - final String csvString = sw.toString(); - assertEquals("a,NULL,b" + recordSeparator, csvString); - try (final CSVParser iterable = format.parse(new StringReader(csvString))) { - final Iterator 
iterator = iterable.iterator(); - final CSVRecord record = iterator.next(); - assertEquals("a", record.get(0)); - assertNull(record.get(1)); - assertEquals("b", record.get(2)); - assertFalse(iterator.hasNext()); - } - } - - @Test - public void testPlainEscaped() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { - printer.print("abc"); - printer.print("xyz"); - assertEquals("abc,xyz", sw.toString()); - } - } - - @Test - public void testPlainPlain() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - printer.print("abc"); - printer.print("xyz"); - assertEquals("abc,xyz", sw.toString()); - } - } - - @Test - public void testPlainQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("abc"); - assertEquals("abc", sw.toString()); - } - } - - @Test - @Disabled - public void testPostgreSqlCsvNullOutput() throws IOException { - Object[] s = new String[] { "NULL", null }; - CSVFormat format = CSVFormat.POSTGRESQL_CSV.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); - StringWriter writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - String expected = "\"NULL\",NULL\n"; - assertEquals(expected, writer.toString()); - String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(new Object[2], record0); - - s = new String[] { "\\N", null }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\\N\n"; - assertEquals(expected, writer.toString()); - 
record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "A" }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\n", "A" }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\n\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_CSV.withNullString("NULL"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\tNULL\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "", "\u000e,\\\r" }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\t\u000e,\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = 
toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "NULL", "\\\r" }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "NULL\t\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\\r" }; - format = CSVFormat.POSTGRESQL_CSV; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - } - - @Test - @Disabled - public void testPostgreSqlCsvTextOutput() throws IOException { - Object[] s = new String[] { "NULL", null }; - CSVFormat format = CSVFormat.POSTGRESQL_TEXT.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); - StringWriter writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - String expected = "\"NULL\"\tNULL\n"; - assertEquals(expected, writer.toString()); - String[] record0 = toFirstRecordValues(expected, format); - assertArrayEquals(new Object[2], record0); - - s = new String[] { "\\N", null }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "A" }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); - writer = new StringWriter(); - try 
(final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\n", "A" }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\n\tA\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_TEXT.withNullString("NULL"); - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\tNULL\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "", null }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\t\\N\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\N", "", "\u000e,\\\r" }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\N\t\t\u000e,\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "NULL", "\\\r" }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = 
new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "NULL\t\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - - s = new String[] { "\\\r" }; - format = CSVFormat.POSTGRESQL_TEXT; - writer = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(writer, format)) { - printer.printRecord(s); - } - expected = "\\\\\\r\n"; - assertEquals(expected, writer.toString()); - record0 = toFirstRecordValues(expected, format); - assertArrayEquals(expectNulls(s, format), record0); - } - - @Test - public void testPostgreSqlNullStringDefaultCsv() { - assertEquals("", CSVFormat.POSTGRESQL_CSV.getNullString()); - } - - @Test - public void testPostgreSqlNullStringDefaultText() { - assertEquals("\\N", CSVFormat.POSTGRESQL_TEXT.getNullString()); - } - - @Test - public void testPrint() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(sw)) { - printer.printRecord("a", "b\\c"); - assertEquals("a,b\\c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrintCSVParser() throws IOException { - final String code = "a1,b1\n" // 1) - + "a2,b2\n" // 2) - + "a3,b3\n" // 3) - + "a4,b4\n"// 4) - ; - final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; - final CSVFormat format = CSVFormat.DEFAULT; - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { - printer.printRecords(parser); - } - try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); - } - } - - @Test - public void testPrintCSVRecord() throws IOException { - final String code = "a1,b1\n" // 1) - + "a2,b2\n" // 2) - + "a3,b3\n" // 3) - + 
"a4,b4\n"// 4) - ; - final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; - final CSVFormat format = CSVFormat.DEFAULT; - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { - for (final CSVRecord record : parser) { - printer.printRecord(record); - } - } - try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); - } - } - - @Test - public void testPrintCSVRecords() throws IOException { - final String code = "a1,b1\n" // 1) - + "a2,b2\n" // 2) - + "a3,b3\n" // 3) - + "a4,b4\n"// 4) - ; - final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; - final CSVFormat format = CSVFormat.DEFAULT; - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { - printer.printRecords(parser.getRecords()); - } - try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { - final List records = parser.getRecords(); - assertFalse(records.isEmpty()); - Utils.compare("Fail", res, records); - } - } - - @Test - public void testPrintCustomNullValues() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withNullString("NULL"))) { - printer.printRecord("a", null, "b"); - assertEquals("a,NULL,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter1() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b"); - assertEquals("a,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter2() throws IOException { - final StringWriter sw = new StringWriter(); - try 
(final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a,b", "b"); - assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter3() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a, b", "b "); - assertEquals("\"a, b\",\"b \"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter4() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b\"c"); - assertEquals("a,\"b\"\"c\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter5() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b\nc"); - assertEquals("a,\"b\nc\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter6() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b\r\nc"); - assertEquals("a,\"b\r\nc\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrinter7() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", "b\\c"); - assertEquals("a,b\\c" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrintNullValues() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { - printer.printRecord("a", null, "b"); - assertEquals("a,,b" + recordSeparator, sw.toString()); - } - } - - @Test - public void testPrintOnePositiveInteger() throws IOException { - final 
StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL))) { - printer.print(Integer.MAX_VALUE); - assertEquals(String.valueOf(Integer.MAX_VALUE), sw.toString()); - } - } - - /** - * Test to target the use of {@link IOUtils#copy(java.io.Reader, Appendable)} which directly - * buffers the value from the Reader to the Appendable. - * - *

      Requires the format to have no quote or escape character, value to be a - * {@link java.io.Reader Reader} and the output MUST NOT be a - * {@link java.io.Writer Writer} but some other Appendable.

      - * - * @throws IOException Not expected to happen - */ - @Test - public void testPrintReaderWithoutQuoteToAppendable() throws IOException { - final StringBuilder sb = new StringBuilder(); - final String content = "testValue"; - try (final CSVPrinter printer = new CSVPrinter(sb, CSVFormat.DEFAULT.withQuote(null))) { - final StringReader value = new StringReader(content); - printer.print(value); - } - assertEquals(content, sb.toString()); - } - - /** - * Test to target the use of {@link IOUtils#copyLarge(java.io.Reader, Writer)} which directly - * buffers the value from the Reader to the Writer. - * - *

      Requires the format to have no quote or escape character, value to be a - * {@link java.io.Reader Reader} and the output MUST be a - * {@link java.io.Writer Writer}.

      - * - * @throws IOException Not expected to happen - */ - @Test - public void testPrintReaderWithoutQuoteToWriter() throws IOException { - final StringWriter sw = new StringWriter(); - final String content = "testValue"; - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { - final StringReader value = new StringReader(content); - printer.print(value); - } - assertEquals(content, sw.toString()); - } - - @Test - public void testPrintRecordsWithCSVRecord() throws IOException { - final String[] values = {"A", "B", "C"}; - final String rowData = StringUtils.join(values, ','); - final CharArrayWriter charArrayWriter = new CharArrayWriter(0); - try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData)); - final CSVPrinter csvPrinter = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { - for (final CSVRecord record : parser) { - csvPrinter.printRecord(record); - } - } - assertEquals(6, charArrayWriter.size()); - assertEquals("A|B|C" + CSVFormat.INFORMIX_UNLOAD.getRecordSeparator(), charArrayWriter.toString()); - } - - @Test - public void testPrintRecordsWithEmptyVector() throws IOException { - final PrintStream out = System.out; - try { - System.setOut(new PrintStream(NullOutputStream.NULL_OUTPUT_STREAM)); - try (CSVPrinter csvPrinter = CSVFormat.POSTGRESQL_TEXT.printer()) { - final Vector vector = new Vector<>(); - final int expectedCapacity = 23; - vector.setSize(expectedCapacity); - csvPrinter.printRecords(vector); - assertEquals(expectedCapacity, vector.capacity()); - } - } finally { - System.setOut(out); - } - } - - @Test - public void testPrintRecordsWithObjectArray() throws IOException { - final CharArrayWriter charArrayWriter = new CharArrayWriter(0); - try (CSVPrinter csvPrinter = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { - final HashSet hashSet = new HashSet<>(); - final Object[] objectArray = new Object[6]; - objectArray[3] = hashSet; - csvPrinter.printRecords(objectArray); - } - 
assertEquals(6, charArrayWriter.size()); - assertEquals("\n\n\n\n\n\n", charArrayWriter.toString()); - } - - @Test - public void testPrintRecordsWithResultSetOneRow() throws IOException, SQLException { - try (CSVPrinter csvPrinter = CSVFormat.MYSQL.printer()) { - try (ResultSet resultSet = new SimpleResultSet()) { - csvPrinter.printRecords(resultSet); - assertEquals(0, resultSet.getRow()); - } - } - } - - @Test - public void testPrintToFileWithCharsetUtf16Be() throws IOException { - final File file = File.createTempFile(getClass().getName(), ".csv"); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file, StandardCharsets.UTF_16BE)) { - printer.printRecord("a", "b\\c"); - } - assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, StandardCharsets.UTF_16BE)); - } - - @Test - public void testPrintToFileWithDefaultCharset() throws IOException { - final File file = File.createTempFile(getClass().getName(), ".csv"); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { - printer.printRecord("a", "b\\c"); - } - assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, Charset.defaultCharset())); - } - - @Test - public void testPrintToPathWithDefaultCharset() throws IOException { - final File file = File.createTempFile(getClass().getName(), ".csv"); - try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file.toPath(), Charset.defaultCharset())) { - printer.printRecord("a", "b\\c"); - } - assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, Charset.defaultCharset())); - } - - @Test - public void testQuoteAll() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL))) { - printer.printRecord("a", "b\nc", "d"); - assertEquals("\"a\",\"b\nc\",\"d\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testQuoteCommaFirstChar() throws IOException { - 
final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { - printer.printRecord(","); - assertEquals("\",\"" + recordSeparator, sw.toString()); - } - } - - @Test - public void testQuoteNonNumeric() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.NON_NUMERIC))) { - printer.printRecord("a", "b\nc", Integer.valueOf(1)); - assertEquals("\"a\",\"b\nc\",1" + recordSeparator, sw.toString()); - } - } - - @Test - public void testRandomDefault() throws Exception { - doRandom(CSVFormat.DEFAULT, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testRandomExcel() throws Exception { - doRandom(CSVFormat.EXCEL, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomMongoDbCsv() throws Exception { - doRandom(CSVFormat.MONGODB_CSV, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testRandomMySql() throws Exception { - doRandom(CSVFormat.MYSQL, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomOracle() throws Exception { - doRandom(CSVFormat.ORACLE, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomPostgreSqlCsv() throws Exception { - doRandom(CSVFormat.POSTGRESQL_CSV, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - @Disabled - public void testRandomPostgreSqlText() throws Exception { - doRandom(CSVFormat.POSTGRESQL_TEXT, ITERATIONS_FOR_RANDOM_TEST); - } - - - @Test - public void testRandomRfc4180() throws Exception { - doRandom(CSVFormat.RFC4180, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testRandomTdf() throws Exception { - doRandom(CSVFormat.TDF, ITERATIONS_FOR_RANDOM_TEST); - } - - @Test - public void testSingleLineComment() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { - 
printer.printComment("This is a comment"); - assertEquals("# This is a comment" + recordSeparator, sw.toString()); - } - } - - @Test - public void testSingleQuoteQuoted() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { - printer.print("a'b'c"); - printer.print("xyz"); - assertEquals("'a''b''c',xyz", sw.toString()); - } - } - - @Test - public void testSkipHeaderRecordFalse() throws IOException { - // functionally identical to testHeader, used to test CSV-153 - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, - CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(false))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testSkipHeaderRecordTrue() throws IOException { - // functionally identical to testHeaderNotSet, used to test CSV-153 - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, - CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(true))) { - printer.printRecord("a", "b", "c"); - printer.printRecord("x", "y", "z"); - assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); - } - } - - @Test - public void testTrailingDelimiterOnTwoColumns() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrailingDelimiter())) { - printer.printRecord("A", "B"); - assertEquals("A,B,\r\n", sw.toString()); - } - } - - @Test - public void testTrimOffOneColumn() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim(false))) { - printer.print(" A "); - assertEquals("\" A \"", sw.toString()); - } - } - - @Test - public 
void testTrimOnOneColumn() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { - printer.print(" A "); - assertEquals("A", sw.toString()); - } - } - - @Test - public void testTrimOnTwoColumns() throws IOException { - final StringWriter sw = new StringWriter(); - try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { - printer.print(" A "); - printer.print(" B "); - assertEquals("A,B", sw.toString()); - } - } - - private String[] toFirstRecordValues(final String expected, final CSVFormat format) throws IOException { - return CSVParser.parse(expected, format).getRecords().get(0).values(); - } - - private void tryFormat(final List list, final Character quote, final Character escape, final String expected) throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.withQuote(quote).withEscape(escape).withRecordSeparator(null); - final Appendable out = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(out, format)) { - printer.printRecord(list); - } - assertEquals(expected, out.toString()); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.csv; + +import static org.apache.commons.csv.Constants.BACKSLASH; +import static org.apache.commons.csv.Constants.CR; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import java.io.CharArrayWriter; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PrintStream; +import java.io.Reader; +import java.io.StringReader; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.sql.BatchUpdateException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.Random; +import java.util.Vector; +import java.util.stream.Stream; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.NullOutputStream; +import org.apache.commons.lang3.StringUtils; +import org.h2.tools.SimpleResultSet; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +/** + * Tests {@link CSVPrinter}. 
+ */ +public class CSVPrinterTest { + + private static final char DQUOTE_CHAR = '"'; + private static final char EURO_CH = '\u20AC'; + private static final int ITERATIONS_FOR_RANDOM_TEST = 50000; + private static final char QUOTE_CH = '\''; + + private static String printable(final String s) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); i++) { + final char ch = s.charAt(i); + if (ch <= ' ' || ch >= 128) { + sb.append("(").append((int) ch).append(")"); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + private String longText2; + + private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator(); + + private File createTempFile() throws IOException { + return createTempPath().toFile(); + } + + private Path createTempPath() throws IOException { + return Files.createTempFile(getClass().getName(), ".csv"); + } + + private void doOneRandom(final CSVFormat format) throws Exception { + final Random r = new Random(); + + final int nLines = r.nextInt(4) + 1; + final int nCol = r.nextInt(3) + 1; + // nLines=1;nCol=2; + final String[][] lines = generateLines(nLines, nCol); + + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + + for (int i = 0; i < nLines; i++) { + // for (int j=0; j parseResult = parser.getRecords(); + + final String[][] expected = lines.clone(); + for (int i = 0; i < expected.length; i++) { + expected[i] = expectNulls(expected[i], format); + } + Utils.compare("Printer output :" + printable(result), expected, parseResult); + } + } + + private void doRandom(final CSVFormat format, final int iter) throws Exception { + for (int i = 0; i < iter; i++) { + doOneRandom(format); + } + } + + /** + * Converts an input CSV array into expected output values WRT NULLs. NULL strings are converted to null values + * because the parser will convert these strings to null. 
+ */ + private T[] expectNulls(final T[] original, final CSVFormat csvFormat) { + final T[] fixed = original.clone(); + for (int i = 0; i < fixed.length; i++) { + if (Objects.equals(csvFormat.getNullString(), fixed[i])) { + fixed[i] = null; + } + } + return fixed; + } + + private String[][] generateLines(final int nLines, final int nCol) { + final String[][] lines = new String[nLines][]; + for (int i = 0; i < nLines; i++) { + final String[] line = new String[nCol]; + lines[i] = line; + for (int j = 0; j < nCol; j++) { + line[j] = randStr(); + } + } + return lines; + } + + private Connection getH2Connection() throws SQLException, ClassNotFoundException { + Class.forName("org.h2.Driver"); + return DriverManager.getConnection("jdbc:h2:mem:my_test;", "sa", ""); + } + + private CSVPrinter printWithHeaderComments(final StringWriter sw, final Date now, final CSVFormat baseFormat) + throws IOException { + // Use withHeaderComments first to test CSV-145 + // @formatter:off + final CSVFormat format = baseFormat.builder() + .setHeaderComments("Generated by Apache Commons CSV 1.1", now) + .setCommentMarker('#') + .setHeader("Col1", "Col2") + .build(); + // @formatter:on + final CSVPrinter csvPrinter = format.print(sw); + csvPrinter.printRecord("A", "B"); + csvPrinter.printRecord("C", "D"); + csvPrinter.close(); + return csvPrinter; + } + + private String randStr() { + final Random r = new Random(); + + final int sz = r.nextInt(20); + // sz = r.nextInt(3); + final char[] buf = new char[sz]; + for (int i = 0; i < sz; i++) { + // stick in special chars with greater frequency + final char ch; + final int what = r.nextInt(20); + switch (what) { + case 0: + ch = '\r'; + break; + case 1: + ch = '\n'; + break; + case 2: + ch = '\t'; + break; + case 3: + ch = '\f'; + break; + case 4: + ch = ' '; + break; + case 5: + ch = ','; + break; + case 6: + ch = DQUOTE_CHAR; + break; + case 7: + ch = '\''; + break; + case 8: + ch = BACKSLASH; + break; + default: + ch = (char) r.nextInt(300); + 
break; + // default: ch = 'a'; break; + } + buf[i] = ch; + } + return new String(buf); + } + + private void setUpTable(final Connection connection) throws SQLException { + try (final Statement statement = connection.createStatement()) { + statement.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR(255), TEXT CLOB)"); + statement.execute("insert into TEST values(1, 'r1', 'long text 1')"); + longText2 = StringUtils.repeat('a', IOUtils.DEFAULT_BUFFER_SIZE - 4); + longText2 += "\"\r\n\"a\""; + longText2 += StringUtils.repeat('a', IOUtils.DEFAULT_BUFFER_SIZE - 1); + statement.execute("insert into TEST values(2, 'r2', '" + longText2 + "')"); + longText2 = longText2.replace("\"","\"\""); + } + } + + @Test + public void testCloseBackwardCompatibility() throws IOException { + try (final Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT; + try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { + // empty + } + verify(writer, never()).flush(); + verify(writer, times(1)).close(); + }} + + @Test + public void testCloseWithCsvFormatAutoFlushOff() throws IOException { + try (final Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(false); + try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { + // empty + } + verify(writer, never()).flush(); + verify(writer, times(1)).close(); + } + } + + @Test + public void testCloseWithCsvFormatAutoFlushOn() throws IOException { + // System.out.println("start method"); + try (final Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT.withAutoFlush(true); + try (CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat)) { + // empty + } + verify(writer, times(1)).flush(); + verify(writer, times(1)).close(); + }} + + @Test + public void testCloseWithFlushOff() throws IOException { + try (final Writer writer = mock(Writer.class)) { + final CSVFormat csvFormat = CSVFormat.DEFAULT; + 
@SuppressWarnings("resource") + final CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat); + csvPrinter.close(false); + verify(writer, never()).flush(); + verify(writer, times(1)).close(); + } + } + + @Test + public void testCloseWithFlushOn() throws IOException { + try (final Writer writer = mock(Writer.class)) { + @SuppressWarnings("resource") + final CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT); + csvPrinter.close(true); + verify(writer, times(1)).flush(); + } + } + + @Test + public void testCRComment() throws IOException { + final StringWriter sw = new StringWriter(); + final Object value = "abc"; + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { + printer.print(value); + printer.printComment("This is a comment\r\non multiple lines\rthis is next comment\r"); + assertEquals("abc" + recordSeparator + "# This is a comment" + recordSeparator + "# on multiple lines" + + recordSeparator + "# this is next comment" + recordSeparator + "# " + recordSeparator, sw.toString()); + } + } + + @Test + public void testCSV135() throws IOException { + final List list = new LinkedList<>(); + list.add("\"\""); // "" + list.add("\\\\"); // \\ + list.add("\\\"\\"); // \"\ + // + // "",\\,\"\ (unchanged) + tryFormat(list, null, null, "\"\",\\\\,\\\"\\"); + // + // """""",\\,"\""\" (quoted, and embedded DQ doubled) + tryFormat(list, '"', null, "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); + // + // "",\\\\,\\"\\ (escapes escaped, not quoted) + tryFormat(list, null, '\\', "\"\",\\\\\\\\,\\\\\"\\\\"); + // + // "\"\"","\\\\","\\\"\\" (quoted, and embedded DQ & escape escaped) + tryFormat(list, '"', '\\', "\"\\\"\\\"\",\"\\\\\\\\\",\"\\\\\\\"\\\\\""); + // + // """""",\\,"\""\" (quoted, embedded DQ escaped) + tryFormat(list, '"', '"', "\"\"\"\"\"\",\\\\,\"\\\"\"\\\""); + } + + @Test + public void testCSV259() throws IOException { + final StringWriter sw = new StringWriter(); + try (final Reader reader = new 
FileReader("src/test/resources/org/apache/commons/csv/CSV-259/sample.txt"); + final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { + printer.print(reader); + assertEquals("x!,y!,z", sw.toString()); + } + } + + @Test + public void testDelimeterQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("'a,b,c',xyz", sw.toString()); + } + } + + @Test + public void testDelimeterQuoteNone() throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.withEscape('!').withQuoteMode(QuoteMode.NONE); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("a!,b!,c,xyz", sw.toString()); + } + } + + @Test + public void testDelimeterStringQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build())) { + printer.print("a[|]b[|]c"); + printer.print("xyz"); + assertEquals("'a[|]b[|]c'[|]xyz", sw.toString()); + } + } + + @Test + public void testDelimeterStringQuoteNone() throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').setQuoteMode(QuoteMode.NONE).build(); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.print("a[|]b[|]c"); + printer.print("xyz"); + printer.print("a[xy]bc[]"); + assertEquals("a![!|!]b![!|!]c[|]xyz[|]a[xy]bc[]", sw.toString()); + } + } + + @Test + public void testDelimiterEscaped() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) { + 
printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("a!,b!,c,xyz", sw.toString()); + } + } + + @Test + public void testDelimiterPlain() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + printer.print("a,b,c"); + printer.print("xyz"); + assertEquals("a,b,c,xyz", sw.toString()); + } + } + + @Test + public void testDelimiterStringEscaped() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.builder().setDelimiter("|||").setEscape('!').setQuote(null).build())) { + printer.print("a|||b|||c"); + printer.print("xyz"); + assertEquals("a!|!|!|b!|!|!|c|||xyz", sw.toString()); + } + } + + @Test + public void testDisabledComment() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printComment("This is a comment"); + assertEquals("", sw.toString()); + } + } + + @Test + public void testDontQuoteEuroFirstChar() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { + printer.printRecord(EURO_CH, "Deux"); + assertEquals(EURO_CH + ",Deux" + recordSeparator, sw.toString()); + } + } + + @Test + public void testEolEscaped() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { + printer.print("a\rb\nc"); + printer.print("x\fy\bz"); + assertEquals("a!rb!nc,x\fy\bz", sw.toString()); + } + } + + @Test + public void testEolPlain() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + printer.print("a\rb\nc"); + printer.print("x\fy\bz"); + assertEquals("a\rb\nc,x\fy\bz", 
sw.toString()); + } + } + + @Test + public void testEolQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + printer.print("a\rb\nc"); + printer.print("x\by\fz"); + assertEquals("'a\rb\nc',x\by\fz", sw.toString()); + } + } + + @Test + public void testEscapeBackslash1() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + printer.print("\\"); + } + assertEquals("\\", sw.toString()); + } + + @Test + public void testEscapeBackslash2() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + printer.print("\\\r"); + } + assertEquals("'\\\r'", sw.toString()); + } + + @Test + public void testEscapeBackslash3() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + printer.print("X\\\r"); + } + assertEquals("'X\\\r'", sw.toString()); + } + + @Test + public void testEscapeBackslash4() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + public void testEscapeBackslash5() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(QUOTE_CH))) { + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + public void testEscapeNull1() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + printer.print("\\"); + } + assertEquals("\\", 
sw.toString()); + } + + @Test + public void testEscapeNull2() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + printer.print("\\\r"); + } + assertEquals("\"\\\r\"", sw.toString()); + } + + @Test + public void testEscapeNull3() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + printer.print("X\\\r"); + } + assertEquals("\"X\\\r\"", sw.toString()); + } + + @Test + public void testEscapeNull4() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + public void testEscapeNull5() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape(null))) { + printer.print("\\\\"); + } + assertEquals("\\\\", sw.toString()); + } + + @Test + public void testExcelPrintAllArrayOfArrays() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + printer.printRecords((Object[]) new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } }); + assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + } + } + + @Test + public void testExcelPrintAllArrayOfLists() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + printer.printRecords( + (Object[]) new List[] { Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2") }); + assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + } + } + + @Test + public void testExcelPrintAllIterableOfArrays() throws 
IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + printer.printRecords(Arrays.asList(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); + assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + } + } + + @Test + public void testExcelPrintAllIterableOfLists() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + printer.printRecords( + Arrays.asList(Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2"))); + assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + } + } + + @Test + public void testExcelPrintAllStreamOfArrays() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + printer.printRecords(Stream.of(new String[][] { { "r1c1", "r1c2" }, { "r2c1", "r2c2" } })); + assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); + } + } + + @Test + public void testExcelPrinter1() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + printer.printRecord("a", "b"); + assertEquals("a,b" + recordSeparator, sw.toString()); + } + } + + @Test + public void testExcelPrinter2() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL)) { + printer.printRecord("a,b", "b"); + assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); + } + } + + @Test + public void testHeader() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, + CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3"))) { + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", 
"z"); + assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); + } + } + + @Test + public void testHeaderCommentExcel() throws IOException { + final StringWriter sw = new StringWriter(); + final Date now = new Date(); + final CSVFormat format = CSVFormat.EXCEL; + try (final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { + assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1,Col2\r\nA,B\r\nC,D\r\n", + sw.toString()); + } + } + + @Test + public void testHeaderCommentTdf() throws IOException { + final StringWriter sw = new StringWriter(); + final Date now = new Date(); + final CSVFormat format = CSVFormat.TDF; + try (final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format)) { + assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1\tCol2\r\nA\tB\r\nC\tD\r\n", + sw.toString()); + } + } + + @Test + public void testHeaderNotSet() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", "z"); + assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); + } + } + + @Test + public void testInvalidFormat() { + assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR)); + } + + @Test + public void testJdbcPrinter() throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + try (final Connection connection = getH2Connection()) { + setUpTable(connection); + try (final Statement stmt = connection.createStatement(); + final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT); + final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST");) { + printer.printRecords(resultSet); + } + } + assertEquals("1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 + "\"" + recordSeparator, sw.toString()); + } + + @Test + public void 
testJdbcPrinterWithResultSet() throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + Class.forName("org.h2.Driver"); + try (final Connection connection = getH2Connection()) { + setUpTable(connection); + try (final Statement stmt = connection.createStatement(); + final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from TEST"); + final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet).print(sw)) { + printer.printRecords(resultSet); + } + } + assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + longText2 + + "\"" + recordSeparator, sw.toString()); + } + + @Test + public void testJdbcPrinterWithResultSetHeader() throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + try (final Connection connection = getH2Connection()) { + setUpTable(connection); + try (final Statement stmt = connection.createStatement(); + final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT);) { + try (final ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { + printer.printRecords(resultSet, true); + assertEquals("ID,NAME" + recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, + sw.toString()); + } + try (final ResultSet resultSet = stmt.executeQuery("select ID, NAME from TEST")) { + printer.printRecords(resultSet, false); + assertNotEquals("ID,NAME" + recordSeparator + "1,r1" + recordSeparator + "2,r2" + recordSeparator, + sw.toString()); + } + } + } + } + + @Test + public void testJdbcPrinterWithResultSetMetaData() throws IOException, ClassNotFoundException, SQLException { + final StringWriter sw = new StringWriter(); + Class.forName("org.h2.Driver"); + try (final Connection connection = getH2Connection()) { + setUpTable(connection); + try (final Statement stmt = connection.createStatement(); + final ResultSet resultSet = stmt.executeQuery("select ID, NAME, TEXT from 
TEST"); + final CSVPrinter printer = CSVFormat.DEFAULT.withHeader(resultSet.getMetaData()).print(sw)) { + printer.printRecords(resultSet); + assertEquals("ID,NAME,TEXT" + recordSeparator + "1,r1,\"long text 1\"" + recordSeparator + "2,r2,\"" + + longText2 + "\"" + recordSeparator, sw.toString()); + } + } + } + + @Test + public void testJira135_part1() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\""); + printer.printRecord(list); + } + final String expected = "\"\\\"\"" + format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + @Disabled + public void testJira135_part2() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\n"); + printer.printRecord(list); + } + final String expected = "\"\\n\"" + format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + public void testJira135_part3() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\\"); + printer.printRecord(list); + } + final String expected = "\"\\\\\"" 
+ format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + @Disabled + public void testJira135All() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH); + final StringWriter sw = new StringWriter(); + final List list = new LinkedList<>(); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + list.add("\""); + list.add("\n"); + list.add("\\"); + printer.printRecord(list); + } + final String expected = "\"\\\"\",\"\\n\",\"\\\"" + format.getRecordSeparator(); + assertEquals(expected, sw.toString()); + final String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(list.toArray(), format), record0); + } + + @Test + public void testMongoDbCsvBasic() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { + printer.printRecord("a", "b"); + assertEquals("a,b" + recordSeparator, sw.toString()); + } + } + + @Test + public void testMongoDbCsvCommaInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { + printer.printRecord("a,b", "c"); + assertEquals("\"a,b\",c" + recordSeparator, sw.toString()); + } + } + + @Test + public void testMongoDbCsvDoubleQuoteInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_CSV)) { + printer.printRecord("a \"c\" b", "d"); + assertEquals("\"a \"\"c\"\" b\",d" + recordSeparator, sw.toString()); + } + } + + @Test + public void testMongoDbCsvTabInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, 
CSVFormat.MONGODB_CSV)) { + printer.printRecord("a\tb", "c"); + assertEquals("a\tb,c" + recordSeparator, sw.toString()); + } + } + + @Test + public void testMongoDbTsvBasic() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { + printer.printRecord("a", "b"); + assertEquals("a\tb" + recordSeparator, sw.toString()); + } + } + + @Test + public void testMongoDbTsvCommaInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { + printer.printRecord("a,b", "c"); + assertEquals("a,b\tc" + recordSeparator, sw.toString()); + } + } + + @Test + public void testMongoDbTsvTabInValue() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.MONGODB_TSV)) { + printer.printRecord("a\tb", "c"); + assertEquals("\"a\tb\"\tc" + recordSeparator, sw.toString()); + } + } + + @Test + public void testMultiLineComment() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { + printer.printComment("This is a comment\non multiple lines"); + + assertEquals("# This is a comment" + recordSeparator + "# on multiple lines" + recordSeparator, + sw.toString()); + } + } + + @Test + public void testMySqlNullOutput() throws IOException { + Object[] s = new String[] { "NULL", null }; + CSVFormat format = CSVFormat.MYSQL.withQuote(DQUOTE_CHAR).withNullString("NULL") + .withQuoteMode(QuoteMode.NON_NUMERIC); + StringWriter writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + String expected = "\"NULL\"\tNULL\n"; + assertEquals(expected, writer.toString()); + String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(s, record0); + + s = new 
String[] { "\\N", null }; + format = CSVFormat.MYSQL.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "A" }; + format = CSVFormat.MYSQL.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\n", "A" }; + format = CSVFormat.MYSQL.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\n\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.MYSQL.withNullString("NULL"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\tNULL\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.MYSQL; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "", "\u000e,\\\r" }; + format = 
CSVFormat.MYSQL; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\t\u000e,\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "NULL", "\\\r" }; + format = CSVFormat.MYSQL; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "NULL\t\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\\r" }; + format = CSVFormat.MYSQL; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + } + + @Test + public void testMySqlNullStringDefault() { + assertEquals("\\N", CSVFormat.MYSQL.getNullString()); + } + + @Test + public void testNewCsvPrinterAppendableNullFormat() { + assertThrows(NullPointerException.class, () -> new CSVPrinter(new StringWriter(), null)); + } + + @Test + public void testNewCsvPrinterNullAppendableFormat() { + assertThrows(NullPointerException.class, () -> new CSVPrinter(null, CSVFormat.DEFAULT)); + } + + @Test + public void testNotFlushable() throws IOException { + final Appendable out = new StringBuilder(); + try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) { + printer.printRecord("a", "b", "c"); + assertEquals("a,b,c" + recordSeparator, out.toString()); + printer.flush(); + } + } + + @Test + public void testParseCustomNullValues() throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = 
CSVFormat.DEFAULT.withNullString("NULL"); + try (final CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord("a", null, "b"); + } + final String csvString = sw.toString(); + assertEquals("a,NULL,b" + recordSeparator, csvString); + try (final CSVParser iterable = format.parse(new StringReader(csvString))) { + final Iterator iterator = iterable.iterator(); + final CSVRecord record = iterator.next(); + assertEquals("a", record.get(0)); + assertNull(record.get(1)); + assertEquals("b", record.get(2)); + assertFalse(iterator.hasNext()); + } + } + + @Test + public void testPlainEscaped() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withEscape('!'))) { + printer.print("abc"); + printer.print("xyz"); + assertEquals("abc,xyz", sw.toString()); + } + } + + @Test + public void testPlainPlain() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + printer.print("abc"); + printer.print("xyz"); + assertEquals("abc,xyz", sw.toString()); + } + } + + @Test + public void testPlainQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + printer.print("abc"); + assertEquals("abc", sw.toString()); + } + } + + @Test + @Disabled + public void testPostgreSqlCsvNullOutput() throws IOException { + Object[] s = new String[] { "NULL", null }; + CSVFormat format = CSVFormat.POSTGRESQL_CSV.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); + StringWriter writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + String expected = "\"NULL\",NULL\n"; + assertEquals(expected, writer.toString()); + String[] record0 = toFirstRecordValues(expected, format); + 
assertArrayEquals(new Object[2], record0); + + s = new String[] { "\\N", null }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "A" }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\n", "A" }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\n\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_CSV.withNullString("NULL"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\tNULL\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + 
assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "", "\u000e,\\\r" }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\t\u000e,\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "NULL", "\\\r" }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "NULL\t\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\\r" }; + format = CSVFormat.POSTGRESQL_CSV; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + } + + @Test + @Disabled + public void testPostgreSqlCsvTextOutput() throws IOException { + Object[] s = new String[] { "NULL", null }; + CSVFormat format = CSVFormat.POSTGRESQL_TEXT.withQuote(DQUOTE_CHAR).withNullString("NULL").withQuoteMode(QuoteMode.ALL_NON_NULL); + StringWriter writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + String expected = "\"NULL\"\tNULL\n"; + assertEquals(expected, writer.toString()); + String[] record0 = toFirstRecordValues(expected, format); + assertArrayEquals(new Object[2], record0); + + s = new String[] { "\\N", null }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new 
CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "A" }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\N\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\n", "A" }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("\\N"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\n\tA\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_TEXT.withNullString("NULL"); + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\tNULL\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "", null }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\t\\N\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\N", "", "\u000e,\\\r" }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, 
format)) { + printer.printRecord(s); + } + expected = "\\\\N\t\t\u000e,\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "NULL", "\\\r" }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "NULL\t\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + + s = new String[] { "\\\r" }; + format = CSVFormat.POSTGRESQL_TEXT; + writer = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(writer, format)) { + printer.printRecord(s); + } + expected = "\\\\\\r\n"; + assertEquals(expected, writer.toString()); + record0 = toFirstRecordValues(expected, format); + assertArrayEquals(expectNulls(s, format), record0); + } + + @Test + public void testPostgreSqlNullStringDefaultCsv() { + assertEquals("", CSVFormat.POSTGRESQL_CSV.getNullString()); + } + + @Test + public void testPostgreSqlNullStringDefaultText() { + assertEquals("\\N", CSVFormat.POSTGRESQL_TEXT.getNullString()); + } + + @Test + public void testPrint() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = CSVFormat.DEFAULT.print(sw)) { + printer.printRecord("a", "b\\c"); + assertEquals("a,b\\c" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrintCSVParser() throws IOException { + final String code = "a1,b1\n" // 1) + + "a2,b2\n" // 2) + + "a3,b3\n" // 3) + + "a4,b4\n"// 4) + ; + final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; + final CSVFormat format = CSVFormat.DEFAULT; + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { + 
printer.printRecords(parser); + } + try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", res, records); + } + } + + @Test + public void testPrintCSVRecord() throws IOException { + final String code = "a1,b1\n" // 1) + + "a2,b2\n" // 2) + + "a3,b3\n" // 3) + + "a4,b4\n"// 4) + ; + final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; + final CSVFormat format = CSVFormat.DEFAULT; + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { + for (final CSVRecord record : parser) { + printer.printRecord(record); + } + } + try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", res, records); + } + } + + @Test + public void testPrintCSVRecords() throws IOException { + final String code = "a1,b1\n" // 1) + + "a2,b2\n" // 2) + + "a3,b3\n" // 3) + + "a4,b4\n"// 4) + ; + final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; + final CSVFormat format = CSVFormat.DEFAULT; + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { + printer.printRecords(parser.getRecords()); + } + try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", res, records); + } + } + + @Test + public void testPrintCustomNullValues() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withNullString("NULL"))) { + printer.printRecord("a", null, "b"); + assertEquals("a,NULL,b" + recordSeparator, sw.toString()); + } + } + + 
@Test + public void testPrinter1() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a", "b"); + assertEquals("a,b" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrinter2() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a,b", "b"); + assertEquals("\"a,b\",b" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrinter3() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a, b", "b "); + assertEquals("\"a, b\",\"b \"" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrinter4() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a", "b\"c"); + assertEquals("a,\"b\"\"c\"" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrinter5() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a", "b\nc"); + assertEquals("a,\"b\nc\"" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrinter6() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a", "b\r\nc"); + assertEquals("a,\"b\r\nc\"" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrinter7() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a", "b\\c"); + assertEquals("a,b\\c" + recordSeparator, sw.toString()); 
+ } + } + + @Test + public void testPrintNullValues() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT)) { + printer.printRecord("a", null, "b"); + assertEquals("a,,b" + recordSeparator, sw.toString()); + } + } + + @Test + public void testPrintOnePositiveInteger() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL))) { + printer.print(Integer.MAX_VALUE); + assertEquals(String.valueOf(Integer.MAX_VALUE), sw.toString()); + } + } + + /** + * Test to target the use of {@link IOUtils#copy(java.io.Reader, Appendable)} which directly + * buffers the value from the Reader to the Appendable. + * + *

      <p>Requires the format to have no quote or escape character, value to be a + * {@link java.io.Reader Reader} and the output MUST NOT be a + * {@link java.io.Writer Writer} but some other Appendable.</p>

      + * + * @throws IOException Not expected to happen + */ + @Test + public void testPrintReaderWithoutQuoteToAppendable() throws IOException { + final StringBuilder sb = new StringBuilder(); + final String content = "testValue"; + try (final CSVPrinter printer = new CSVPrinter(sb, CSVFormat.DEFAULT.withQuote(null))) { + final StringReader value = new StringReader(content); + printer.print(value); + } + assertEquals(content, sb.toString()); + } + + /** + * Test to target the use of {@link IOUtils#copyLarge(java.io.Reader, Writer)} which directly + * buffers the value from the Reader to the Writer. + * + *

      <p>Requires the format to have no quote or escape character, value to be a + * {@link java.io.Reader Reader} and the output MUST be a + * {@link java.io.Writer Writer}.</p>

      + * + * @throws IOException Not expected to happen + */ + @Test + public void testPrintReaderWithoutQuoteToWriter() throws IOException { + final StringWriter sw = new StringWriter(); + final String content = "testValue"; + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null))) { + final StringReader value = new StringReader(content); + printer.print(value); + } + assertEquals(content, sw.toString()); + } + + @Test + public void testPrintRecordStream() throws IOException { + final String code = "a1,b1\n" // 1) + + "a2,b2\n" // 2) + + "a3,b3\n" // 3) + + "a4,b4\n"// 4) + ; + final String[][] res = {{"a1", "b1"}, {"a2", "b2"}, {"a3", "b3"}, {"a4", "b4"}}; + final CSVFormat format = CSVFormat.DEFAULT; + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = format.print(sw); final CSVParser parser = CSVParser.parse(code, format)) { + for (final CSVRecord record : parser) { + printer.printRecord(record.stream()); + } + } + try (final CSVParser parser = CSVParser.parse(sw.toString(), format)) { + final List records = parser.getRecords(); + assertFalse(records.isEmpty()); + Utils.compare("Fail", res, records); + } + } + + @Test + public void testPrintRecordsWithCSVRecord() throws IOException { + final String[] values = {"A", "B", "C"}; + final String rowData = StringUtils.join(values, ','); + final CharArrayWriter charArrayWriter = new CharArrayWriter(0); + try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData)); + final CSVPrinter csvPrinter = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { + for (final CSVRecord record : parser) { + csvPrinter.printRecord(record); + } + } + assertEquals(6, charArrayWriter.size()); + assertEquals("A|B|C" + CSVFormat.INFORMIX_UNLOAD.getRecordSeparator(), charArrayWriter.toString()); + } + + @Test + public void testPrintRecordsWithEmptyVector() throws IOException { + final PrintStream out = System.out; + try { + System.setOut(new 
PrintStream(NullOutputStream.INSTANCE)); + try (CSVPrinter csvPrinter = CSVFormat.POSTGRESQL_TEXT.printer()) { + final Vector vector = new Vector<>(); + final int expectedCapacity = 23; + vector.setSize(expectedCapacity); + csvPrinter.printRecords(vector); + assertEquals(expectedCapacity, vector.capacity()); + } + } finally { + System.setOut(out); + } + } + + @Test + public void testPrintRecordsWithObjectArray() throws IOException { + final CharArrayWriter charArrayWriter = new CharArrayWriter(0); + try (CSVPrinter csvPrinter = CSVFormat.INFORMIX_UNLOAD.print(charArrayWriter)) { + final HashSet hashSet = new HashSet<>(); + final Object[] objectArray = new Object[6]; + objectArray[3] = hashSet; + csvPrinter.printRecords(objectArray); + } + assertEquals(6, charArrayWriter.size()); + assertEquals("\n\n\n\n\n\n", charArrayWriter.toString()); + } + + @Test + public void testPrintRecordsWithResultSetOneRow() throws IOException, SQLException { + try (CSVPrinter csvPrinter = CSVFormat.MYSQL.printer()) { + try (ResultSet resultSet = new SimpleResultSet()) { + csvPrinter.printRecords(resultSet); + assertEquals(0, resultSet.getRow()); + } + } + } + + @Test + public void testPrintToFileWithCharsetUtf16Be() throws IOException { + final File file = createTempFile(); + try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file, StandardCharsets.UTF_16BE)) { + printer.printRecord("a", "b\\c"); + } + assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, StandardCharsets.UTF_16BE)); + } + + @Test + public void testPrintToFileWithDefaultCharset() throws IOException { + final File file = createTempFile(); + try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { + printer.printRecord("a", "b\\c"); + } + assertEquals("a,b\\c" + recordSeparator, FileUtils.readFileToString(file, Charset.defaultCharset())); + } + + @Test + public void testPrintToPathWithDefaultCharset() throws IOException { + final Path file = createTempPath(); + 
try (final CSVPrinter printer = CSVFormat.DEFAULT.print(file, Charset.defaultCharset())) { + printer.printRecord("a", "b\\c"); + } + assertEquals("a,b\\c" + recordSeparator, new String(Files.readAllBytes(file), Charset.defaultCharset())); + } + + @Test + public void testQuoteAll() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL))) { + printer.printRecord("a", "b\nc", "d"); + assertEquals("\"a\",\"b\nc\",\"d\"" + recordSeparator, sw.toString()); + } + } + + @Test + public void testQuoteCommaFirstChar() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { + printer.printRecord(","); + assertEquals("\",\"" + recordSeparator, sw.toString()); + } + } + + @Test + public void testQuoteNonNumeric() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.NON_NUMERIC))) { + printer.printRecord("a", "b\nc", Integer.valueOf(1)); + assertEquals("\"a\",\"b\nc\",1" + recordSeparator, sw.toString()); + } + } + + @Test + public void testRandomDefault() throws Exception { + doRandom(CSVFormat.DEFAULT, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + public void testRandomExcel() throws Exception { + doRandom(CSVFormat.EXCEL, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + @Disabled + public void testRandomMongoDbCsv() throws Exception { + doRandom(CSVFormat.MONGODB_CSV, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + public void testRandomMySql() throws Exception { + doRandom(CSVFormat.MYSQL, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + @Disabled + public void testRandomOracle() throws Exception { + doRandom(CSVFormat.ORACLE, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + @Disabled + public void testRandomPostgreSqlCsv() throws Exception { + doRandom(CSVFormat.POSTGRESQL_CSV, 
ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + public void testRandomPostgreSqlText() throws Exception { + doRandom(CSVFormat.POSTGRESQL_TEXT, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + public void testRandomRfc4180() throws Exception { + doRandom(CSVFormat.RFC4180, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + public void testRandomTdf() throws Exception { + doRandom(CSVFormat.TDF, ITERATIONS_FOR_RANDOM_TEST); + } + + @Test + public void testSingleLineComment() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withCommentMarker('#'))) { + printer.printComment("This is a comment"); + assertEquals("# This is a comment" + recordSeparator, sw.toString()); + } + } + + @Test + public void testSingleQuoteQuoted() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote('\''))) { + printer.print("a'b'c"); + printer.print("xyz"); + assertEquals("'a''b''c',xyz", sw.toString()); + } + } + + @Test + public void testSkipHeaderRecordFalse() throws IOException { + // functionally identical to testHeader, used to test CSV-153 + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, + CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(false))) { + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", "z"); + assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); + } + } + + @Test + public void testSkipHeaderRecordTrue() throws IOException { + // functionally identical to testHeaderNotSet, used to test CSV-153 + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, + CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(true))) { + printer.printRecord("a", "b", "c"); + printer.printRecord("x", "y", "z"); + assertEquals("a,b,c\r\nx,y,z\r\n", 
sw.toString()); + } + } + + @Test + public void testTrailingDelimiterOnTwoColumns() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrailingDelimiter())) { + printer.printRecord("A", "B"); + assertEquals("A,B,\r\n", sw.toString()); + } + } + + @Test + public void testTrimOffOneColumn() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim(false))) { + printer.print(" A "); + assertEquals("\" A \"", sw.toString()); + } + } + + @Test + public void testTrimOnOneColumn() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { + printer.print(" A "); + assertEquals("A", sw.toString()); + } + } + + @Test + public void testTrimOnTwoColumns() throws IOException { + final StringWriter sw = new StringWriter(); + try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withTrim())) { + printer.print(" A "); + printer.print(" B "); + assertEquals("A,B", sw.toString()); + } + } + + private String[] toFirstRecordValues(final String expected, final CSVFormat format) throws IOException { + try (final CSVParser parser = CSVParser.parse(expected, format)) { + return parser.getRecords().get(0).values(); + } + } + + private void tryFormat(final List list, final Character quote, final Character escape, final String expected) throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.withQuote(quote).withEscape(escape).withRecordSeparator(null); + final Appendable out = new StringBuilder(); + try (final CSVPrinter printer = new CSVPrinter(out, format)) { + printer.printRecord(list); + } + assertEquals(expected, out.toString()); + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index 042677f1a3..9be416cf7b 
100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -16,12 +16,14 @@ */ package org.apache.commons.csv; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertAll; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -30,6 +32,7 @@ import java.io.ObjectOutputStream; import java.io.StringReader; import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; @@ -47,9 +50,7 @@ private enum EnumFixture { /** This enum overrides toString() but it's the names that matter. 
*/ public enum EnumHeader { - FIRST("first"), - SECOND("second"), - THIRD("third"); + FIRST("first"), SECOND("second"), THIRD("third"); private final String number; @@ -69,7 +70,7 @@ public String toString() { @BeforeEach public void setUp() throws Exception { - values = new String[] {"A", "B", "C"}; + values = new String[] { "A", "B", "C" }; final String rowData = StringUtils.join(values, ','); try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData))) { record = parser.iterator().next(); @@ -88,6 +89,37 @@ public void testCSVRecordNULLValues() throws IOException { assertThrows(IllegalArgumentException.class, () -> csvRecord.get("B")); } + @Test + public void testDuplicateHeaderGet() throws IOException { + final String csv = "A,A,B,B\n1,2,5,6\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().build(); + + try (final CSVParser parser = CSVParser.parse(csv, format)) { + final CSVRecord record = parser.nextRecord(); + + assertAll("Test that it gets the last instance of a column when there are duplicate headings", + () -> assertEquals("2", record.get("A")), + () -> assertEquals("6", record.get("B")) + ); + } + } + + @Test + public void testDuplicateHeaderToMap() throws IOException { + final String csv = "A,A,B,B\n1,2,5,6\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setHeader().build(); + + try (final CSVParser parser = CSVParser.parse(csv, format)) { + final CSVRecord record = parser.nextRecord(); + final Map map = record.toMap(); + + assertAll("Test that it gets the last instance of a column when there are duplicate headings", + () -> assertEquals("2", map.get("A")), + () -> assertEquals("6", map.get("B")) + ); + } + } + @Test public void testGetInt() { assertEquals(values[0], record.get(0)); @@ -254,12 +286,7 @@ public void testSerialization() throws IOException, ClassNotFoundException { assertFalse(rec.isSet("A")); assertEquals(0, rec.toMap().size()); // This will throw - try { - rec.get("A"); - 
org.junit.jupiter.api.Assertions.fail("Access by name is not expected after deserialisation"); - } catch (final IllegalStateException expected) { - // OK - } + assertThrows(IllegalStateException.class, () -> rec.get("A")); } } @@ -273,7 +300,17 @@ public void testStream() { } @Test - public void testToList() { + public void testToListAdd() { + final String[] expected = values.clone(); + final List list = record.toList(); + list.add("Last"); + assertEquals("Last", list.get(list.size() - 1)); + assertEquals(list.size(), values.length + 1); + assertArrayEquals(expected, values); + } + + @Test + public void testToListFor() { int i = 0; for (final String value : record.toList()) { assertEquals(values[i], value); @@ -281,6 +318,24 @@ public void testToList() { } } + @Test + public void testToListForEach() { + final AtomicInteger i = new AtomicInteger(); + record.toList().forEach(e -> { + assertEquals(values[i.getAndIncrement()], e); + }); + } + + @Test + public void testToListSet() { + final String[] expected = values.clone(); + final List list = record.toList(); + list.set(list.size() - 1, "Last"); + assertEquals("Last", list.get(list.size() - 1)); + assertEquals(list.size(), values.length); + assertArrayEquals(expected, values); + } + @Test public void testToMap() { final Map map = this.recordWithHeader.toMap(); diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java index cc8d728af0..f36eaa4533 100644 --- a/src/test/java/org/apache/commons/csv/LexerTest.java +++ b/src/test/java/org/apache/commons/csv/LexerTest.java @@ -41,7 +41,6 @@ import org.junit.jupiter.api.Test; /** - * */ public class LexerTest { diff --git a/src/test/java/org/apache/commons/csv/PerformanceTest.java b/src/test/java/org/apache/commons/csv/PerformanceTest.java index ea6e8fd112..489d72350d 100644 --- a/src/test/java/org/apache/commons/csv/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/PerformanceTest.java @@ -1,330 
+1,331 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.Reader; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.zip.GZIPInputStream; - -import org.apache.commons.io.IOUtils; - -/** - * Basic test harness. 
- */ -@SuppressWarnings("boxing") -public class PerformanceTest { - - @FunctionalInterface - private interface CSVParserFactory { - CSVParser createParser() throws IOException; - } - - // Container for basic statistics - private static class Stats { - final int count; - final int fields; - Stats(final int c, final int f) { - count = c; - fields = f; - } - } - - private static final String[] PROPS = { - "java.version", // Java Runtime Environment version - "java.vendor", // Java Runtime Environment vendor -// "java.vm.specification.version", // Java Virtual Machine specification version -// "java.vm.specification.vendor", // Java Virtual Machine specification vendor -// "java.vm.specification.name", // Java Virtual Machine specification name - "java.vm.version", // Java Virtual Machine implementation version -// "java.vm.vendor", // Java Virtual Machine implementation vendor - "java.vm.name", // Java Virtual Machine implementation name -// "java.specification.version", // Java Runtime Environment specification version -// "java.specification.vendor", // Java Runtime Environment specification vendor -// "java.specification.name", // Java Runtime Environment specification name - - "os.name", // Operating system name - "os.arch", // Operating system architecture - "os.version", // Operating system version - }; - private static int max = 11; // skip first test - - private static int num; // number of elapsed times recorded - - private static final long[] ELAPSED_TIMES = new long[max]; - private static final CSVFormat format = CSVFormat.EXCEL; - - private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; - - private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); - - private static Reader createReader() throws IOException { - return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1); - } - - private static Lexer createTestCSVLexer(final String test, final 
ExtendedBufferedReader input) - throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception { - return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input); - } - - private static Constructor getLexerCtor(final String clazz) throws Exception { - @SuppressWarnings("unchecked") - final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); - return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class); - } - - private static Stats iterate(final Iterable it) { - int count = 0; - int fields = 0; - for (final CSVRecord record : it) { - count++; - fields += record.size(); - } - return new Stats(count, fields); - } - - public static void main(final String [] args) throws Exception { - if (BIG_FILE.exists()) { - System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length()); - } else { - System.out.println("Decompressing test fixture to: " + BIG_FILE + "..."); - try ( - final InputStream input = new GZIPInputStream( - PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC)); - final OutputStream output = new FileOutputStream(BIG_FILE)) { - IOUtils.copy(input, output); - System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); - } - } - final int argc = args.length; - if (argc > 0) { - max = Integer.parseInt(args[0]); - } - - final String[] tests; - if (argc > 1) { - tests = new String[argc - 1]; - System.arraycopy(args, 1, tests, 0, argc - 1); - } else { - tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" }; - } - for (final String p : PROPS) { - System.out.printf("%s=%s%n", p, System.getProperty(p)); - } - System.out.printf("Max count: %d%n%n", max); - - for (final String test : tests) { - if ("file".equals(test)) { - testReadBigFile(false); - } else if ("split".equals(test)) { - testReadBigFile(true); 
- } else if ("csv".equals(test)) { - testParseCommonsCSV(); - } else if ("csv-path".equals(test)) { - testParsePath(); - } else if ("csv-path-db".equals(test)) { - testParsePathDoubleBuffering(); - } else if ("csv-url".equals(test)) { - testParseURL(); - } else if ("lexreset".equals(test)) { - testCSVLexer(false, test); - } else if ("lexnew".equals(test)) { - testCSVLexer(true, test); - } else if (test.startsWith("CSVLexer")) { - testCSVLexer(false, test); - } else if ("extb".equals(test)) { - testExtendedBuffer(false); - } else if ("exts".equals(test)) { - testExtendedBuffer(true); - } else { - System.out.printf("Invalid test name: %s%n", test); - } - } - } - - private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { - int count = 0; - int fields = 0; - String record; - while ((record = in.readLine()) != null) { - count++; - fields += split ? record.split(",").length : 1; - } - return new Stats(count, fields); - } - - // calculate and show average - private static void show(){ - if (num > 1) { - long tot = 0; - for (int i = 1; i < num; i++) { // skip first test - tot += ELAPSED_TIMES[i]; - } - System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1)); - } - num = 0; // ready for next set - } - - // Display end stats; store elapsed for average - private static void show(final String msg, final Stats s, final long start) { - final long elapsed = System.currentTimeMillis() - start; - System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields); - ELAPSED_TIMES[num] = elapsed; - num++; - } - - private static void testCSVLexer(final boolean newToken, final String test) throws Exception { - Token token = new Token(); - String dynamic = ""; - for (int i = 0; i < max; i++) { - final String simpleName; - final Stats stats; - final long startMillis; - try (final ExtendedBufferedReader input = new ExtendedBufferedReader(createReader()); - final Lexer lexer = createTestCSVLexer(test, input)) 
{ - if (test.startsWith("CSVLexer")) { - dynamic = "!"; - } - simpleName = lexer.getClass().getSimpleName(); - int count = 0; - int fields = 0; - startMillis = System.currentTimeMillis(); - do { - if (newToken) { - token = new Token(); - } else { - token.reset(); - } - lexer.nextToken(token); - switch (token.type) { - case EOF: - break; - case EORECORD: - fields++; - count++; - break; - case INVALID: - throw new IOException("invalid parse sequence <" + token.content.toString() + ">"); - case TOKEN: - fields++; - break; - case COMMENT: // not really expecting these - break; - default: - throw new IllegalStateException("Unexpected Token type: " + token.type); - } - } while (!token.type.equals(Token.Type.EOF)); - stats = new Stats(count, fields); - } - show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis); - } - show(); - } - - private static void testExtendedBuffer(final boolean makeString) throws Exception { - for (int i = 0; i < max; i++) { - int fields = 0; - int lines = 0; - final long startMillis; - try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) { - startMillis = System.currentTimeMillis(); - int read; - if (makeString) { - StringBuilder sb = new StringBuilder(); - while ((read = in.read()) != -1) { - sb.append((char) read); - if (read == ',') { // count delimiters - sb.toString(); - sb = new StringBuilder(); - fields++; - } else if (read == '\n') { - sb.toString(); - sb = new StringBuilder(); - lines++; - } - } - } else { - while ((read = in.read()) != -1) { - if (read == ',') { // count delimiters - fields++; - } else if (read == '\n') { - lines++; - } - } - } - fields += lines; // EOL is a delimiter too - } - show("Extended" + (makeString ? 
" toString" : ""), new Stats(lines, fields), startMillis); - } - show(); - } - - private static void testParseCommonsCSV() throws Exception { - testParser("CSV", () -> new CSVParser(createReader(), format)); - } - - private static void testParsePath() throws Exception { - testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format)); - } - - private static void testParsePathDoubleBuffering() throws Exception { - testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format)); - } - - private static void testParser(final String msg, final CSVParserFactory fac) throws Exception { - for (int i = 0; i < max; i++) { - final long startMillis; - final Stats stats; - try (final CSVParser parser = fac.createParser()) { - startMillis = System.currentTimeMillis(); - stats = iterate(parser); - } - show(msg, stats, startMillis); - } - show(); - } - - private static void testParseURL() throws Exception { - testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format)); - } - - private static void testReadBigFile(final boolean split) throws Exception { - for (int i = 0; i < max; i++) { - final long startMillis; - final Stats stats; - try (final BufferedReader in = new BufferedReader(createReader())) { - startMillis = System.currentTimeMillis(); - stats = readAll(in, split); - } - show(split ? "file+split" : "file", stats, startMillis); - } - show(); - } - +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.Reader; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.zip.GZIPInputStream; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; + +/** + * Basic test harness. 
+ */ +@SuppressWarnings("boxing") +public class PerformanceTest { + + @FunctionalInterface + private interface CSVParserFactory { + CSVParser createParser() throws IOException; + } + + // Container for basic statistics + private static final class Stats { + final int count; + final int fields; + Stats(final int c, final int f) { + count = c; + fields = f; + } + } + + private static final String[] PROPERTY_NAMES = { + "java.version", // Java Runtime Environment version + "java.vendor", // Java Runtime Environment vendor +// "java.vm.specification.version", // Java Virtual Machine specification version +// "java.vm.specification.vendor", // Java Virtual Machine specification vendor +// "java.vm.specification.name", // Java Virtual Machine specification name + "java.vm.version", // Java Virtual Machine implementation version +// "java.vm.vendor", // Java Virtual Machine implementation vendor + "java.vm.name", // Java Virtual Machine implementation name +// "java.specification.version", // Java Runtime Environment specification version +// "java.specification.vendor", // Java Runtime Environment specification vendor +// "java.specification.name", // Java Runtime Environment specification name + + "os.name", // Operating system name + "os.arch", // Operating system architecture + "os.version", // Operating system version + }; + private static int max = 11; // skip first test + + private static int num; // number of elapsed times recorded + + private static final long[] ELAPSED_TIMES = new long[max]; + private static final CSVFormat format = CSVFormat.EXCEL; + + private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; + + private static final File BIG_FILE = new File(FileUtils.getTempDirectoryPath(), "worldcitiespop.txt"); + + private static Reader createReader() throws IOException { + return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1); + } + + private static Lexer createTestCSVLexer(final String 
test, final ExtendedBufferedReader input) + throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception { + return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input); + } + + private static Constructor getLexerCtor(final String clazz) throws Exception { + @SuppressWarnings("unchecked") + final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); + return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class); + } + + private static Stats iterate(final Iterable iterable) { + int count = 0; + int fields = 0; + for (final CSVRecord record : iterable) { + count++; + fields += record.size(); + } + return new Stats(count, fields); + } + + public static void main(final String [] args) throws Exception { + if (BIG_FILE.exists()) { + System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length()); + } else { + System.out.println("Decompressing test fixture to: " + BIG_FILE + "..."); + try ( + final InputStream input = new GZIPInputStream( + PerformanceTest.class.getClassLoader().getResourceAsStream(TEST_RESRC)); + final OutputStream output = new FileOutputStream(BIG_FILE)) { + IOUtils.copy(input, output); + System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); + } + } + final int argc = args.length; + if (argc > 0) { + max = Integer.parseInt(args[0]); + } + + final String[] tests; + if (argc > 1) { + tests = new String[argc - 1]; + System.arraycopy(args, 1, tests, 0, argc - 1); + } else { + tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" }; + } + for (final String p : PROPERTY_NAMES) { + System.out.printf("%s=%s%n", p, System.getProperty(p)); + } + System.out.printf("Max count: %d%n%n", max); + + for (final String test : tests) { + if ("file".equals(test)) { + testReadBigFile(false); + } else if 
("split".equals(test)) { + testReadBigFile(true); + } else if ("csv".equals(test)) { + testParseCommonsCSV(); + } else if ("csv-path".equals(test)) { + testParsePath(); + } else if ("csv-path-db".equals(test)) { + testParsePathDoubleBuffering(); + } else if ("csv-url".equals(test)) { + testParseURL(); + } else if ("lexreset".equals(test)) { + testCSVLexer(false, test); + } else if ("lexnew".equals(test)) { + testCSVLexer(true, test); + } else if (test.startsWith("CSVLexer")) { + testCSVLexer(false, test); + } else if ("extb".equals(test)) { + testExtendedBuffer(false); + } else if ("exts".equals(test)) { + testExtendedBuffer(true); + } else { + System.out.printf("Invalid test name: %s%n", test); + } + } + } + + private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { + int count = 0; + int fields = 0; + String record; + while ((record = in.readLine()) != null) { + count++; + fields += split ? record.split(",").length : 1; + } + return new Stats(count, fields); + } + + // calculate and show average + private static void show(){ + if (num > 1) { + long tot = 0; + for (int i = 1; i < num; i++) { // skip first test + tot += ELAPSED_TIMES[i]; + } + System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1)); + } + num = 0; // ready for next set + } + + // Display end stats; store elapsed for average + private static void show(final String msg, final Stats s, final long start) { + final long elapsed = System.currentTimeMillis() - start; + System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields); + ELAPSED_TIMES[num] = elapsed; + num++; + } + + private static void testCSVLexer(final boolean newToken, final String test) throws Exception { + Token token = new Token(); + String dynamic = ""; + for (int i = 0; i < max; i++) { + final String simpleName; + final Stats stats; + final long startMillis; + try (final ExtendedBufferedReader input = new ExtendedBufferedReader(createReader()); + 
final Lexer lexer = createTestCSVLexer(test, input)) { + if (test.startsWith("CSVLexer")) { + dynamic = "!"; + } + simpleName = lexer.getClass().getSimpleName(); + int count = 0; + int fields = 0; + startMillis = System.currentTimeMillis(); + do { + if (newToken) { + token = new Token(); + } else { + token.reset(); + } + lexer.nextToken(token); + switch (token.type) { + case EOF: + break; + case EORECORD: + fields++; + count++; + break; + case INVALID: + throw new IOException("invalid parse sequence <" + token.content.toString() + ">"); + case TOKEN: + fields++; + break; + case COMMENT: // not really expecting these + break; + default: + throw new IllegalStateException("Unexpected Token type: " + token.type); + } + } while (!token.type.equals(Token.Type.EOF)); + stats = new Stats(count, fields); + } + show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis); + } + show(); + } + + private static void testExtendedBuffer(final boolean makeString) throws Exception { + for (int i = 0; i < max; i++) { + int fields = 0; + int lines = 0; + final long startMillis; + try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) { + startMillis = System.currentTimeMillis(); + int read; + if (makeString) { + StringBuilder sb = new StringBuilder(); + while ((read = in.read()) != -1) { + sb.append((char) read); + if (read == ',') { // count delimiters + sb.toString(); + sb = new StringBuilder(); + fields++; + } else if (read == '\n') { + sb.toString(); + sb = new StringBuilder(); + lines++; + } + } + } else { + while ((read = in.read()) != -1) { + if (read == ',') { // count delimiters + fields++; + } else if (read == '\n') { + lines++; + } + } + } + fields += lines; // EOL is a delimiter too + } + show("Extended" + (makeString ? 
" toString" : ""), new Stats(lines, fields), startMillis); + } + show(); + } + + private static void testParseCommonsCSV() throws Exception { + testParser("CSV", () -> new CSVParser(createReader(), format)); + } + + private static void testParsePath() throws Exception { + testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format)); + } + + private static void testParsePathDoubleBuffering() throws Exception { + testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format)); + } + + private static void testParser(final String msg, final CSVParserFactory fac) throws Exception { + for (int i = 0; i < max; i++) { + final long startMillis; + final Stats stats; + try (final CSVParser parser = fac.createParser()) { + startMillis = System.currentTimeMillis(); + stats = iterate(parser); + } + show(msg, stats, startMillis); + } + show(); + } + + private static void testParseURL() throws Exception { + testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format)); + } + + private static void testReadBigFile(final boolean split) throws Exception { + for (int i = 0; i < max; i++) { + final long startMillis; + final Stats stats; + try (final BufferedReader in = new BufferedReader(createReader())) { + startMillis = System.currentTimeMillis(); + stats = readAll(in, split); + } + show(split ? "file+split" : "file", stats, startMillis); + } + show(); + } + } \ No newline at end of file diff --git a/src/test/java/org/apache/commons/csv/Utils.java b/src/test/java/org/apache/commons/csv/Utils.java index fcdbc3d17f..d585669a75 100644 --- a/src/test/java/org/apache/commons/csv/Utils.java +++ b/src/test/java/org/apache/commons/csv/Utils.java @@ -13,7 +13,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. - * */ package org.apache.commons.csv; diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java index fca6bec299..315d2bf738 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java @@ -44,7 +44,7 @@ public void testWithIgnoreSurroundingSpacesEmpty() { /** * The difference between withTrim()and withIgnoreSurroundingSpace()īŧš difference: withTrim() can remove the leading * and trailing spaces and newlines in quotation marks, while withIgnoreSurroundingSpace() cannot The same point: - * you can remove the leading and trailing spaces,tabs and other symbols. + * you can remove the leading and trailing spaces, tabs and other symbols. */ @Test public void testWithTrimEmpty() { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java index a42cb2a359..70f8c5ead4 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv149Test.java @@ -17,6 +17,7 @@ package org.apache.commons.csv.issues; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; import java.io.StringReader; @@ -51,6 +52,7 @@ private void testJiraCsv149EndWithEolAtEof(final boolean eolAtEof) throws IOExce int lineCounter = 2; try (final CSVParser parser = new CSVParser(records, format)) { for (final CSVRecord record : parser) { + assertNotNull(record); assertEquals(lineCounter++, parser.getCurrentLineNumber()); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java index 
c7effb6e7f..57d63298ef 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java @@ -37,7 +37,7 @@ private Reader getTestReader() { } @Test - public void parse() throws IOException { + public void testParse() throws IOException { int totcomment = 0; int totrecs = 0; try (final Reader reader = getTestReader(); final BufferedReader br = new BufferedReader(reader)) { diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java index 60f711ac09..996e2eb3b7 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java @@ -1,56 +1,51 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.csv.issues; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; -import java.nio.charset.StandardCharsets; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVRecord; -import org.junit.jupiter.api.Test; - -public class JiraCsv198Test { - - // @formatter:off - private static final CSVFormat CSV_FORMAT = CSVFormat.EXCEL.builder() - .setDelimiter('^') - .setHeader() - .setSkipHeaderRecord(true) - .build(); - // @formatter:on - - @Test - public void test() throws UnsupportedEncodingException, IOException { - final InputStream pointsOfReference = getClass() - .getResourceAsStream("/org/apache/commons/csv/CSV-198/optd_por_public.csv"); - assertNotNull(pointsOfReference); - try (@SuppressWarnings("resource") - CSVParser parser = CSV_FORMAT.parse(new InputStreamReader(pointsOfReference, StandardCharsets.UTF_8))) { - for (final CSVRecord record : parser) { - final String locationType = record.get("location_type"); - assertNotNull(locationType); - } - } - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.junit.jupiter.api.Test; + +public class JiraCsv198Test { + + // @formatter:off + private static final CSVFormat CSV_FORMAT = CSVFormat.EXCEL.builder() + .setDelimiter('^') + .setHeader() + .setSkipHeaderRecord(true) + .build(); + // @formatter:on + + @Test + public void test() throws UnsupportedEncodingException, IOException { + final InputStream pointsOfReference = getClass().getResourceAsStream("/org/apache/commons/csv/CSV-198/optd_por_public.csv"); + assertNotNull(pointsOfReference); + try (@SuppressWarnings("resource") + CSVParser parser = CSV_FORMAT.parse(new InputStreamReader(pointsOfReference, StandardCharsets.UTF_8))) { + parser.forEach(record -> assertNotNull(record.get("location_type"))); + } + } + +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java index 7a9d5ae0e9..126e85e504 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv211Test.java @@ -1,55 +1,51 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv.issues; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.IOException; -import java.io.StringReader; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVRecord; -import org.junit.jupiter.api.Test; - -public class JiraCsv211Test { - - @Test - public void testJiraCsv211Format() throws IOException { - final String[] values = {"1", "Jane Doe", "USA", ""}; - - // @formatter:off - final CSVFormat printFormat = CSVFormat.DEFAULT.builder() - .setDelimiter('\t') - .setHeader("ID", "Name", "Country", "Age") - .build(); - // @formatter:on - final String formatted = printFormat.format(values); - assertEquals("ID\tName\tCountry\tAge\r\n1\tJane Doe\tUSA\t", formatted); - - final CSVFormat parseFormat = CSVFormat.DEFAULT.builder().setDelimiter('\t').setHeader() - .setSkipHeaderRecord(true).build(); - try (final CSVParser parser = parseFormat.parse(new StringReader(formatted))) { - for (final CSVRecord record : parser) { - assertEquals("1", record.get(0)); - assertEquals("Jane Doe", record.get(1)); - assertEquals("USA", record.get(2)); - assertEquals("", record.get(3)); - } - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.junit.jupiter.api.Test; + +public class JiraCsv211Test { + + @Test + public void testJiraCsv211Format() throws IOException { + // @formatter:off + final CSVFormat printFormat = CSVFormat.DEFAULT.builder() + .setDelimiter('\t') + .setHeader("ID", "Name", "Country", "Age") + .build(); + // @formatter:on + final String formatted = printFormat.format("1", "Jane Doe", "USA", ""); + assertEquals("ID\tName\tCountry\tAge\r\n1\tJane Doe\tUSA\t", formatted); + + final CSVFormat parseFormat = CSVFormat.DEFAULT.builder().setDelimiter('\t').setHeader().setSkipHeaderRecord(true).build(); + try (final CSVParser parser = parseFormat.parse(new StringReader(formatted))) { + parser.forEach(record -> { + assertEquals("1", record.get(0)); + assertEquals("Jane Doe", record.get(1)); + assertEquals("USA", record.get(2)); + assertEquals("", record.get(3)); + }); + } + } +} diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java index 074883fe0b..e175ff0e9e 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java +++ 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv213Test.java @@ -25,18 +25,17 @@ import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.QuoteMode; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; /** * Tests https://issues.apache.org/jira/browse/CSV-213 - * + *

      * This is normal behavior with the current architecture: The iterator() API presents an object that is backed by data * in the CSVParser as the parser is streaming over the file. The CSVParser is like a forward-only stream. When you * create a new Iterator you are only created a new view on the same position in the parser's stream. For the behavior * you want, you need to open a new CSVParser. + *

      */ -@Disabled public class JiraCsv213Test { private void createEndChannel(final File csvFile) { @@ -64,6 +63,6 @@ private void createEndChannel(final File csvFile) { @Test public void test() { - createEndChannel(new File("src/test/resources/CSV-213/999751170.patch.csv")); + createEndChannel(new File("src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv")); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java index 84d274acf6..d0a10300bb 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java @@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; @@ -70,12 +71,7 @@ public void testJiraCsv248() throws IOException, ClassNotFoundException { assertFalse(rec.isSet("A")); assertEquals(0, rec.toMap().size()); // This will throw - try { - rec.get("A"); - org.junit.jupiter.api.Assertions.fail("Access by name is not expected after deserialisation"); - } catch (final IllegalStateException expected) { - // OK - } + assertThrows(IllegalStateException.class, () -> rec.get("A")); } } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java index 4f8f51823d..0e18ae55bd 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java @@ -23,6 +23,7 @@ import java.io.StringReader; import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.DuplicateHeaderMode; import 
org.junit.jupiter.api.Test; @@ -46,7 +47,7 @@ public class JiraCsv264Test { "\"6\",\"7\",\"\",\"\",\"10\""; @Test - public void testJiraCsv264() throws IOException { + public void testJiraCsv264() { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() @@ -68,13 +69,13 @@ public void testJiraCsv264WithGapAllowEmpty() throws IOException { .setAllowMissingColumnNames(true) .build(); - try (StringReader reader = new StringReader(CSV_STRING_GAP)) { - csvFormat.parse(reader); + try (StringReader reader = new StringReader(CSV_STRING_GAP); final CSVParser parser = csvFormat.parse(reader)) { + // empty } } @Test - public void testJiraCsv264WithGapDisallow() throws IOException { + public void testJiraCsv264WithGapDisallow() { final CSVFormat csvFormat = CSVFormat.DEFAULT .builder() .setHeader() diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java index 81dff1195b..37209e7aff 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java @@ -1,230 +1,214 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv.issues; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.Reader; -import java.io.StringReader; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVPrinter; -import org.apache.commons.csv.CSVRecord; -import org.junit.jupiter.api.Test; - -public class JiraCsv288Test { - @Test - // Before fix: - // expected: but was: - public void testParseWithDoublePipeDelimiter() throws Exception { - final Reader in = new StringReader("a||b||c||d||||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithTriplePipeDelimiter() throws Exception { - final Reader in = new StringReader("a|||b|||c|||d||||||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithABADelimiter() throws Exception { - final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = 
CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithDoublePipeDelimiterQuoted() throws Exception { - final Reader in = new StringReader("a||\"b||c\"||d||||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b||c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception { - final Reader in = new StringReader("a||b||c||d||||f||"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f,", stringBuilder.toString()); - } - } - } - - @Test - // Before fix: - // expected: but was: - public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f~|"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for 
(final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f,", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - - public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { - final Reader in = new StringReader("a||bb||cc||dd||f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,bb,cc,dd,f", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter1() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter2() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f~"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - 
csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f~", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter3() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f|"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f|", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithTwoCharDelimiter4() throws Exception { - final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString()); - } - } - } - - @Test - // Regression, already passed before fix - public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception { - final Reader in = new StringReader("a|b|c|d||f|"); - final StringBuilder stringBuilder = new StringBuilder(); - try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); - CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").build())) { - for (final CSVRecord csvRecord : csvParser) { - for (int i = 0; i < csvRecord.size(); i++) { - csvPrinter.print(csvRecord.get(i)); - } - assertEquals("a,b,c,d,,f,", stringBuilder.toString()); - } - } - } 
+/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +public class JiraCsv288Test { + + private void print(final CSVRecord csvRecord, final CSVPrinter csvPrinter) throws IOException { + for (final String value : csvRecord) { + csvPrinter.print(value); + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithABADelimiter() throws Exception { + final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser parser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").build())) { + for (final CSVRecord csvRecord : parser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Before 
fix: + // expected: but was: + public void testParseWithDoublePipeDelimiter() throws Exception { + final Reader in = new StringReader("a||b||c||d||||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + + public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { + final Reader in = new StringReader("a||bb||cc||dd||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,bb,cc,dd,f", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a||b||c||d||||f||"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithDoublePipeDelimiterQuoted() throws Exception { + final Reader in = new StringReader("a||\"b||c\"||d||||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new 
CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b||c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a|b|c|d||f|"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithTriplePipeDelimiter() throws Exception { + final Reader in = new StringReader("a|||b|||c|||d||||||f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter1() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } 
+ } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter2() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f~", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter3() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f|"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f|", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter4() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~|"); + final StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new 
CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (final CSVRecord csvRecord : csvParser) { + print(csvRecord, csvPrinter); + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } } \ No newline at end of file diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java new file mode 100644 index 0000000000..69aee2c796 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv290Test.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.InputStreamReader; +import java.io.StringReader; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +// psql (14.5 (Homebrew)) +// +// create table COMMONS_CSV_PSQL_TEST (ID INTEGER, COL1 VARCHAR, COL2 VARCHAR, COL3 VARCHAR, COL4 VARCHAR); +// insert into COMMONS_CSV_PSQL_TEST select 1, 'abc', 'test line 1' || chr(10) || 'test line 2', null, ''; +// insert into COMMONS_CSV_PSQL_TEST select 2, 'xyz', '\b:' || chr(8) || ' \t:' || chr(9) || ' \n:' || chr(10) || ' \r:' || chr(13), 'a', 'b'; +// insert into COMMONS_CSV_PSQL_TEST values (3, 'a', 'b,c,d', '"quoted"', 'e'); +// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.csv' WITH (FORMAT CSV); +// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.tsv'; +// +// cat /tmp/psql.csv +// 1,abc,"test line 1 +// test line 2",,"" +// 2,xyz,"\b:^H \t: \n: +// \r:^M",a,b +// 3,a,"b,c,d","""quoted""",e +// +// cat /tmp/psql.tsv +// 1 abc test line 1\ntest line 2 \N +// 2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b +// 3 a b,c,d "quoted" e +// +public class JiraCsv290Test { + + private void testHelper(final String fileName, final CSVFormat format) throws Exception { + List> content = new ArrayList<>(); + try (CSVParser csvParser = CSVParser.parse(new InputStreamReader(this.getClass().getResourceAsStream("/org/apache/commons/csv/CSV-290/" + fileName)), + format)) { + content = csvParser.stream().collect(Collectors.mapping(CSVRecord::toList, Collectors.toList())); + } + + assertEquals(3, content.size()); + + assertEquals("1", content.get(0).get(0)); + 
assertEquals("abc", content.get(0).get(1)); + assertEquals("test line 1\ntest line 2", content.get(0).get(2)); // new line + assertEquals(null, content.get(0).get(3)); // null + assertEquals("", content.get(0).get(4)); + + assertEquals("2", content.get(1).get(0)); + assertEquals("\\b:\b \\t:\t \\n:\n \\r:\r", content.get(1).get(2)); // \b, \t, \n, \r + + assertEquals("3", content.get(2).get(0)); + assertEquals("b,c,d", content.get(2).get(2)); // value has comma + assertEquals("\"quoted\"", content.get(2).get(3)); // quoted + } + + @Test + public void testPostgresqlCsv() throws Exception { + testHelper("psql.csv", CSVFormat.POSTGRESQL_CSV); + } + + @Test + public void testPostgresqlText() throws Exception { + testHelper("psql.tsv", CSVFormat.POSTGRESQL_TEXT); + } + + @Test + public void testWriteThenRead() throws Exception { + final StringWriter sw = new StringWriter(); + + try (CSVPrinter printer = new CSVPrinter(sw, CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build())) { + + printer.printRecord("column1", "column2"); + printer.printRecord("v11", "v12"); + printer.printRecord("v21", "v22"); + printer.close(); + + final CSVParser parser = new CSVParser(new StringReader(sw.toString()), + CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build()); + + assertArrayEquals(new Object[] { "column1", "column2" }, parser.getHeaderNames().toArray()); + + final Iterator i = parser.iterator(); + assertArrayEquals(new String[] { "v11", "v12" }, i.next().toList().toArray()); + assertArrayEquals(new String[] { "v21", "v22" }, i.next().toList().toArray()); + } + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java index 6a049881f5..fcde8984d5 100644 --- a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java @@ -31,6 +31,7 @@ import 
org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -45,7 +46,7 @@ public class PerformanceTest { private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; - private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); + private static final File BIG_FILE = new File(FileUtils.getTempDirectoryPath(), "worldcitiespop.txt"); @BeforeAll public static void setUpClass() throws FileNotFoundException, IOException { if (BIG_FILE.exists()) { diff --git a/src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv b/src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv new file mode 100644 index 0000000000..e685adc88f --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv @@ -0,0 +1,4 @@ +"1414770317901","android.widget.EditText","pass sem1 _84*|*","0","pass sem1 _8" +"1414770318470","android.widget.EditText","pass sem1 _84:|","0","pass sem1 _84:\" +"1414770318327","android.widget.EditText","pass sem1 +"1414770318628","android.widget.EditText","pass sem1 _84*|*","0","pass sem1 diff --git a/src/test/resources/org/apache/commons/csv/CSV-290/psql.csv b/src/test/resources/org/apache/commons/csv/CSV-290/psql.csv new file mode 100644 index 0000000000..dd50f5a642 --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-290/psql.csv @@ -0,0 +1,5 @@ +1,abc,"test line 1 +test line 2",,"" +2,xyz,"\b: \t: \n: + \r: ",a,b +3,a,"b,c,d","""quoted""",e diff --git a/src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv b/src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv new file mode 100644 index 0000000000..5358d8eac6 --- /dev/null +++ b/src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv @@ -0,0 +1,3 @@ +1 abc test line 1\ntest 
line 2 \N +2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b +3 a b,c,d "quoted" e