diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..5b4750958a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: 2 +updates: + - package-ecosystem: "maven" + directory: "/" + schedule: + interval: "daily" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml new file mode 100644 index 0000000000..bc2303719a --- /dev/null +++ b/.github/workflows/maven.yml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java CI + +on: [push, pull_request] + +jobs: + build: + + runs-on: ubuntu-latest + continue-on-error: ${{ matrix.experimental }} + strategy: + matrix: + java: [ 8, 11, 15 ] + experimental: [false] + include: + - java: 16-ea + experimental: true + - java: 17-ea + experimental: true + + steps: + - uses: actions/checkout@v2.3.4 + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v1.4.3 + with: + java-version: ${{ matrix.java }} + - name: Build with Maven + run: mvn -V apache-rat:check spotbugs:check javadoc:javadoc -Ddoclint=all package --file pom.xml --no-transfer-progress diff --git a/.gitignore b/.gitignore index b9493d72e7..92a1b2a4a3 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ site-content .classpath .project .externalToolBuilders +/.checkstyle diff --git a/.travis.yml b/.travis.yml index 6e09a29e78..c15d1d2734 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,15 +14,19 @@ # limitations under the License. 
language: java -sudo: false jdk: - - openjdk7 - - oraclejdk8 - - oraclejdk9 + - openjdk8 + - openjdk11 + - openjdk15 + - openjdk-ea + +matrix: + allow_failures: + - jdk: openjdk-ea script: - - mvn + - mvn -V --no-transfer-progress after_success: - - mvn clean test jacoco:report coveralls:report -Ptravis-jacoco + - mvn -V --no-transfer-progress clean test jacoco:report coveralls:report -Ptravis-jacoco diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e4cb4aaa40..a079867e8c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ | commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates | +======================================================================+ | | - | 1) Re-generate using: mvn commons:contributing-md | + | 1) Re-generate using: mvn commons-build:contributing-md | | | | 2) Set the following properties in the component's pom: | | - commons.jira.id (required, alphabetic, upper case) | @@ -50,7 +50,7 @@ Getting Started + Make sure you have a [JIRA account](https://issues.apache.org/jira/). + Make sure you have a [GitHub account](https://github.com/signup/free). -+ If you're planning to implement a new feature it makes sense to discuss you're changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons Text's scope. ++ If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons Text's scope. + Submit a [Jira Ticket][jira] for your issue, assuming one does not already exist. + Clearly describe the issue including steps to reproduce when it is a bug. + Make sure you fill in the earliest version that you know has the issue. 
@@ -107,7 +107,7 @@ Additional Resources + [Apache Commons Text JIRA project page][jira] + [Contributor License Agreement][cla] + [General GitHub documentation](https://help.github.com/) -+ [GitHub pull request documentation](https://help.github.com/send-pull-requests/) ++ [GitHub pull request documentation](https://help.github.com/articles/creating-a-pull-request/) + [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) + `#apache-commons` IRC channel on `irc.freenode.net` diff --git a/NOTICE.txt b/NOTICE.txt index faf1f27d4f..2731b62593 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache Commons Text -Copyright 2001-2017 The Apache Software Foundation +Copyright 2014-2020 The Apache Software Foundation This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). +The Apache Software Foundation (https://www.apache.org/). diff --git a/README.md b/README.md index cd91e30cb9..3c5565daf3 100644 --- a/README.md +++ b/README.md @@ -1,104 +1,105 @@ - - -Apache Commons Text -=================== - -[![Build Status](https://travis-ci.org/apache/commons-text.svg?branch=master)](https://travis-ci.org/apache/commons-text) -[![Coverage Status](https://coveralls.io/repos/apache/commons-text/badge.svg?branch=master)](https://coveralls.io/r/apache/commons-text) -[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-text/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-text/) - -Apache Commons Text is a library focused on algorithms working on strings. - -Documentation -------------- - -More information can be found on the [Apache Commons Text homepage](https://commons.apache.org/proper/commons-text). -The [JavaDoc](https://commons.apache.org/proper/commons-text/javadocs/api-release) can be browsed. -Questions related to the usage of Apache Commons Text should be posted to the [user mailing list][ml]. 
- -Where can I get the latest release? ------------------------------------ -You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-text/download_text.cgi). - -Alternatively you can pull it from the central Maven repositories: - -```xml - - org.apache.commons - commons-text - 1.1 - -``` - -Contributing ------------- - -We accept Pull Requests via GitHub. The [developer mailing list][ml] is the main channel of communication for contributors. -There are some guidelines which will make applying PRs easier for us: -+ No tabs! Please use spaces for indentation. -+ Respect the code style. -+ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change. -+ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn clean test```. - -If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas). -You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md). - -License -------- -This code is under the [Apache Licence v2](https://www.apache.org/licenses/LICENSE-2.0). - -See the `NOTICE.txt` file for required notices and attributions. - -Donations ---------- -You like Apache Commons Text? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support the development. 
- -Additional Resources --------------------- - -+ [Apache Commons Homepage](https://commons.apache.org/) -+ [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/TEXT) -+ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) -+ `#apache-commons` IRC channel on `irc.freenode.org` - -[ml]:https://commons.apache.org/mail-lists.html + + +Apache Commons Text +=================== + +[![Build Status](https://travis-ci.org/apache/commons-text.svg)](https://travis-ci.org/apache/commons-text) +[![Coverage Status](https://coveralls.io/repos/apache/commons-text/badge.svg)](https://coveralls.io/r/apache/commons-text) +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-text/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-text/) +[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-text/1.9.svg)](https://javadoc.io/doc/org.apache.commons/commons-text/1.9) + +Apache Commons Text is a library focused on algorithms working on strings. + +Documentation +------------- + +More information can be found on the [Apache Commons Text homepage](https://commons.apache.org/proper/commons-text). +The [Javadoc](https://commons.apache.org/proper/commons-text/apidocs) can be browsed. +Questions related to the usage of Apache Commons Text should be posted to the [user mailing list][ml]. + +Where can I get the latest release? +----------------------------------- +You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-text/download_text.cgi). + +Alternatively you can pull it from the central Maven repositories: + +```xml + + org.apache.commons + commons-text + 1.9 + +``` + +Contributing +------------ + +We accept Pull Requests via GitHub. The [developer mailing list][ml] is the main channel of communication for contributors. +There are some guidelines which will make applying PRs easier for us: ++ No tabs! 
Please use spaces for indentation. ++ Respect the code style. ++ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change. ++ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn clean test```. + +If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas). +You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md). + +License +------- +This code is under the [Apache Licence v2](https://www.apache.org/licenses/LICENSE-2.0). + +See the `NOTICE.txt` file for required notices and attributions. + +Donations +--------- +You like Apache Commons Text? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support the development. + +Additional Resources +-------------------- + ++ [Apache Commons Homepage](https://commons.apache.org/) ++ [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/TEXT) ++ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons) ++ `#apache-commons` IRC channel on `irc.freenode.org` + +[ml]:https://commons.apache.org/mail-lists.html diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt index 06f736901a..b37e3e23a5 100644 --- a/RELEASE-NOTES.txt +++ b/RELEASE-NOTES.txt @@ -1,18 +1,363 @@ +Apache Commons Text +Version 1.9 +Release Notes + + +INTRODUCTION: + +This document contains the release notes for the 1.9 version of Apache Commons Text. +Commons Text is a set of utility functions and reusable components for the purpose of processing +and manipulating text that should be of use in a Java environment. + + +Apache Commons Text is a library focused on algorithms working on strings. + +Release 1.9. Requires Java 8. + +Changes in this version include: + +New features: +o Add StringMatcher.size(). 
Thanks to Gary Gregory. +o Refactor TextStringBuilder.readFrom(Readable), extracting readFrom(CharBuffer) and readFrom(Reader). Thanks to Gary Gregory. +o Add BiStringLookup and implementation BiFunctionStringLookup. Thanks to Gary Gregory. +o Add org.apache.commons.text.StringSubstitutor.StringSubstitutor(StringSubstitutor). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.TextStringBuilder(CharSequence). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.drainChar(int). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.drainChars(int, int, char[], int). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.isNotEmpty(). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.isReallocated(). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.readFrom(Reader, int). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.set(String). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.wrap(char[]). Thanks to Gary Gregory. +o Add org.apache.commons.text.TextStringBuilder.wrap(char[], int). Thanks to Gary Gregory. +o Add org.apache.commons.text.io.StringSubstitutorReader. Thanks to Gary Gregory. +o Add org.apache.commons.text.lookup.StringLookupFactory.functionStringLookup(Function). Thanks to Gary Gregory. +o Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int). Thanks to Gary Gregory. +o Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int, int, int). Thanks to Gary Gregory. +o Add org.apache.commons.text.matcher.StringMatcherFactory.andMatcher(StringMatcher...). Thanks to Gary Gregory. +o Add org.apache.commons.text.matcher.StringMatcherFactory.stringMatcher(char...). Thanks to Gary Gregory. + +Fixed Bugs: +o TEXT-166: Removed non-existing parameter from Javadocs and spelled out parameters in throws. Thanks to Mikko Maunu. 
+o TEXT-149: StringEscapeUtils.unescapeCsv doesn't remove quotes at begin and end of string. Thanks to Yuji Konishi. +o TEXT-174: ScriptStringLookup does not accept ":" #126. Thanks to furkilic. +o TEXT-178: StringSubstitutor incorrectly removes some escape characters. Thanks to Gary Gregory. +o TEXT-181: Fix Javadocs #135. Thanks to XenoAmess. +o TEXT-182: Fix typos #137. Thanks to XenoAmess. +o TEXT-183: Make ConstantStringLookup.constantCache final #136. Thanks to XenoAmess. +o TEXT-184: Simplify if in CaseUtils #134. Thanks to XenoAmess. + +Changes: +o [javadoc] Fix compiler warnings in Java code example in Javadoc #124. Thanks to Johan Hammar. +o TEXT-177: Update from Apache Commons Lang 3.9 to 3.11. Thanks to Gary Gregory. +o [build] Skip clirr since we use JApiCmp. Thanks to Gary Gregory. +o [test] junit-jupiter 5.5.1 -> 5.5.2. Thanks to Gary Gregory. +o [test] org.assertj:assertj-core 3.13.2 -> 3.16.1. Thanks to Gary Gregory. +o [build] com.puppycrawl.tools:checkstyle 8.23 -> 8.34. Thanks to Gary Gregory. +o [build] Update JUnit from 5.5.2 to 5.6.2. Thanks to Gary Gregory. +o [build] commons.jacoco.version 0.8.4 -> 0.8.5. Thanks to Gary Gregory. +o [build] commons.javadoc.version 3.1.1 -> 3.2.0. Thanks to Gary Gregory. +o [build] commons.japicmp.version 0.14.1 -> 0.14.3. Thanks to Gary Gregory. +o [build] checkstyle.plugin.version 3.1.0 -> 3.1.1. Thanks to Gary Gregory. +o [build] checkstyle.version 8.27 -> 8.33. Thanks to Gary Gregory. +o [build] org.apache.commons:commons-parent 48 -> 51. Thanks to Gary Gregory. +o [build] maven-pmd-plugin 3.12.0 -> 3.13.0. Thanks to Gary Gregory. +o [build] org.mockito 3.3.3 -> 3.4.4. Thanks to Gary Gregory. 
+ + +Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html + +For complete information on Apache Commons Text, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons Text website: + +https://commons.apache.org/proper/commons-text + +Download page: https://commons.apache.org/proper/commons-text/download_text.cgi + +Have fun! +-Apache Commons Team + +============================================================================= + +Apache Commons Text +Version 1.8 +Release Notes + + +INTRODUCTION: + +This document contains the release notes for the 1.8 version of Apache Commons Text. +Commons Text is a set of utility functions and reusable components for the purpose of processing +and manipulating text that should be of use in a Java environment. + + +Apache Commons Text is a library focused on algorithms working on strings. + +Release 1.8 + +Changes in this version include: + +- New Features + o TEXT-169: Add helper factory method org.apache.commons.text.StringSubstitutor.createInterpolator(). Thanks to Gary Gregory. + o TEXT-170: Add String lookup for host names and IP addresses (DnsStringLookup). Thanks to Gary Gregory. + +- Fixed Bugs + o TEXT-167: commons-text web page missing "RELEASE-NOTES-1.7.txt". Thanks to Larry West. + o TEXT-168: (doc) Fixed wrong value for Jaro-Winkler example #117. Thanks to luksan47. + o TEXT-171: StringLookupFactory.addDefaultStringLookups(Map) does not convert keys to lower case. Thanks to Gary Gregory. + +- Changes + o Expand Javadoc for StringSubstitutor and friends. Thanks to Gary Gregory. + o [site] checkstyle.version 8.21 -> 8.23. Thanks to Gary Gregory. 
+ + +Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html + +For complete information on Apache Commons Text, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons Text website: + +https://commons.apache.org/proper/commons-text + +Download it from https://commons.apache.org/proper/commons-text/download_text.cgi + +============================================================================= + +Apache Commons Text +Version 1.7 +Release Notes + +INTRODUCTION: + +This document contains the release notes for the 1.7 version of Apache Commons Text. +Commons Text is a set of utility functions and reusable components for the purpose of processing +and manipulating text that should be of use in a Java environment. + +Apache Commons Text is a library focused on algorithms working on strings. + +Changes in this version include: + +New features: + +o TEXT-148: Add an enum to the lookup package that lists all StringLookups +o TEXT-127: Add a toggle to throw an exception when a variable is unknown in StringSubstitutor Thanks to Jean-Baptiste REICH, Sebb, Don Jeba, Gary Gregory. +o TEXT-138: TextStringBuilder append sub-sequence not consistent with Appendable. Thanks to Neal Johnson, Don Jeba. +o TEXT-152: Fix possible infinite loop in WordUtils.wrap for a regex pattern that would trigger on a match of 0 length Thanks to @CAPS50. +o TEXT-155: Add a generic IntersectionSimilarity measure + +Fixed Bugs: + +o TEXT-111: WordUtils.wrap must calculate offset increment from wrapOn pattern length Thanks to @CAPS50. +o TEXT-151: Fix the JaroWinklerSimilarity to use StringUtils.equals to test for CharSequence equality +o TEXT-165: ResourceBundleStringLookup.lookup(String) throws MissingResourceException instead of returning null. + +Changes: + +o TEXT-104: Jaro Winkler Distance refers to similarity Thanks to Sascha Szott. 
+o TEXT-153: Make prefixSet in LookupTranslator a BitSet Thanks to amirhadadi. +o TEXT-156: Fix the RegexTokenizer to use a static Pattern +o TEXT-157: Remove rounding from JaccardDistance and JaccardSimilarity +o TEXT-162: Update Apache Commons Lang from 3.8.1 to 3.9. +o Update tests from org.assertj:assertj-core 3.12.1 to 3.12.2. +o Update site from com.puppycrawl.tools:checkstyle 8.18 to 8.21. + +Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html + +For complete information on Apache Commons Text, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Apache Commons Text website: + +https://commons.apache.org/proper/commons-text + +Download it from https://commons.apache.org/proper/commons-text/download_text.cgi + +============================================================================= + Apache Commons Text - Version 1.1 + Version 1.6 Release Notes +INTRODUCTION +============ + +This document contains the release notes for the 1.6 version of Apache Commons +Text. Commons Text is a set of utility functions and reusable components for +the purpose of processing and manipulating text that should be of use in a Java +environment. + +This component requires Java 8. + +CHANGES +======= + +o TEXT-144: Add the resource string bundle string lookup to the default set of lookups +o TEXT-145: Add StringLookupFactory methods for the URL encoder and decoder string lookups +o TEXT-146: org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup() should reuse a singleton instance +o TEXT-147: Add a Base64 encoder string lookup. 
+ +Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html + +For complete information on Apache Commons Text, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Apache Commons Text website: + +https://commons.apache.org/proper/commons-text + +============================================================================= + + Apache Commons Text + Version 1.5 + Release Notes INTRODUCTION ============ -This document contains the release notes for the 1.1 version of Apache Commons +This document contains the release notes for the 1.5 version of Apache Commons Text. Commons Text is a set of utility functions and reusable components for the purpose of processing and manipulating text that should be of use in a Java environment. -Apache Commons Text is a library focused on algorithms working on strings. +This component requires Java 8. +NEW FEATURES +============ + +o TEXT-133: Add a XML file XPath string lookup. +o TEXT-134: Add a Properties file string lookup. +o TEXT-135: Add a script string lookup. +o TEXT-136: Add a file string lookup. +o TEXT-137: Add a URL string lookup. +o TEXT-140: Add a Base64 string lookup. +o TEXT-141: Add org.apache.commons.text.lookup.StringLookupFactory.resourceBundleStringLookup(String). +o TEXT-142: Add URL encoder and decoder string lookups. +o TEXT-143: Add constant string lookup like the one in Apache Commons Configuration. + +FIXED BUGS +========== + +o TEXT-139: Improve JaccardSimilarity computational cost Thanks to Nick Wong. +o TEXT-118: JSON escaping incorrect for the delete control character Thanks to Nandor Kollar. +o TEXT-130: Fixes JaroWinklerDistance: Wrong results due to precision of transpositions Thanks to Jan Martin Keil. +o TEXT-131: JaroWinklerDistance: Calculation deviates from definition Thanks to Jan Martin Keil. 
+ +CHANGES +======= + +o TEXT-132: Update Apache Commons Lang from 3.7 to 3.8.1 + +============================================================================= + + Apache Commons Text + Version 1.4 + Release Notes + +INTRODUCTION +============ + +This document contains the release notes for the 1.4 version of Apache Commons +Text. Commons Text is a set of utility functions and reusable components for +the purpose of processing and manipulating text that should be of use in a Java +environment. + +This component requires Java 8. + + +Changes in this version include: + +Fixed Bugs: +o TEXT-120: StringEscapeUtils#unescapeJson does not unescape double quotes and forward slash. +o TEXT-119: Remove mention of SQL escaping from user guide. +o TEXT-123: WordUtils.wrap throws StringIndexOutOfBoundsException when wrapLength is Integer.MAX_VALUE. Thanks to Takanobu Asanuma. + +Changes: +o TEXT-121: Update Java requirement from version 7 to 8. Thanks to pschumacher. +o TEXT-122: Allow full customization with new API org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup(Map, StringLookup, boolean). + +============================================================================= + + Apache Commons Text + Version 1.3 + Release Notes + +INTRODUCTION +============ + +This document contains the release notes for the 1.3 version of Apache Commons +Text. Commons Text is a set of utility functions and reusable components for +the purpose of processing and manipulating text that should be of use in a Java +environment. + +This component requires Java 7. + +NEW FEATURES +============= + +o Add Automatic-Module-Name MANIFEST entry for Java 9 compatibility Issue: TEXT-110. +o Add an interpolator string lookup: StringLookupFactory#interpolatorStringLookup() Issue: TEXT-113. +o Add a StrSubstitutor replacement based on interfaces: StringSubstitutor Issue: TEXT-114. +o Add a StrBuilder replacement based on the StringMatcher interface: TextStringBuilder Issue: TEXT-115. 
+o Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer Issue: TEXT-116. +o Add a local host string lookup: LocalHostStringLookup Issue: TEXT-117. + +FIXED BUGS +========== + +o Build failure with java 9-ea+159 Issue: TEXT-70. +o StrLookup API confusing Issue: TEXT-80. + +============================================================================= + + Apache Commons Text + Version 1.2 + Release Notes + +INTRODUCTION +============ + +This document contains the release notes for the 1.2 version of Apache Commons +Text. Commons Text is a set of utility functions and reusable components for +the purpose of processing and manipulating text that should be of use in a Java +environment. + +This component requires Java 7. + +JAVA 9 SUPPORT +============== + +At our time of release of 1.1, our build succeeds with Java 9-ea build 159, +and we believe all of our features to be Java 9 compatible. However, when we +run "mvn clean site" we have failures. + +NEW FEATURES +============= + +o TEXT-74: StrSubstitutor: Ability to turn off substitution in values. Thanks to Ioannis Sermetziadis. +o TEXT-97: RandomStringGenerator able to pass multiple ranges to .withinRange(). Thanks to Amey Jadiye. +o TEXT-89: WordUtils.initials support for UTF-16 surrogate pairs. Thanks to Arun Vinud S S. +o TEXT-90: Add CharacterPredicates for ASCII letters (uppercase/lowercase) and arabic numerals. +o TEXT-85: Added CaseUtils class with camel case conversion support. Thanks to Arun Vinud S S. +o TEXT-91: RandomStringGenerator should be able to generate a String with a random length. +o TEXT-102: Add StrLookup.resourceBundleLookup(ResourceBundle). + +FIXED BUGS +========== + +o TEXT-106: Exception thrown in ExtendedMessageFormat using quotes with custom registry. Thanks to Benoit Moreau. +o TEXT-100: StringEscapeUtils#UnEscapeJson doesn't recognize escape signs correctly. Thanks to Don Jeba. +o TEXT-105: Typo in LongestCommonSubsequence#logestCommonSubsequence. 
Thanks to Abrasha. + +CHANGES +======= + +o TEXT-107: Upversion commons-lang to 3.7. +o TEXT-98: Deprecate isDelimiter and use HashSets for delimiter checks. Thanks to Arun Vinud S S. +o TEXT-88: WordUtils should treat an empty delimiter array as no delimiters. Thanks to Amey Jadiye. +o TEXT-93: Update RandomStringGenerator to accept a list of valid characters. Thanks to Amey Jadiye. +o TEXT-92: Update commons-lang dependency to version 3.6. +o TEXT-83: Document that commons-csv should be used in preference to CsvTranslators. Thanks to Amey Jadiye. +o TEXT-67: NumericEntityUnescaper.options - fix TODO. +o TEXT-84: RandomStringGenerator claims to be immutable, but isn't. + +============================================================================= + + Release Notes for version 1.1 JAVA 9 SUPPORT ============== @@ -42,10 +387,10 @@ CHANGES o TEXT-39: WordUtils should use toXxxxCase(int) rather than toXxxxCase(char) Thanks to Amey Jadiye. +============================================================================= Release Notes for version 1.0 - INCOMPATIBLE CHANGES ==================== @@ -76,7 +421,7 @@ REMOVED o TEXT-40: Escape HTML characters only once: revert. - +============================================================================= Release Notes for version 1.0-beta-1 @@ -169,13 +514,13 @@ o TEXT-51: Remove RandomStringGenerator to be added back in the 1.1 release o TEXT-31: Remove org.apache.commons.text.names, for later release than 1.0 -Historical list of changes: http://commons.apache.org/text/changes-report.html +Historical list of changes: https://commons.apache.org/text/changes-report.html For complete information on Apache Commons Text, including instructions on how to submit bug reports, patches, or suggestions for improvement, see the Apache Apache Commons Text website: -http://commons.apache.org/text/ +https://commons.apache.org/text/ Have fun! 
-Apache Commons Text team \ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..51943ba7b4 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ + +The Apache Commons security page is [https://commons.apache.org/security.html](https://commons.apache.org/security.html). diff --git a/checkstyle-suppressions.xml b/checkstyle-suppressions.xml index 6cffab5b45..d2adfdadbf 100644 --- a/checkstyle-suppressions.xml +++ b/checkstyle-suppressions.xml @@ -1,7 +1,7 @@ + "-//Checkstyle//DTD SuppressionFilter Configuration 1.0//EN" + "https://checkstyle.org/dtds/suppressions_1_0.dtd"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + diff --git a/checkstyle.xml b/checkstyle.xml index 2d65b6cd84..4cb3262a85 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -86,20 +86,18 @@ - + + + - + - - - - @@ -112,7 +110,6 @@ - @@ -127,7 +124,6 @@ - @@ -135,16 +131,11 @@ - - - - - @@ -156,14 +147,13 @@ - + - @@ -172,13 +162,10 @@ - - - @@ -188,17 +175,13 @@ - - - - diff --git a/pom.xml b/pom.xml index 112b5c8c19..66b23b51f6 100644 --- a/pom.xml +++ b/pom.xml @@ -20,24 +20,25 @@ org.apache.commons commons-parent - 42 + 52 commons-text - 1.2-SNAPSHOT + 1.10.0-SNAPSHOT Apache Commons Text Apache Commons Text is a library focused on algorithms working on strings. 
+ https://commons.apache.org/proper/commons-text ISO-8859-1 UTF-8 - 1.7 - 1.7 + 1.8 + 1.8 text org.apache.commons.text - 1.2 - (Java 7+) + 1.10.0 + (Java 8+) TEXT 12318221 @@ -46,47 +47,79 @@ https://svn.apache.org/repos/infra/websites/production/commons/content/proper/commons-text site-content - - 2.8 + 3.1.1 + 8.39 - 2.17 - 6.19 + 4.2.0 + 4.2.0 + + 3.7.0 + 0.8.6 + 3.2.0 - - 0.7.9 - - 3.0.5 + 20.3.0 + + 0.15.2 + false + true + + + 1.9 + RC1 + true + scm:svn:https://dist.apache.org/repos/dist/dev/commons/${commons.componentid} + Gary Gregory + 86fdc7e2a11262cb org.apache.commons commons-lang3 - 3.6 + 3.11 - junit - junit - 4.12 + org.junit.jupiter + junit-jupiter + 5.7.0 + test + + + org.assertj + assertj-core + 3.18.1 + test + + + commons-io + commons-io + 2.8.0 test - org.hamcrest - hamcrest-all - 1.3 + org.mockito + + mockito-inline + ${commons.mockito.version} test - org.assertj - assertj-core - 2.8.0 - test + org.graalvm.js + js + ${graalvm.version} + test + + + org.graalvm.js + js-scriptengine + ${graalvm.version} + test - clean verify apache-rat:check clirr:check checkstyle:check findbugs:check javadoc:javadoc + clean verify apache-rat:check clirr:check japicmp:cmp checkstyle:check spotbugs:check javadoc:javadoc @@ -100,6 +133,19 @@ src/site/resources/release-notes/RELEASE-NOTES-*.txt + + + com.github.siom79.japicmp + japicmp-maven-plugin + + false + + + + org.apache.commons.text.StringSubstitutor#substitute(org.apache.commons.text.TextStringBuilder,int,int,java.util.List) + + + @@ -124,12 +170,18 @@ - org.codehaus.mojo - findbugs-maven-plugin - - ${commons.findbugs.version} + com.github.spotbugs + spotbugs-maven-plugin + ${spotbugs.plugin.version} + + + com.github.spotbugs + spotbugs + ${spotbugs.impl.version} + + - ${basedir}/fb-excludes.xml + ${basedir}/spotbugs-exclude-filter.xml @@ -152,6 +204,13 @@ + + + + ${commons.module.name} + + + org.apache.maven.plugins @@ -162,8 +221,14 @@ + + org.apache.maven.plugins + maven-javadoc-plugin + + 
${maven.compiler.source} + + - @@ -189,12 +254,11 @@ - org.codehaus.mojo - findbugs-maven-plugin - - ${commons.findbugs.version} + com.github.spotbugs + spotbugs-maven-plugin + ${spotbugs.plugin.version} - ${basedir}/fb-excludes.xml + ${basedir}/spotbugs-exclude-filter.xml @@ -206,7 +270,7 @@ maven-pmd-plugin - 3.8 + 3.14.0 ${maven.compiler.target} @@ -264,15 +328,9 @@ - - org.codehaus.mojo - javancss-maven-plugin - - http://commons.apache.org/proper/commons-text/ - 2014 @@ -327,17 +385,32 @@ Ioannis Sermetziadis + + Jostein Tveit + + + Luciano Medallia + + + Jan Martin Keil + + + Nandor Kollar + + + Nick Wong + - scm:git:http://git-wip-us.apache.org/repos/asf/commons-text.git - scm:git:https://git-wip-us.apache.org/repos/asf/commons-text.git - https://git-wip-us.apache.org/repos/asf?p=commons-text.git + scm:git:http://gitbox.apache.org/repos/asf/commons-text + scm:git:https://gitbox.apache.org/repos/asf/commons-text + https://gitbox.apache.org/repos/asf?p=commons-text.git jira - http://issues.apache.org/jira/browse/TEXT + https://issues.apache.org/jira/browse/TEXT @@ -373,11 +446,9 @@ - - @@ -393,16 +464,44 @@ - java9 + java9+ - 9 + [9,) - - 3.0.0-M1 - + true + + java13+ + + [13,) + + + + true + + + + java16-ea + + + 16-ea + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + true + + + + + + diff --git a/fb-excludes.xml b/spotbugs-exclude-filter.xml similarity index 89% rename from fb-excludes.xml rename to spotbugs-exclude-filter.xml index 30cf5d98e3..44baec8155 100644 --- a/fb-excludes.xml +++ b/spotbugs-exclude-filter.xml @@ -28,4 +28,10 @@ + + + + + + diff --git a/src/assembly/src.xml b/src/assembly/src.xml index 257df6e241..a2256af7e7 100644 --- a/src/assembly/src.xml +++ b/src/assembly/src.xml @@ -27,7 +27,7 @@ checkstyle.xml checkstyle-suppressions.xml CONTRIBUTING.md - fb-excludes.xml + sb-excludes.xml LICENSE.txt license-header.txt NOTICE.txt diff --git a/src/changes/changes.xml b/src/changes/changes.xml index e43bd8fa81..28e08fe5c3 100644 --- 
a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -44,8 +44,154 @@ The type attribute can be add,update,fix,remove. Apache Commons Text Changes + + + Fix CaseUtils when the input string contains only delimiters + Add GraalVM test dependencies to fix test failures with Java 15. + Incorrect values for Jaccard similarity with empty strings. + StringSubstitutor map constructor throws NPE on 1.9 with null map. + + Document negative limit for WordUtils abbreviate method + Speed up LevenshteinDistance with threshold by exiting early + Release Notes page hasn't been updated for 1.9 release yet. + Add StrBuilder.isNotEmpty(). + + Update checkstyle from 8.34 to 8.39, #141, #168, #182, #188, #193. + Update spotbugs.plugin.version 4.0.0 to 4.2.0, #144, #150, #167, #176, #194. + Update mockito-inline from 3.4.4 to 3.7.0, #143 #148 #149 #152 #153 #154 #158 #159 #166 #177 #180 #187 #195. + Bump junit-jupiter from 5.6.2 to 5.7.0 #163. + Bump assertj-core from 3.16.1 to 3.18.1 #151 #157 #160 #178 #184. + Bump commons-io from 2.7 to 2.8.0 #161. + Bump actions/setup-java from v1.4.0 to v1.4.3 #147 #156 #155 #172. + Bump commons-parent from 51 to 52 #145. + Bump actions/checkout from v1 to v2.3.4 #138 #146 #165 #183. + Bump maven-pmd-plugin from 3.13.0 to 3.14.0 #186. + Bump graalvm.version from 20.2.0 to 20.3.0 #185. + Minor Improvements #192. + Update commons.japicmp.version 0.14.3 -> 0.15.. + Update commons.jacoco.version 0.8.5 to 0.8.6; fixes Java 15 build. + Update spotbugs from 4.1.3 to 4.2.0 #175, 189. + Minor Improvements #196.. + + + Removed non-existing parameter from Javadocs and spelled out parameters in throws. + StringEscapeUtils.unescapeCsv doesn't remove quotes at begin and end of string. + ScriptStringLookup does not accept ":" #126. + StringSubstitutor incorrectly removes some escape characters. + Fix Javadocs #135. + Fix typos #137. + Make ConstantStringLookup.constantCache final #136. + Simplify if in CaseUtils #134. 
+ [javadoc] Fix compiler warnings in Java code example in Javadoc #124. + Update from Apache Commons Lang 3.9 to 3.11. + Add StringMatcher.size(). + Refactor TextStringBuilder.readFrom(Readable), extracting readFrom(CharBuffer) and readFrom(Reader). + Add BiStringLookup and implementation BiFunctionStringLookup. + Add org.apache.commons.text.StringSubstitutor.StringSubstitutor(StringSubstitutor). + Add org.apache.commons.text.TextStringBuilder.TextStringBuilder(CharSequence). + Add org.apache.commons.text.TextStringBuilder.drainChar(int). + Add org.apache.commons.text.TextStringBuilder.drainChars(int, int, char[]. int). + Add org.apache.commons.text.TextStringBuilder.isNotEmpty(). + Add org.apache.commons.text.TextStringBuilder.isReallocated(). + Add org.apache.commons.text.TextStringBuilder.readFrom(Reader, int). + Add org.apache.commons.text.TextStringBuilder.set(String). + Add org.apache.commons.text.TextStringBuilder.wrap(char[]). + Add org.apache.commons.text.TextStringBuilder.wrap(char[], int). + Add org.apache.commons.text.io.StringSubstitutorReader. + Add org.apache.commons.text.lookup.StringLookupFactory.functionStringLookup(Function<String, V>). + Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int). + Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int, int, int). + Add org.apache.commons.text.matcher.StringMatcherFactory.andMatcher(StringMatcher...). + Add org.apache.commons.text.matcher.StringMatcherFactory.stringMatcher(char...). + [build] Skip clirr since we use JApiCmp. + [test] junit-jupiter 5.5.1 -> 5.5.2. + [test] org.assertj:assertj-core 3.13.2 -> 3.16.1. + [build] com.puppycrawl.tools:checkstyle 8.23 -> 8.34. + [build] Update JUnit from 5.5.2 to 5.6.2. + [build] commons.jacoco.version 0.8.4 -> 0.8.5. + [build] commons.javadoc.version 3.1.1 -> 3.2.0. + [build] commons.japicmp.version 0.14.1 -> 0.14.3. + [build] checkstyle.plugin.version 3.1.0 -> 3.1.1. + [build] checkstyle.version 8.27 -> 8.33. 
+ [build] org.apache.commons:commons-parent 48 -> 51. + [build] maven-pmd-plugin 3.12.0 -> 3.13.0. + [build] org.mockito 3.3.3 -> 3.4.4. + + + + commons-text web page missing "RELEASE-NOTES-1.7.txt" + (doc) Fixed wrong value for Jaro-Winkler example #117 + Add helper factory method org.apache.commons.text.StringSubstitutor.createInterpolator(). + Add String lookup for host names and IP addresses (DnsStringLookup). + StringLookupFactory.addDefaultStringLookups(Map) does not convert keys to lower case. + Expand Javadoc for StringSubstitutor and friends. + [site] checkstyle.version 8.21 -> 8.23. + + + + WordUtils.wrap must calculate offset increment from wrapOn pattern length + Jaro Winkler Distance refers to similarity + Add an enum to the lookup package that lists all StringLookups + Add a toggle to throw an exception when a variable is unknown in StringSubstitutor + TextStringBuilder append sub-sequence not consistent with Appendable. + Fix possible infinite loop in WordUtils.wrap for a regex pattern that would trigger on a match of 0 length + Make prefixSet in LookupTranslator a BitSet + Fix the RegexTokenizer to use a static Pattern + Remove rounding from JaccardDistance and JaccardSimilarity + Fix the JaroWinklerSimilarity to use StringUtils.equals to test for CharSequence equality + Add a generic IntersectionSimilarity measure + Update Apache Commons Lang from 3.8.1 to 3.9. + ResourceBundleStringLookup.lookup(String) throws MissingResourceException instead of returning null. + Update tests from org.assertj:assertj-core 3.12.1 to 3.12.2. + Update site from com.puppycrawl.tools:checkstyle 8.18 to 8.21. + + + + Add the resource string bundle string lookup to the default set of lookups + Add StringLookupFactory methods for the URL encoder and decoder string lookups + org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup() should reuse a singleton instance + Add a Base64 encoder string lookup. 
+ + + + Improve JaccardSimilarity computational cost + JSON escaping incorrect for the delete control character + Fixes JaroWinklerDistance: Wrong results due to precision of transpositions + JaroWinklerDistance: Calculation deviates from definition + Update Apache Commons Lang from 3.7 to 3.8.1 + Add a XML file XPath string lookup. + Add a Properties file string lookup. + Add a script string lookup. + Add a file string lookup. + Add a URL string lookup. + Add a Base64 string lookup. + Add org.apache.commons.text.lookup.StringLookupFactory.resourceBundleStringLookup(String). + Add URL encoder and decoder string lookups. + Add constant string lookup like the one in Apache Commons Configuration. + + + + StringEscapeUtils#unescapeJson does not unescape double quotes and forward slash + Remove mention of SQL escaping from user guide + Update Java requirement from version 7 to 8. + Allow full customization with new API org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup(Map<String, StringLookup>, StringLookup, boolean). + WordUtils.wrap throws StringIndexOutOfBoundsException when wrapLength is Integer.MAX_VALUE. 
+ + + + Add Automatic-Module-Name MANIFEST entry for Java 9 compatibility + Build failure with java 9-ea+159 + Add an interpolator string lookup: StringLookupFactory#interpolatorStringLookup() + Add a StrSubstitutor replacement based on interfaces: StringSubstitutor + Add a StrBuilder replacement based on the StringMatcher interface: TextStringBuilder + Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer + Add a local host string lookup: LocalHostStringLookup + StrLookup API confusing + - + + Upversion commons-lang to 3.7 + Exception thrown in ExtendedMessageFormat using quotes with custom registry StringEscapeUtils#UnEscapeJson doesn't recognize escape signs correctly StrSubstitutor: Ability to turn off substitution in values RandomStringGenerator able to pass multiple ranges to .withinRange() @@ -64,7 +210,7 @@ The type attribute can be add,update,fix,remove. Typo in LongestCommonSubsequence#logestCommonSubsequence - + WordUtils should use toXxxxCase(int) rather than toXxxxCase(char) WordUtils.abbreviate support Putting WordUtils back in to the codebase @@ -74,7 +220,7 @@ The type attribute can be add,update,fix,remove. Similar to LANG-1025, clirr fails site build. - Mutable fields should be private - + Incorporate suggestions from RC2 into 1.0 release Naming packages org.apache.commons.text.beta Upgrading Jacoco for Java 9-ea compatibility. diff --git a/src/changes/release-notes.vm b/src/changes/release-notes.vm index 5511423b98..83225f00e8 100644 --- a/src/changes/release-notes.vm +++ b/src/changes/release-notes.vm @@ -67,28 +67,28 @@ $release.description.replaceAll(" ", " ## #macro ( processaction ) ## Use replaceAll to fix up LF-only line ends on Windows. 
- #set($action=$actionItem.getAction().replaceAll("\n"," +#set($action=$actionItem.getAction().replaceAll("\n"," ")) ## Fix up indentation for multi-line action descriptions - #set($action=$action.replaceAll("(?m)^ +",$indent2)) - #if ($actionItem.getIssue()) - #set($issue="$actionItem.getIssue():") - ## Pad shorter issue numbers - #if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end - #if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end - #if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end - #else - #set($issue=$indent1) - #end - #if ($actionItem.getDueTo()) - #set($dueto=" Thanks to $actionItem.getDueTo().") - #else - #set($dueto="") - #end +#set($action=$action.replaceAll("(?m)^ +",$indent2)) +#if ($actionItem.getIssue()) +#set($issue="$actionItem.getIssue():") +## Pad shorter issue numbers +#if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end +#if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end +#if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end +#else +#set($issue=$indent1) +#end +#if ($actionItem.getDueTo()) +#set($dueto=" Thanks to $actionItem.getDueTo().") +#else +#set($dueto="") +#end o $issue ${action}$dueto - #set($action="") - #set($issue="") - #set($dueto="") +#set($action="") +#set($issue="") +#set($dueto="") #end ## #if ($release.getActions().size() == 0) @@ -96,39 +96,44 @@ No changes defined in this version. 
#else Changes in this version include: - #if ($release.getActions('add').size() !=0) - New features: - #foreach($actionItem in $release.getActions('add')) - #processaction() - #end - #end - - #if ($release.getActions('fix').size() !=0) - Fixed Bugs: - #foreach($actionItem in $release.getActions('fix')) - #processaction() - #end - #end - - #if ($release.getActions('update').size() !=0) - Changes: - #foreach($actionItem in $release.getActions('update')) - #processaction() - #end - #end - - #if ($release.getActions('remove').size() !=0) - Removed: - #foreach($actionItem in $release.getActions('remove')) - #processaction() - #end - #end +#if ($release.getActions('add').size() !=0) +New features: +#foreach($actionItem in $release.getActions('add')) +#processaction() +#end +#end + +#if ($release.getActions('fix').size() !=0) +Fixed Bugs: +#foreach($actionItem in $release.getActions('fix')) +#processaction() +#end +#end + +#if ($release.getActions('update').size() !=0) +Changes: +#foreach($actionItem in $release.getActions('update')) +#processaction() +#end +#end + +#if ($release.getActions('remove').size() !=0) +Removed: +#foreach($actionItem in $release.getActions('remove')) +#processaction() +#end +#end ## End of main loop #end -Historical list of changes: ${project.url}changes-report.html +Historical list of changes: ${project.url}/changes-report.html For complete information on ${project.name}, including instructions on how to submit bug reports, patches, or suggestions for improvement, see the Apache ${project.name} website: -${project.url} \ No newline at end of file +${project.url} + +Download page: ${project.url}/download_text.cgi + +Have fun! 
+-Apache Commons Team diff --git a/src/main/java/org/apache/commons/text/AlphabetConverter.java b/src/main/java/org/apache/commons/text/AlphabetConverter.java index 658525e146..e6ac7537ea 100644 --- a/src/main/java/org/apache/commons/text/AlphabetConverter.java +++ b/src/main/java/org/apache/commons/text/AlphabetConverter.java @@ -29,6 +29,9 @@ import java.util.Objects; import java.util.Set; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; + /** *

* Convert from one alphabet to another, with the possibility of leaving certain @@ -45,20 +48,20 @@ * chars, which will be of length 1 *

* - *

Sample usage

+ *

Sample usage

* *
- * Character[] originals; // a, b, c, d
- * Character[] encoding; // 0, 1, d
+ * Character[] originals;   // a, b, c, d
+ * Character[] encoding;    // 0, 1, d
  * Character[] doNotEncode; // d
  *
  * AlphabetConverter ac = AlphabetConverter.createConverterFromChars(originals,
  * encoding, doNotEncode);
  *
- * ac.encode("a"); // 00
- * ac.encode("b"); // 01
- * ac.encode("c"); // 0d
- * ac.encode("d"); // d
+ * ac.encode("a");    // 00
+ * ac.encode("b");    // 01
+ * ac.encode("c");    // 0d
+ * ac.encode("d");    // d
  * ac.encode("abcd"); // 00010dd
  * 
* @@ -109,7 +112,7 @@ private AlphabetConverter(final Map originalToEncoded, * Encode a given string. * * @param original the string to be encoded - * @return the encoded string, {@code null} if the given string is null + * @return The encoded string, {@code null} if the given string is null * @throws UnsupportedEncodingException if chars that are not supported are * encountered */ @@ -148,7 +151,7 @@ public String encode(final String original) * * @param encoded a string that has been encoded using this * AlphabetConverter - * @return the decoded string, {@code null} if the given string is null + * @return The decoded string, {@code null} if the given string is null * @throws UnsupportedEncodingException if unexpected characters that * cannot be handled are encountered */ @@ -161,7 +164,7 @@ public String decode(final String encoded) final StringBuilder result = new StringBuilder(); for (int j = 0; j < encoded.length();) { - final Integer i = encoded.codePointAt(j); + final int i = encoded.codePointAt(j); final String s = codePointToString(i); if (s.equals(originalToEncoded.get(i))) { @@ -194,7 +197,7 @@ public String decode(final String encoded) * for each character in the original * alphabet. * - * @return the length of the encoded char + * @return The length of the encoded char */ public int getEncodedCharLength() { return encodedLetterLength; @@ -205,7 +208,7 @@ public int getEncodedCharLength() { * string. Use to reconstruct converter from * serialized map. * - * @return the original map + * @return The original map */ public Map getOriginalToEncoded() { return Collections.unmodifiableMap(originalToEncoded); @@ -315,7 +318,7 @@ public int hashCode() { * Create a new converter from a map. 
* * @param originalToEncoded a map returned from getOriginalToEncoded() - * @return the reconstructed AlphabetConverter + * @return The reconstructed AlphabetConverter * @see AlphabetConverter#getOriginalToEncoded() */ public static AlphabetConverter createConverterFromMap( @@ -354,7 +357,7 @@ public static AlphabetConverter createConverterFromMap( * @param doNotEncode an array of chars to be encoded using the original * alphabet - every char here must appear in * both the previous params - * @return the AlphabetConverter + * @return The AlphabetConverter * @throws IllegalArgumentException if an AlphabetConverter cannot be * constructed */ @@ -375,8 +378,8 @@ public static AlphabetConverter createConverterFromChars( * @return an equivalent array of integers */ private static Integer[] convertCharsToIntegers(final Character[] chars) { - if (chars == null || chars.length == 0) { - return new Integer[0]; + if (ArrayUtils.isEmpty(chars)) { + return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY; } final Integer[] integers = new Integer[chars.length]; for (int i = 0; i < chars.length; i++) { @@ -399,7 +402,7 @@ private static Integer[] convertCharsToIntegers(final Character[] chars) { * @param doNotEncode an array of ints representing the chars to be encoded * using the original alphabet - every char * here must appear in both the previous params - * @return the AlphabetConverter + * @return The AlphabetConverter * @throws IllegalArgumentException if an AlphabetConverter cannot be * constructed */ @@ -407,15 +410,15 @@ public static AlphabetConverter createConverter( final Integer[] original, final Integer[] encoding, final Integer[] doNotEncode) { - final Set originalCopy = new LinkedHashSet<>(Arrays. asList(original)); - final Set encodingCopy = new LinkedHashSet<>(Arrays. asList(encoding)); - final Set doNotEncodeCopy = new LinkedHashSet<>(Arrays. 
asList(doNotEncode)); + final Set originalCopy = new LinkedHashSet<>(Arrays.asList(original)); + final Set encodingCopy = new LinkedHashSet<>(Arrays.asList(encoding)); + final Set doNotEncodeCopy = new LinkedHashSet<>(Arrays.asList(doNotEncode)); final Map originalToEncoded = new LinkedHashMap<>(); final Map encodedToOriginal = new LinkedHashMap<>(); final Map doNotEncodeMap = new HashMap<>(); - int encodedLetterLength; + final int encodedLetterLength; for (final int i : doNotEncodeCopy) { if (!originalCopy.contains(i)) { @@ -495,7 +498,7 @@ public static AlphabetConverter createConverter( encodedLetterLength); ac.addSingleEncoding(encodedLetterLength, - "", + StringUtils.EMPTY, encodingCopy, originalCopy.iterator(), doNotEncodeMap); diff --git a/src/main/java/org/apache/commons/text/Builder.java b/src/main/java/org/apache/commons/text/Builder.java index c2c435cfd9..cb05b7513e 100644 --- a/src/main/java/org/apache/commons/text/Builder.java +++ b/src/main/java/org/apache/commons/text/Builder.java @@ -38,6 +38,7 @@ * *

* Example Builder: + *

*

  * class FontBuilder implements Builder<Font> {
  *     private Font font;
@@ -82,7 +83,7 @@ public interface Builder {
      * Returns a reference to the object being constructed or result being
      * calculated by the builder.
      *
-     * @return the object constructed or result calculated by the builder.
+     * @return The object constructed or result calculated by the builder.
      */
     T build();
 }
diff --git a/src/main/java/org/apache/commons/text/CaseUtils.java b/src/main/java/org/apache/commons/text/CaseUtils.java
index 2f83861197..ccf902a179 100644
--- a/src/main/java/org/apache/commons/text/CaseUtils.java
+++ b/src/main/java/org/apache/commons/text/CaseUtils.java
@@ -16,37 +16,37 @@
  */
 package org.apache.commons.text;
 
-import org.apache.commons.lang3.StringUtils;
-
 import java.util.HashSet;
 import java.util.Set;
 
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.commons.lang3.StringUtils;
+
 /**
  * 

Case manipulation operations on Strings that contain words.

* - *

This class tries to handle null input gracefully. - * An exception will not be thrown for a null input. - * Each method documents its behaviour in more detail.

+ *

This class tries to handle {@code null} input gracefully. + * An exception will not be thrown for a {@code null} input. + * Each method documents its behavior in more detail.

* * @since 1.2 */ public class CaseUtils { /** - *

CaseUtils instances should NOT be constructed in + *

{@code CaseUtils} instances should NOT be constructed in * standard programming. Instead, the class should be used as - * CaseUtils.toCamelCase("foo bar", true, new char[]{'-'});.

+ * {@code CaseUtils.toCamelCase("foo bar", true, new char[]{'-'});}.

* *

This constructor is public to permit tools that require a JavaBean * instance to operate.

*/ public CaseUtils() { - super(); } /** *

Converts all the delimiter separated words in a String into camelCase, - * that is each word is made up of a titlecase character and then a series of + * that is each word is made up of a title case character and then a series of * lowercase characters.

* *

The delimiters represent a set of characters understood to separate words. @@ -54,9 +54,12 @@ public CaseUtils() { * character may or may not be capitalized and it's determined by the user input for capitalizeFirstLetter * variable.

* - *

A null input String returns null. + *

A {@code null} input String returns {@code null}.

+ * + *

A input string with only delimiter characters returns {@code ""}.

+ * * Capitalization uses the Unicode title case, normally equivalent to - * upper case and cannot perform locale-sensitive mappings.

+ * upper case and cannot perform locale-sensitive mappings. * *
      * CaseUtils.toCamelCase(null, false)                                 = null
@@ -66,12 +69,13 @@ public CaseUtils() {
      * CaseUtils.toCamelCase("To.Camel.Case", false, new char[]{'.'})     = "toCamelCase"
      * CaseUtils.toCamelCase(" to @ Camel case", true, new char[]{'@'})   = "ToCamelCase"
      * CaseUtils.toCamelCase(" @to @ Camel case", false, new char[]{'@'}) = "toCamelCase"
+     * CaseUtils.toCamelCase(" @", false, new char[]{'@'})                = ""
      * 
* * @param str the String to be converted to camelCase, may be null * @param capitalizeFirstLetter boolean that determines if the first character of first word should be title case. * @param delimiters set of characters to determine capitalization, null and/or empty array means whitespace - * @return camelCase of String, null if null String input + * @return camelCase of String, {@code null} if null String input */ public static String toCamelCase(String str, final boolean capitalizeFirstLetter, final char... delimiters) { if (StringUtils.isEmpty(str)) { @@ -90,10 +94,7 @@ public static String toCamelCase(String str, final boolean capitalizeFirstLetter final int codePoint = str.codePointAt(index); if (delimiterSet.contains(codePoint)) { - capitalizeNext = true; - if (outOffset == 0) { - capitalizeNext = false; - } + capitalizeNext = outOffset != 0; index += Character.charCount(codePoint); } else if (capitalizeNext || outOffset == 0 && capitalizeFirstLetter) { final int titleCaseCodePoint = Character.toTitleCase(codePoint); @@ -105,11 +106,8 @@ public static String toCamelCase(String str, final boolean capitalizeFirstLetter index += Character.charCount(codePoint); } } - if (outOffset != 0) { - return new String(newCodePoints, 0, outOffset); - } else { - return str; - } + + return new String(newCodePoints, 0, outOffset); } /** @@ -122,7 +120,7 @@ public static String toCamelCase(String str, final boolean capitalizeFirstLetter private static Set generateDelimiterSet(final char[] delimiters) { final Set delimiterHashSet = new HashSet<>(); delimiterHashSet.add(Character.codePointAt(new char[]{' '}, 0)); - if (delimiters == null || delimiters.length == 0) { + if (ArrayUtils.isEmpty(delimiters)) { return delimiterHashSet; } diff --git a/src/main/java/org/apache/commons/text/CompositeFormat.java b/src/main/java/org/apache/commons/text/CompositeFormat.java index bbe5754b35..7d5fd147e7 100644 --- a/src/main/java/org/apache/commons/text/CompositeFormat.java +++ 
b/src/main/java/org/apache/commons/text/CompositeFormat.java @@ -60,7 +60,7 @@ public CompositeFormat(final Format parser, final Format formatter) { * @param obj the object to format * @param toAppendTo the {@link StringBuffer} to append to * @param pos the FieldPosition to use (or ignore). - * @return toAppendTo + * @return {@code toAppendTo} * @see Format#format(Object, StringBuffer, FieldPosition) */ @Override // Therefore has to use StringBuffer @@ -76,7 +76,7 @@ public StringBuffer format(final Object obj, final StringBuffer toAppendTo, * @param pos the ParsePosition containing the position to parse from, will * be updated according to parsing success (index) or failure * (error index) - * @return the parsed Object + * @return The parsed Object * @see Format#parseObject(String, ParsePosition) */ @Override diff --git a/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java b/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java index dcf0766c17..d7694a6be0 100644 --- a/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java +++ b/src/main/java/org/apache/commons/text/ExtendedMessageFormat.java @@ -27,39 +27,42 @@ import java.util.Map; import java.util.Objects; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.matcher.StringMatcherFactory; + /** - * Extends java.text.MessageFormat to allow pluggable/additional formatting + * Extends {@code java.text.MessageFormat} to allow pluggable/additional formatting * options for embedded format elements. Client code should specify a registry - * of FormatFactory instances associated with String + * of {@code FormatFactory} instances associated with {@code String} * format names. This registry will be consulted when the format elements are * parsed from the message pattern. 
In this way custom patterns can be specified, - * and the formats supported by java.text.MessageFormat can be overridden + * and the formats supported by {@code java.text.MessageFormat} can be overridden * at the format and/or format style level (see MessageFormat). A "format element" * embedded in the message pattern is specified (()? signifies optionality):
- * {argument-number(,format-name - * (,format-style)?)?} + * {@code {}argument-number({@code ,}format-name + * ({@code ,}format-style)?)?{@code }} * *

* format-name and format-style values are trimmed of surrounding whitespace - * in the manner of java.text.MessageFormat. If format-name denotes - * FormatFactory formatFactoryInstance in registry, a Format + * in the manner of {@code java.text.MessageFormat}. If format-name denotes + * {@code FormatFactory formatFactoryInstance} in {@code registry}, a {@code Format} * matching format-name and format-style is requested from - * formatFactoryInstance. If this is successful, the Format + * {@code formatFactoryInstance}. If this is successful, the {@code Format} * found is used for this format element. *

* *

NOTICE: The various subformat mutator methods are considered unnecessary; they exist on the parent * class to allow the type of customization which it is the job of this class to provide in * a configurable fashion. These methods have thus been disabled and will throw - * UnsupportedOperationException if called. + * {@code UnsupportedOperationException} if called. *

* - *

Limitations inherited from java.text.MessageFormat:

+ *

Limitations inherited from {@code java.text.MessageFormat}:

*
    *
  • When using "choice" subformats, support for nested formatting instructions is limited * to that provided by the base class.
  • - *
  • Thread-safety of Formats, including MessageFormat and thus - * ExtendedMessageFormat, is not guaranteed.
  • + *
  • Thread-safety of {@code Format}s, including {@code MessageFormat} and thus + * {@code ExtendedMessageFormat}, is not guaranteed.
  • *
* * @since 1.0 @@ -79,7 +82,7 @@ public class ExtendedMessageFormat extends MessageFormat { /** * The empty string. */ - private static final String DUMMY_PATTERN = ""; + private static final String DUMMY_PATTERN = StringUtils.EMPTY; /** * A comma. @@ -87,12 +90,12 @@ public class ExtendedMessageFormat extends MessageFormat { private static final char START_FMT = ','; /** - * A right side squigly brace. + * A right side squiggly brace. */ private static final char END_FE = '}'; /** - * A left side squigly brace. + * A left side squiggly brace. */ private static final char START_FE = '{'; @@ -411,21 +414,24 @@ private String parseFormatDescription(final String pattern, final ParsePosition seekNonWs(pattern, pos); final int text = pos.getIndex(); int depth = 1; - for (; pos.getIndex() < pattern.length(); next(pos)) { + while (pos.getIndex() < pattern.length()) { switch (pattern.charAt(pos.getIndex())) { case START_FE: depth++; + next(pos); break; case END_FE: depth--; if (depth == 0) { return pattern.substring(text, pos.getIndex()); } + next(pos); break; case QUOTE: getQuotedString(pattern, pos); break; default: + next(pos); break; } } @@ -487,7 +493,7 @@ private void seekNonWs(final String pattern, final ParsePosition pos) { int len = 0; final char[] buffer = pattern.toCharArray(); do { - len = StrMatcher.splitMatcher().isMatch(buffer, pos.getIndex()); + len = StringMatcherFactory.INSTANCE.splitMatcher().isMatch(buffer, pos.getIndex(), 0, buffer.length); pos.setIndex(pos.getIndex() + len); } while (len > 0 && pos.getIndex() < pattern.length()); } @@ -496,7 +502,7 @@ private void seekNonWs(final String pattern, final ParsePosition pos) { * Convenience method to advance parse position by 1. 
* * @param pos ParsePosition - * @return pos + * @return {@code pos} */ private ParsePosition next(final ParsePosition pos) { pos.setIndex(pos.getIndex() + 1); @@ -504,13 +510,13 @@ private ParsePosition next(final ParsePosition pos) { } /** - * Consume a quoted string, adding it to appendTo if + * Consume a quoted string, adding it to {@code appendTo} if * specified. * * @param pattern pattern to parse * @param pos current parse position * @param appendTo optional StringBuilder to append - * @return appendTo + * @return {@code appendTo} */ private StringBuilder appendQuotedString(final String pattern, final ParsePosition pos, final StringBuilder appendTo) { @@ -553,7 +559,7 @@ private void getQuotedString(final String pattern, final ParsePosition pos) { /** * Learn whether the specified Collection contains non-null elements. * @param coll to check - * @return true if some Object was found, false otherwise. + * @return {@code true} if some Object was found, {@code false} otherwise. */ private boolean containsElements(final Collection coll) { if (coll == null || coll.isEmpty()) { diff --git a/src/main/java/org/apache/commons/text/FormatFactory.java b/src/main/java/org/apache/commons/text/FormatFactory.java index ceee01bc1a..d2908d3f0d 100644 --- a/src/main/java/org/apache/commons/text/FormatFactory.java +++ b/src/main/java/org/apache/commons/text/FormatFactory.java @@ -31,8 +31,8 @@ public interface FormatFactory { * * @param name The format type name * @param arguments Arguments used to create the format instance. This allows the - * FormatFactory to implement the "format style" - * concept from java.text.MessageFormat. + * {@code FormatFactory} to implement the "format style" + * concept from {@code java.text.MessageFormat}. 
* @param locale The locale, may be null * @return The format instance */ diff --git a/src/main/java/org/apache/commons/text/FormattableUtils.java b/src/main/java/org/apache/commons/text/FormattableUtils.java index 043cbf177b..78b5bc6941 100644 --- a/src/main/java/org/apache/commons/text/FormattableUtils.java +++ b/src/main/java/org/apache/commons/text/FormattableUtils.java @@ -16,10 +16,12 @@ */ package org.apache.commons.text; +import static java.util.FormattableFlags.LEFT_JUSTIFY; + import java.util.Formattable; import java.util.Formatter; -import static java.util.FormattableFlags.LEFT_JUSTIFY; +import org.apache.commons.lang3.StringUtils; /** *

Provides utilities for working with the {@code Formattable} interface.

@@ -47,7 +49,6 @@ public class FormattableUtils { * instance to operate.

*/ public FormattableUtils() { - super(); } //----------------------------------------------------------------------- @@ -56,7 +57,7 @@ public FormattableUtils() { * {@code Formattable}. * * @param formattable the instance to convert to a string, not null - * @return the resulting string, not null + * @return The resulting string, not null */ public static String toString(final Formattable formattable) { return String.format(SIMPLEST_FORMAT, formattable); @@ -72,7 +73,7 @@ public static String toString(final Formattable formattable) { * @param flags the flags for formatting, see {@code Formattable} * @param width the width of the output, see {@code Formattable} * @param precision the precision of the output, see {@code Formattable} - * @return the {@code formatter} instance, not null + * @return The {@code formatter} instance, not null */ public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width, final int precision) { @@ -89,7 +90,7 @@ public static Formatter append(final CharSequence seq, final Formatter formatter * @param width the width of the output, see {@code Formattable} * @param precision the precision of the output, see {@code Formattable} * @param padChar the pad character to use - * @return the {@code formatter} instance, not null + * @return The {@code formatter} instance, not null */ public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width, final int precision, final char padChar) { @@ -107,7 +108,7 @@ public static Formatter append(final CharSequence seq, final Formatter formatter * @param precision the precision of the output, see {@code Formattable} * @param ellipsis the ellipsis to use when precision dictates truncation, null or * empty causes a hard truncation - * @return the {@code formatter} instance, not null + * @return The {@code formatter} instance, not null */ public static Formatter append(final CharSequence seq, final 
Formatter formatter, final int flags, final int width, final int precision, final CharSequence ellipsis) { @@ -125,21 +126,21 @@ public static Formatter append(final CharSequence seq, final Formatter formatter * @param padChar the pad character to use * @param ellipsis the ellipsis to use when precision dictates truncation, null or * empty causes a hard truncation - * @return the {@code formatter} instance, not null + * @return The {@code formatter} instance, not null */ public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width, final int precision, final char padChar, final CharSequence ellipsis) { if (!(ellipsis == null || precision < 0 || ellipsis.length() <= precision)) { throw new IllegalArgumentException( - String.format("Specified ellipsis '%1$s' exceeds precision of %2$s", + String.format("Specified ellipsis '%s' exceeds precision of %s", ellipsis, - Integer.valueOf(precision))); + precision)); } final StringBuilder buf = new StringBuilder(seq); if (precision >= 0 && precision < seq.length()) { final CharSequence _ellipsis; if (ellipsis == null) { - _ellipsis = ""; + _ellipsis = StringUtils.EMPTY; } else { _ellipsis = ellipsis; } diff --git a/src/main/java/org/apache/commons/text/RandomStringGenerator.java b/src/main/java/org/apache/commons/text/RandomStringGenerator.java index dd50f96166..3a36e5d3b9 100644 --- a/src/main/java/org/apache/commons/text/RandomStringGenerator.java +++ b/src/main/java/org/apache/commons/text/RandomStringGenerator.java @@ -16,14 +16,17 @@ */ package org.apache.commons.text; -import org.apache.commons.lang3.Validate; - import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.ThreadLocalRandom; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; + /** *

* Generates random Unicode strings containing the specified number of code points. @@ -48,7 +51,7 @@ * String randomLetters = generator.generate(20); *

*

- * {@code RandomStringBuilder} instances are thread-safe when using the + * {@code RandomStringGenerator} instances are thread-safe when using the * default random number generator (RNG). If a custom RNG is set by calling the method * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety * must be ensured externally. @@ -113,7 +116,7 @@ private RandomStringGenerator(final int minimumCodePoint, final int maximumCodeP * the minimum value allowed * @param maxInclusive * the maximum value allowed - * @return the random number. + * @return The random number. */ private int generateRandomNumber(final int minInclusive, final int maxInclusive) { if (random != null) { @@ -127,7 +130,7 @@ private int generateRandomNumber(final int minInclusive, final int maxInclusive) * or the user-supplied source of randomness. * * @param characterList predefined char list. - * @return the random number. + * @return The random number. */ private int generateRandomNumber(final List characterList) { final int listSize = characterList.size(); @@ -156,13 +159,13 @@ private int generateRandomNumber(final List characterList) { * * @param length * the number of code points to generate - * @return the generated string + * @return The generated string * @throws IllegalArgumentException * if {@code length < 0} */ public String generate(final int length) { if (length == 0) { - return ""; + return StringUtils.EMPTY; } Validate.isTrue(length > 0, "Length %d is smaller than zero.", length); @@ -170,7 +173,7 @@ public String generate(final int length) { long remaining = length; do { - int codePoint; + final int codePoint; if (characterList != null && !characterList.isEmpty()) { codePoint = generateRandomNumber(characterList); } else { @@ -213,7 +216,7 @@ public String generate(final int length) { * the minimum (inclusive) number of code points to generate * @param maxLengthInclusive * the maximum (inclusive) number of code points to generate - * @return 
the generated string + * @return The generated string * @throws IllegalArgumentException * if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive} * @see RandomStringGenerator#generate(int) @@ -228,7 +231,7 @@ public String generate(final int minLengthInclusive, final int maxLengthInclusiv /** *

A builder for generating {@code RandomStringGenerator} instances.

- *

The behaviour of a generator is controlled by properties set by this + *

The behavior of a generator is controlled by properties set by this * builder. Each property has a default value, which can be overridden by * calling the methods defined in this class, prior to calling {@link #build()}.

* @@ -340,8 +343,8 @@ public Builder withinRange(final int minimumCodePoint, final int maximumCodePoin * @param pairs array of characters array, expected is to pass min, max pairs through this arg. * @return {@code this}, to allow method chaining. */ - public Builder withinRange(final char[] ... pairs) { - characterList = new ArrayList(); + public Builder withinRange(final char[]... pairs) { + characterList = new ArrayList<>(); for (final char[] pair : pairs) { Validate.isTrue(pair.length == 2, "Each pair must contain minimum and maximum code point"); @@ -366,7 +369,7 @@ public Builder withinRange(final char[] ... pairs) { * *

* Passing {@code null} or an empty array to this method will revert to the - * default behaviour of allowing any character. Multiple calls to this + * default behavior of allowing any character. Multiple calls to this * method will replace the previously stored predicates. *

* @@ -375,7 +378,7 @@ public Builder withinRange(final char[] ... pairs) { * @return {@code this}, to allow method chaining */ public Builder filteredBy(final CharacterPredicate... predicates) { - if (predicates == null || predicates.length == 0) { + if (ArrayUtils.isEmpty(predicates)) { inclusivePredicates = null; return this; } @@ -386,9 +389,7 @@ public Builder filteredBy(final CharacterPredicate... predicates) { inclusivePredicates.clear(); } - for (final CharacterPredicate predicate : predicates) { - inclusivePredicates.add(predicate); - } + Collections.addAll(inclusivePredicates, predicates); return this; } @@ -397,7 +398,7 @@ public Builder filteredBy(final CharacterPredicate... predicates) { *

* Overrides the default source of randomness. It is highly * recommended that a random number generator library like - * Apache Commons RNG + * Apache Commons RNG * be used to provide the random number generation. *

* @@ -437,7 +438,7 @@ public Builder usingRandom(final TextRandomProvider random) { * *

* Passing {@code null} or an empty array to this method will revert to the - * default behaviour of allowing any character. Multiple calls to this + * default behavior of allowing any character. Multiple calls to this * method will replace the previously stored Character. *

* @@ -446,7 +447,7 @@ public Builder usingRandom(final TextRandomProvider random) { * @return {@code this}, to allow method chaining * @since 1.2 */ - public Builder selectFrom(final char ... chars) { + public Builder selectFrom(final char... chars) { characterList = new ArrayList<>(); for (final char c : chars) { characterList.add(c); @@ -456,7 +457,7 @@ public Builder selectFrom(final char ... chars) { /** *

Builds the {@code RandomStringGenerator} using the properties specified.

- * @return the configured {@code RandomStringGenerator} + * @return The configured {@code RandomStringGenerator} */ @Override public RandomStringGenerator build() { diff --git a/src/main/java/org/apache/commons/text/StrBuilder.java b/src/main/java/org/apache/commons/text/StrBuilder.java index 446b2b90cb..90827bf011 100644 --- a/src/main/java/org/apache/commons/text/StrBuilder.java +++ b/src/main/java/org/apache/commons/text/StrBuilder.java @@ -25,6 +25,9 @@ import java.util.List; import java.util.Objects; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; + /** * Builds a string from constituent parts providing a more flexible and powerful API * than StringBuffer. @@ -65,28 +68,207 @@ *

* * @since 1.0 - * + * @deprecated Deprecated as of 1.3, use {@link TextStringBuilder} instead. This class will be removed in 2.0. */ +@Deprecated public class StrBuilder implements CharSequence, Appendable, Serializable, Builder { + //----------------------------------------------------------------------- + /** + * Inner class to allow StrBuilder to operate as a reader. + */ + class StrBuilderReader extends Reader { + /** The current stream position. */ + private int pos; + /** The last mark position. */ + private int mark; + + /** + * Default constructor. + */ + StrBuilderReader() { + } + + /** {@inheritDoc} */ + @Override + public void close() { + // do nothing + } + + /** {@inheritDoc} */ + @Override + public void mark(final int readAheadLimit) { + mark = pos; + } + + /** {@inheritDoc} */ + @Override + public boolean markSupported() { + return true; + } + + /** {@inheritDoc} */ + @Override + public int read() { + if (!ready()) { + return -1; + } + return StrBuilder.this.charAt(pos++); + } + + /** {@inheritDoc} */ + @Override + public int read(final char[] b, final int off, int len) { + if (off < 0 || len < 0 || off > b.length + || off + len > b.length || off + len < 0) { + throw new IndexOutOfBoundsException(); + } + if (len == 0) { + return 0; + } + if (pos >= StrBuilder.this.size()) { + return -1; + } + if (pos + len > size()) { + len = StrBuilder.this.size() - pos; + } + StrBuilder.this.getChars(pos, pos + len, b, off); + pos += len; + return len; + } + + /** {@inheritDoc} */ + @Override + public boolean ready() { + return pos < StrBuilder.this.size(); + } + + /** {@inheritDoc} */ + @Override + public void reset() { + pos = mark; + } + + /** {@inheritDoc} */ + @Override + public long skip(long n) { + if (pos + n > StrBuilder.this.size()) { + n = StrBuilder.this.size() - pos; + } + if (n < 0) { + return 0; + } + pos += n; + return n; + } + } + + //----------------------------------------------------------------------- + /** + * Inner class to allow StrBuilder 
to operate as a tokenizer. + */ + class StrBuilderTokenizer extends StrTokenizer { + + /** + * Default constructor. + */ + StrBuilderTokenizer() { + } + + /** {@inheritDoc} */ + @Override + public String getContent() { + final String str = super.getContent(); + if (str == null) { + return StrBuilder.this.toString(); + } + return str; + } + + /** {@inheritDoc} */ + @Override + protected List tokenize(final char[] chars, final int offset, final int count) { + if (chars == null) { + return super.tokenize( + StrBuilder.this.buffer, 0, StrBuilder.this.size()); + } + return super.tokenize(chars, offset, count); + } + } + + //----------------------------------------------------------------------- + /** + * Inner class to allow StrBuilder to operate as a writer. + */ + class StrBuilderWriter extends Writer { + + /** + * Default constructor. + */ + StrBuilderWriter() { + } + + /** {@inheritDoc} */ + @Override + public void close() { + // do nothing + } + + /** {@inheritDoc} */ + @Override + public void flush() { + // do nothing + } + + /** {@inheritDoc} */ + @Override + public void write(final char[] cbuf) { + StrBuilder.this.append(cbuf); + } + + /** {@inheritDoc} */ + @Override + public void write(final char[] cbuf, final int off, final int len) { + StrBuilder.this.append(cbuf, off, len); + } + + /** {@inheritDoc} */ + @Override + public void write(final int c) { + StrBuilder.this.append((char) c); + } + + /** {@inheritDoc} */ + @Override + public void write(final String str) { + StrBuilder.this.append(str); + } + + /** {@inheritDoc} */ + @Override + public void write(final String str, final int off, final int len) { + StrBuilder.this.append(str, off, len); + } + } /** * The extra capacity for new builders. */ static final int CAPACITY = 32; - /** * Required for serialization support. * * @see java.io.Serializable */ private static final long serialVersionUID = 7628716375283629643L; - /** Internal data storage. 
*/ char[] buffer; // package-protected for test code use only + /** Current size of the buffer. */ private int size; + /** The new line. */ private String newLine; + /** The null text. */ private String nullText; @@ -104,7 +286,6 @@ public StrBuilder() { * @param initialCapacity the initial capacity, zero or less will be converted to 32 */ public StrBuilder(int initialCapacity) { - super(); if (initialCapacity <= 0) { initialCapacity = CAPACITY; } @@ -118,7 +299,6 @@ public StrBuilder(int initialCapacity) { * @param str the string to copy, null treated as blank string */ public StrBuilder(final String str) { - super(); if (str == null) { buffer = new char[CAPACITY]; } else { @@ -127,436 +307,303 @@ public StrBuilder(final String str) { } } - //----------------------------------------------------------------------- - /** - * Gets the text to be appended when a new line is added. - * - * @return the new line text, null means use system default - */ - public String getNewLineText() { - return newLine; - } - /** - * Sets the text to be appended when a new line is added. + * Appends a boolean value to the string builder. * - * @param newLine the new line text, null means use system default + * @param value the value to append * @return this, to enable chaining */ - public StrBuilder setNewLineText(final String newLine) { - this.newLine = newLine; + public StrBuilder append(final boolean value) { + if (value) { + ensureCapacity(size + 4); + buffer[size++] = 't'; + buffer[size++] = 'r'; + buffer[size++] = 'u'; + buffer[size++] = 'e'; + } else { + ensureCapacity(size + 5); + buffer[size++] = 'f'; + buffer[size++] = 'a'; + buffer[size++] = 'l'; + buffer[size++] = 's'; + buffer[size++] = 'e'; + } return this; } - //----------------------------------------------------------------------- /** - * Gets the text to be appended when null is added. + * Appends a char value to the string builder. 
* - * @return the null text, null means no append + * @param ch the value to append + * @return this, to enable chaining */ - public String getNullText() { - return nullText; + @Override + public StrBuilder append(final char ch) { + final int len = length(); + ensureCapacity(len + 1); + buffer[size++] = ch; + return this; } /** - * Sets the text to be appended when null is added. + * Appends a char array to the string builder. + * Appending null will call {@link #appendNull()}. * - * @param nullText the null text, null means no append + * @param chars the char array to append * @return this, to enable chaining */ - public StrBuilder setNullText(String nullText) { - if (nullText != null && nullText.isEmpty()) { - nullText = null; + public StrBuilder append(final char[] chars) { + if (chars == null) { + return appendNull(); + } + final int strLen = chars.length; + if (strLen > 0) { + final int len = length(); + ensureCapacity(len + strLen); + System.arraycopy(chars, 0, buffer, len, strLen); + size += strLen; } - this.nullText = nullText; return this; } - //----------------------------------------------------------------------- /** - * Gets the length of the string builder. + * Appends a char array to the string builder. + * Appending null will call {@link #appendNull()}. 
* - * @return the length + * @param chars the char array to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining */ - @Override - public int length() { - return size; + public StrBuilder append(final char[] chars, final int startIndex, final int length) { + if (chars == null) { + return appendNull(); + } + if (startIndex < 0 || startIndex > chars.length) { + throw new StringIndexOutOfBoundsException("Invalid startIndex: " + length); + } + if (length < 0 || startIndex + length > chars.length) { + throw new StringIndexOutOfBoundsException("Invalid length: " + length); + } + if (length > 0) { + final int len = length(); + ensureCapacity(len + length); + System.arraycopy(chars, startIndex, buffer, len, length); + size += length; + } + return this; } /** - * Updates the length of the builder by either dropping the last characters - * or adding filler of Unicode zero. + * Appends the contents of a char buffer to this string builder. + * Appending null will call {@link #appendNull()}. 
* - * @param length the length to set to, must be zero or positive + * @param buf the char buffer to append * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the length is negative */ - public StrBuilder setLength(final int length) { - if (length < 0) { - throw new StringIndexOutOfBoundsException(length); + public StrBuilder append(final CharBuffer buf) { + if (buf == null) { + return appendNull(); } - if (length < size) { - size = length; - } else if (length > size) { - ensureCapacity(length); - final int oldEnd = size; - final int newEnd = length; - size = length; - for (int i = oldEnd; i < newEnd; i++) { - buffer[i] = '\0'; - } + if (buf.hasArray()) { + final int length = buf.remaining(); + final int len = length(); + ensureCapacity(len + length); + System.arraycopy(buf.array(), buf.arrayOffset() + buf.position(), buffer, len, length); + size += length; + } else { + append(buf.toString()); } return this; } - //----------------------------------------------------------------------- /** - * Gets the current size of the internal character array buffer. + * Appends the contents of a char buffer to this string builder. + * Appending null will call {@link #appendNull()}. 
* - * @return the capacity + * @param buf the char buffer to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining */ - public int capacity() { - return buffer.length; + public StrBuilder append(final CharBuffer buf, final int startIndex, final int length) { + if (buf == null) { + return appendNull(); + } + if (buf.hasArray()) { + final int totalLength = buf.remaining(); + if (startIndex < 0 || startIndex > totalLength) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + if (length < 0 || startIndex + length > totalLength) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + final int len = length(); + ensureCapacity(len + length); + System.arraycopy(buf.array(), buf.arrayOffset() + buf.position() + startIndex, buffer, len, length); + size += length; + } else { + append(buf.toString(), startIndex, length); + } + return this; } /** - * Checks the capacity and ensures that it is at least the size specified. + * Appends a CharSequence to this string builder. + * Appending null will call {@link #appendNull()}. 
* - * @param capacity the capacity to ensure + * @param seq the CharSequence to append * @return this, to enable chaining */ - public StrBuilder ensureCapacity(final int capacity) { - if (capacity > buffer.length) { - final char[] old = buffer; - buffer = new char[capacity * 2]; - System.arraycopy(old, 0, buffer, 0, size); + @Override + public StrBuilder append(final CharSequence seq) { + if (seq == null) { + return appendNull(); } - return this; + if (seq instanceof StrBuilder) { + return append((StrBuilder) seq); + } + if (seq instanceof StringBuilder) { + return append((StringBuilder) seq); + } + if (seq instanceof StringBuffer) { + return append((StringBuffer) seq); + } + if (seq instanceof CharBuffer) { + return append((CharBuffer) seq); + } + return append(seq.toString()); } /** - * Minimizes the capacity to the actual length of the string. + * Appends part of a CharSequence to this string builder. + * Appending null will call {@link #appendNull()}. * + * @param seq the CharSequence to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid * @return this, to enable chaining */ - public StrBuilder minimizeCapacity() { - if (buffer.length > length()) { - final char[] old = buffer; - buffer = new char[length()]; - System.arraycopy(old, 0, buffer, 0, size); + @Override + public StrBuilder append(final CharSequence seq, final int startIndex, final int length) { + if (seq == null) { + return appendNull(); } - return this; + return append(seq.toString(), startIndex, length); } - //----------------------------------------------------------------------- /** - * Gets the length of the string builder. - *

- * This method is the same as {@link #length()} and is provided to match the - * API of Collections. + * Appends a double value to the string builder using {@code String.valueOf}. * - * @return the length + * @param value the value to append + * @return this, to enable chaining */ - public int size() { - return size; + public StrBuilder append(final double value) { + return append(String.valueOf(value)); } /** - * Checks is the string builder is empty (convenience Collections API style method). - *

- * This method is the same as checking {@link #length()} and is provided to match the - * API of Collections. + * Appends a float value to the string builder using {@code String.valueOf}. * - * @return true if the size is 0. + * @param value the value to append + * @return this, to enable chaining */ - public boolean isEmpty() { - return size == 0; + public StrBuilder append(final float value) { + return append(String.valueOf(value)); } /** - * Clears the string builder (convenience Collections API style method). - *

- * This method does not reduce the size of the internal character buffer. - * To do that, call clear() followed by {@link #minimizeCapacity()}. - *

- * This method is the same as {@link #setLength(int)} called with zero - * and is provided to match the API of Collections. + * Appends an int value to the string builder using {@code String.valueOf}. * + * @param value the value to append * @return this, to enable chaining */ - public StrBuilder clear() { - size = 0; - return this; + public StrBuilder append(final int value) { + return append(String.valueOf(value)); } - //----------------------------------------------------------------------- /** - * Gets the character at the specified index. + * Appends a long value to the string builder using {@code String.valueOf}. * - * @see #setCharAt(int, char) - * @see #deleteCharAt(int) - * @param index the index to retrieve, must be valid - * @return the character at the index - * @throws IndexOutOfBoundsException if the index is invalid + * @param value the value to append + * @return this, to enable chaining */ - @Override - public char charAt(final int index) { - if (index < 0 || index >= length()) { - throw new StringIndexOutOfBoundsException(index); - } - return buffer[index]; + public StrBuilder append(final long value) { + return append(String.valueOf(value)); } /** - * Sets the character at the specified index. + * Appends an object to this string builder. + * Appending null will call {@link #appendNull()}. 
* - * @see #charAt(int) - * @see #deleteCharAt(int) - * @param index the index to set - * @param ch the new character + * @param obj the object to append * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder setCharAt(final int index, final char ch) { - if (index < 0 || index >= length()) { - throw new StringIndexOutOfBoundsException(index); + public StrBuilder append(final Object obj) { + if (obj == null) { + return appendNull(); } - buffer[index] = ch; - return this; + if (obj instanceof CharSequence) { + return append((CharSequence) obj); + } + return append(obj.toString()); } /** - * Deletes the character at the specified index. + * Appends another string builder to this string builder. + * Appending null will call {@link #appendNull()}. * - * @see #charAt(int) - * @see #setCharAt(int, char) - * @param index the index to delete + * @param str the string builder to append * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder deleteCharAt(final int index) { - if (index < 0 || index >= size) { - throw new StringIndexOutOfBoundsException(index); + public StrBuilder append(final StrBuilder str) { + if (str == null) { + return appendNull(); + } + final int strLen = str.length(); + if (strLen > 0) { + final int len = length(); + ensureCapacity(len + strLen); + System.arraycopy(str.buffer, 0, buffer, len, strLen); + size += strLen; } - deleteImpl(index, index + 1, 1); return this; } - //----------------------------------------------------------------------- /** - * Copies the builder's character array into a new character array. + * Appends part of a string builder to this string builder. + * Appending null will call {@link #appendNull()}. 
* - * @return a new array that represents the contents of the builder + * @param str the string to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining */ - public char[] toCharArray() { - if (size == 0) { - return new char[0]; + public StrBuilder append(final StrBuilder str, final int startIndex, final int length) { + if (str == null) { + return appendNull(); } - final char[] chars = new char[size]; - System.arraycopy(buffer, 0, chars, 0, size); - return chars; + if (startIndex < 0 || startIndex > str.length()) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + if (length < 0 || startIndex + length > str.length()) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + if (length > 0) { + final int len = length(); + ensureCapacity(len + length); + str.getChars(startIndex, startIndex + length, buffer, len); + size += length; + } + return this; } /** - * Copies part of the builder's character array into a new character array. + * Appends a string to this string builder. + * Appending null will call {@link #appendNull()}. * - * @param startIndex the start index, inclusive, must be valid - * @param endIndex the end index, exclusive, must be valid except that - * if too large it is treated as end of string - * @return a new array that holds part of the contents of the builder - * @throws IndexOutOfBoundsException if startIndex is invalid, - * or if endIndex is invalid (but endIndex greater than size is valid) - */ - public char[] toCharArray(final int startIndex, int endIndex) { - endIndex = validateRange(startIndex, endIndex); - final int len = endIndex - startIndex; - if (len == 0) { - return new char[0]; - } - final char[] chars = new char[len]; - System.arraycopy(buffer, startIndex, chars, 0, len); - return chars; - } - - /** - * Copies the character array into the specified array. 
- * - * @param destination the destination array, null will cause an array to be created - * @return the input array, unless that was null or too small - */ - public char[] getChars(char[] destination) { - final int len = length(); - if (destination == null || destination.length < len) { - destination = new char[len]; - } - System.arraycopy(buffer, 0, destination, 0, len); - return destination; - } - - /** - * Copies the character array into the specified array. - * - * @param startIndex first index to copy, inclusive, must be valid - * @param endIndex last index, exclusive, must be valid - * @param destination the destination array, must not be null or too small - * @param destinationIndex the index to start copying in destination - * @throws NullPointerException if the array is null - * @throws IndexOutOfBoundsException if any index is invalid - */ - public void getChars(final int startIndex, - final int endIndex, - final char[] destination, - final int destinationIndex) { - if (startIndex < 0) { - throw new StringIndexOutOfBoundsException(startIndex); - } - if (endIndex < 0 || endIndex > length()) { - throw new StringIndexOutOfBoundsException(endIndex); - } - if (startIndex > endIndex) { - throw new StringIndexOutOfBoundsException("end < start"); - } - System.arraycopy(buffer, startIndex, destination, destinationIndex, endIndex - startIndex); - } - - //----------------------------------------------------------------------- - /** - * If possible, reads chars from the provided {@link Readable} directly into underlying - * character buffer without making extra copies. 
- * - * @param readable object to read from - * @return the number of characters read - * @throws IOException if an I/O error occurs - * - * @see #appendTo(Appendable) - */ - public int readFrom(final Readable readable) throws IOException { - final int oldSize = size; - if (readable instanceof Reader) { - final Reader r = (Reader) readable; - ensureCapacity(size + 1); - int read; - while ((read = r.read(buffer, size, buffer.length - size)) != -1) { - size += read; - ensureCapacity(size + 1); - } - } else if (readable instanceof CharBuffer) { - final CharBuffer cb = (CharBuffer) readable; - final int remaining = cb.remaining(); - ensureCapacity(size + remaining); - cb.get(buffer, size, remaining); - size += remaining; - } else { - while (true) { - ensureCapacity(size + 1); - final CharBuffer buf = CharBuffer.wrap(buffer, size, buffer.length - size); - final int read = readable.read(buf); - if (read == -1) { - break; - } - size += read; - } - } - return size - oldSize; - } - - //----------------------------------------------------------------------- - /** - * Appends the new line string to this string builder. - *

- * The new line string can be altered using {@link #setNewLineText(String)}. - * This might be used to force the output to always use Unix line endings - * even when on Windows. - * - * @return this, to enable chaining - */ - public StrBuilder appendNewLine() { - if (newLine == null) { - append(System.lineSeparator()); - return this; - } - return append(newLine); - } - - /** - * Appends the text representing null to this string builder. - * - * @return this, to enable chaining - */ - public StrBuilder appendNull() { - if (nullText == null) { - return this; - } - return append(nullText); - } - - /** - * Appends an object to this string builder. - * Appending null will call {@link #appendNull()}. - * - * @param obj the object to append - * @return this, to enable chaining - */ - public StrBuilder append(final Object obj) { - if (obj == null) { - return appendNull(); - } - if (obj instanceof CharSequence) { - return append((CharSequence) obj); - } - return append(obj.toString()); - } - - /** - * Appends a CharSequence to this string builder. - * Appending null will call {@link #appendNull()}. - * - * @param seq the CharSequence to append - * @return this, to enable chaining - */ - @Override - public StrBuilder append(final CharSequence seq) { - if (seq == null) { - return appendNull(); - } - if (seq instanceof StrBuilder) { - return append((StrBuilder) seq); - } - if (seq instanceof StringBuilder) { - return append((StringBuilder) seq); - } - if (seq instanceof StringBuffer) { - return append((StringBuffer) seq); - } - if (seq instanceof CharBuffer) { - return append((CharBuffer) seq); - } - return append(seq.toString()); - } - - /** - * Appends part of a CharSequence to this string builder. - * Appending null will call {@link #appendNull()}. 
- * - * @param seq the CharSequence to append - * @param startIndex the start index, inclusive, must be valid - * @param length the length to append, must be valid - * @return this, to enable chaining - */ - @Override - public StrBuilder append(final CharSequence seq, final int startIndex, final int length) { - if (seq == null) { - return appendNull(); - } - return append(seq.toString(), startIndex, length); - } - - /** - * Appends a string to this string builder. - * Appending null will call {@link #appendNull()}. - * - * @param str the string to append - * @return this, to enable chaining + * @param str the string to append + * @return this, to enable chaining */ public StrBuilder append(final String str) { if (str == null) { @@ -572,7 +619,6 @@ public StrBuilder append(final String str) { return this; } - /** * Appends part of a string to this string builder. * Appending null will call {@link #appendNull()}. @@ -589,7 +635,7 @@ public StrBuilder append(final String str, final int startIndex, final int lengt if (startIndex < 0 || startIndex > str.length()) { throw new StringIndexOutOfBoundsException("startIndex must be valid"); } - if (length < 0 || (startIndex + length) > str.length()) { + if (length < 0 || startIndex + length > str.length()) { throw new StringIndexOutOfBoundsException("length must be valid"); } if (length > 0) { @@ -613,60 +659,6 @@ public StrBuilder append(final String format, final Object... objs) { return append(String.format(format, objs)); } - /** - * Appends the contents of a char buffer to this string builder. - * Appending null will call {@link #appendNull()}. 
- * - * @param buf the char buffer to append - * @return this, to enable chaining - */ - public StrBuilder append(final CharBuffer buf) { - if (buf == null) { - return appendNull(); - } - if (buf.hasArray()) { - final int length = buf.remaining(); - final int len = length(); - ensureCapacity(len + length); - System.arraycopy(buf.array(), buf.arrayOffset() + buf.position(), buffer, len, length); - size += length; - } else { - append(buf.toString()); - } - return this; - } - - /** - * Appends the contents of a char buffer to this string builder. - * Appending null will call {@link #appendNull()}. - * - * @param buf the char buffer to append - * @param startIndex the start index, inclusive, must be valid - * @param length the length to append, must be valid - * @return this, to enable chaining - */ - public StrBuilder append(final CharBuffer buf, final int startIndex, final int length) { - if (buf == null) { - return appendNull(); - } - if (buf.hasArray()) { - final int totalLength = buf.remaining(); - if (startIndex < 0 || startIndex > totalLength) { - throw new StringIndexOutOfBoundsException("startIndex must be valid"); - } - if (length < 0 || (startIndex + length) > totalLength) { - throw new StringIndexOutOfBoundsException("length must be valid"); - } - final int len = length(); - ensureCapacity(len + length); - System.arraycopy(buf.array(), buf.arrayOffset() + buf.position() + startIndex, buffer, len, length); - size += length; - } else { - append(buf.toString(), startIndex, length); - } - return this; - } - /** * Appends a string buffer to this string builder. * Appending null will call {@link #appendNull()}. 
@@ -704,7 +696,7 @@ public StrBuilder append(final StringBuffer str, final int startIndex, final int if (startIndex < 0 || startIndex > str.length()) { throw new StringIndexOutOfBoundsException("startIndex must be valid"); } - if (length < 0 || (startIndex + length) > str.length()) { + if (length < 0 || startIndex + length > str.length()) { throw new StringIndexOutOfBoundsException("length must be valid"); } if (length > 0) { @@ -753,7 +745,7 @@ public StrBuilder append(final StringBuilder str, final int startIndex, final in if (startIndex < 0 || startIndex > str.length()) { throw new StringIndexOutOfBoundsException("startIndex must be valid"); } - if (length < 0 || (startIndex + length) > str.length()) { + if (length < 0 || startIndex + length > str.length()) { throw new StringIndexOutOfBoundsException("length must be valid"); } if (length > 0) { @@ -766,179 +758,242 @@ public StrBuilder append(final StringBuilder str, final int startIndex, final in } /** - * Appends another string builder to this string builder. - * Appending null will call {@link #appendNull()}. + * Appends each item in an iterable to the builder without any separators. + * Appending a null iterable will have no effect. + * Each object is appended using {@link #append(Object)}. * - * @param str the string builder to append + * @param iterable the iterable to append * @return this, to enable chaining */ - public StrBuilder append(final StrBuilder str) { - if (str == null) { - return appendNull(); - } - final int strLen = str.length(); - if (strLen > 0) { - final int len = length(); - ensureCapacity(len + strLen); - System.arraycopy(str.buffer, 0, buffer, len, strLen); - size += strLen; + public StrBuilder appendAll(final Iterable iterable) { + if (iterable != null) { + for (final Object o : iterable) { + append(o); + } } return this; } /** - * Appends part of a string builder to this string builder. - * Appending null will call {@link #appendNull()}. 
+ * Appends each item in an iterator to the builder without any separators. + * Appending a null iterator will have no effect. + * Each object is appended using {@link #append(Object)}. * - * @param str the string to append - * @param startIndex the start index, inclusive, must be valid - * @param length the length to append, must be valid + * @param it the iterator to append * @return this, to enable chaining */ - public StrBuilder append(final StrBuilder str, final int startIndex, final int length) { - if (str == null) { - return appendNull(); - } - if (startIndex < 0 || startIndex > str.length()) { - throw new StringIndexOutOfBoundsException("startIndex must be valid"); - } - if (length < 0 || (startIndex + length) > str.length()) { - throw new StringIndexOutOfBoundsException("length must be valid"); - } - if (length > 0) { - final int len = length(); - ensureCapacity(len + length); - str.getChars(startIndex, startIndex + length, buffer, len); - size += length; + public StrBuilder appendAll(final Iterator it) { + if (it != null) { + while (it.hasNext()) { + append(it.next()); + } } return this; } + + //----------------------------------------------------------------------- /** - * Appends a char array to the string builder. - * Appending null will call {@link #appendNull()}. + * Appends each item in an array to the builder without any separators. + * Appending a null array will have no effect. + * Each object is appended using {@link #append(Object)}. * - * @param chars the char array to append + * @param the element type + * @param array the array to append * @return this, to enable chaining */ - public StrBuilder append(final char[] chars) { - if (chars == null) { - return appendNull(); - } - final int strLen = chars.length; - if (strLen > 0) { - final int len = length(); - ensureCapacity(len + strLen); - System.arraycopy(chars, 0, buffer, len, strLen); - size += strLen; + public StrBuilder appendAll(@SuppressWarnings("unchecked") final T... 
array) { + /* + * @SuppressWarnings used to hide warning about vararg usage. We cannot + * use @SafeVarargs, since this method is not final. Using @SuppressWarnings + * is fine, because it isn't inherited by subclasses, so each subclass must + * vouch for itself whether its use of 'array' is safe. + */ + if (array != null && array.length > 0) { + for (final Object element : array) { + append(element); + } } return this; } /** - * Appends a char array to the string builder. - * Appending null will call {@link #appendNull()}. + * Appends an object to the builder padding on the left to a fixed width. + * The {@code String.valueOf} of the {@code int} value is used. + * If the formatted value is larger than the length, the left hand side is lost. * - * @param chars the char array to append - * @param startIndex the start index, inclusive, must be valid - * @param length the length to append, must be valid + * @param value the value to append + * @param width the fixed field width, zero or negative has no effect + * @param padChar the pad character to use * @return this, to enable chaining */ - public StrBuilder append(final char[] chars, final int startIndex, final int length) { - if (chars == null) { - return appendNull(); - } - if (startIndex < 0 || startIndex > chars.length) { - throw new StringIndexOutOfBoundsException("Invalid startIndex: " + length); - } - if (length < 0 || (startIndex + length) > chars.length) { - throw new StringIndexOutOfBoundsException("Invalid length: " + length); - } - if (length > 0) { - final int len = length(); - ensureCapacity(len + length); - System.arraycopy(chars, startIndex, buffer, len, length); - size += length; + public StrBuilder appendFixedWidthPadLeft(final int value, final int width, final char padChar) { + return appendFixedWidthPadLeft(String.valueOf(value), width, padChar); + } + + //----------------------------------------------------------------------- + /** + * Appends an object to the builder padding on the left to a 
fixed width. + * The {@code toString} of the object is used. + * If the object is larger than the length, the left hand side is lost. + * If the object is null, the null text value is used. + * + * @param obj the object to append, null uses null text + * @param width the fixed field width, zero or negative has no effect + * @param padChar the pad character to use + * @return this, to enable chaining + */ + public StrBuilder appendFixedWidthPadLeft(final Object obj, final int width, final char padChar) { + if (width > 0) { + ensureCapacity(size + width); + String str = obj == null ? getNullText() : obj.toString(); + if (str == null) { + str = StringUtils.EMPTY; + } + final int strLen = str.length(); + if (strLen >= width) { + str.getChars(strLen - width, strLen, buffer, size); + } else { + final int padLen = width - strLen; + for (int i = 0; i < padLen; i++) { + buffer[size + i] = padChar; + } + str.getChars(0, strLen, buffer, size + padLen); + } + size += width; } return this; } /** - * Appends a boolean value to the string builder. + * Appends an object to the builder padding on the right to a fixed length. + * The {@code String.valueOf} of the {@code int} value is used. + * If the object is larger than the length, the right hand side is lost. 
* * @param value the value to append + * @param width the fixed field width, zero or negative has no effect + * @param padChar the pad character to use * @return this, to enable chaining */ - public StrBuilder append(final boolean value) { - if (value) { - ensureCapacity(size + 4); - buffer[size++] = 't'; - buffer[size++] = 'r'; - buffer[size++] = 'u'; - buffer[size++] = 'e'; - } else { - ensureCapacity(size + 5); - buffer[size++] = 'f'; - buffer[size++] = 'a'; - buffer[size++] = 'l'; - buffer[size++] = 's'; - buffer[size++] = 'e'; + public StrBuilder appendFixedWidthPadRight(final int value, final int width, final char padChar) { + return appendFixedWidthPadRight(String.valueOf(value), width, padChar); + } + + /** + * Appends an object to the builder padding on the right to a fixed length. + * The {@code toString} of the object is used. + * If the object is larger than the length, the right hand side is lost. + * If the object is null, null text value is used. + * + * @param obj the object to append, null uses null text + * @param width the fixed field width, zero or negative has no effect + * @param padChar the pad character to use + * @return this, to enable chaining + */ + public StrBuilder appendFixedWidthPadRight(final Object obj, final int width, final char padChar) { + if (width > 0) { + ensureCapacity(size + width); + String str = obj == null ? getNullText() : obj.toString(); + if (str == null) { + str = StringUtils.EMPTY; + } + final int strLen = str.length(); + if (strLen >= width) { + str.getChars(0, width, buffer, size); + } else { + final int padLen = width - strLen; + str.getChars(0, strLen, buffer, size); + for (int i = 0; i < padLen; i++) { + buffer[size + strLen + i] = padChar; + } + } + size += width; } return this; } /** - * Appends a char value to the string builder. + * Appends a boolean value followed by a new line to the string builder. 
+ * + * @param value the value to append + * @return this, to enable chaining + */ + public StrBuilder appendln(final boolean value) { + return append(value).appendNewLine(); + } + + /** + * Appends a char value followed by a new line to the string builder. * * @param ch the value to append * @return this, to enable chaining */ - @Override - public StrBuilder append(final char ch) { - final int len = length(); - ensureCapacity(len + 1); - buffer[size++] = ch; - return this; + public StrBuilder appendln(final char ch) { + return append(ch).appendNewLine(); + } + + /** + * Appends a char array followed by a new line to the string builder. + * Appending null will call {@link #appendNull()}. + * + * @param chars the char array to append + * @return this, to enable chaining + */ + public StrBuilder appendln(final char[] chars) { + return append(chars).appendNewLine(); + } + + /** + * Appends a char array followed by a new line to the string builder. + * Appending null will call {@link #appendNull()}. + * + * @param chars the char array to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining + */ + public StrBuilder appendln(final char[] chars, final int startIndex, final int length) { + return append(chars, startIndex, length).appendNewLine(); } /** - * Appends an int value to the string builder using String.valueOf. + * Appends a double value followed by a new line to the string builder using {@code String.valueOf}. * * @param value the value to append * @return this, to enable chaining */ - public StrBuilder append(final int value) { - return append(String.valueOf(value)); + public StrBuilder appendln(final double value) { + return append(value).appendNewLine(); } /** - * Appends a long value to the string builder using String.valueOf. + * Appends a float value followed by a new line to the string builder using {@code String.valueOf}. 
* * @param value the value to append * @return this, to enable chaining */ - public StrBuilder append(final long value) { - return append(String.valueOf(value)); + public StrBuilder appendln(final float value) { + return append(value).appendNewLine(); } /** - * Appends a float value to the string builder using String.valueOf. + * Appends an int value followed by a new line to the string builder using {@code String.valueOf}. * * @param value the value to append * @return this, to enable chaining */ - public StrBuilder append(final float value) { - return append(String.valueOf(value)); + public StrBuilder appendln(final int value) { + return append(value).appendNewLine(); } /** - * Appends a double value to the string builder using String.valueOf. + * Appends a long value followed by a new line to the string builder using {@code String.valueOf}. * * @param value the value to append * @return this, to enable chaining */ - public StrBuilder append(final double value) { - return append(String.valueOf(value)); + public StrBuilder appendln(final long value) { + return append(value).appendNewLine(); } //----------------------------------------------------------------------- @@ -953,6 +1008,30 @@ public StrBuilder appendln(final Object obj) { return append(obj).appendNewLine(); } + /** + * Appends another string builder followed by a new line to this string builder. + * Appending null will call {@link #appendNull()}. + * + * @param str the string builder to append + * @return this, to enable chaining + */ + public StrBuilder appendln(final StrBuilder str) { + return append(str).appendNewLine(); + } + + /** + * Appends part of a string builder followed by a new line to this string builder. + * Appending null will call {@link #appendNull()}. 
+ * + * @param str the string to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining + */ + public StrBuilder appendln(final StrBuilder str, final int startIndex, final int length) { + return append(str, startIndex, length).appendNewLine(); + } + /** * Appends a string followed by a new line to this string builder. * Appending null will call {@link #appendNull()}. @@ -1001,204 +1080,314 @@ public StrBuilder appendln(final StringBuffer str) { } /** - * Appends a string builder followed by a new line to this string builder. + * Appends part of a string buffer followed by a new line to this string builder. * Appending null will call {@link #appendNull()}. * - * @param str the string builder to append + * @param str the string to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid * @return this, to enable chaining */ - public StrBuilder appendln(final StringBuilder str) { - return append(str).appendNewLine(); + public StrBuilder appendln(final StringBuffer str, final int startIndex, final int length) { + return append(str, startIndex, length).appendNewLine(); } /** - * Appends part of a string builder followed by a new line to this string builder. + * Appends a string builder followed by a new line to this string builder. * Appending null will call {@link #appendNull()}. 
* * @param str the string builder to append - * @param startIndex the start index, inclusive, must be valid - * @param length the length to append, must be valid * @return this, to enable chaining */ - public StrBuilder appendln(final StringBuilder str, final int startIndex, final int length) { - return append(str, startIndex, length).appendNewLine(); + public StrBuilder appendln(final StringBuilder str) { + return append(str).appendNewLine(); } /** - * Appends part of a string buffer followed by a new line to this string builder. + * Appends part of a string builder followed by a new line to this string builder. * Appending null will call {@link #appendNull()}. * - * @param str the string to append + * @param str the string builder to append * @param startIndex the start index, inclusive, must be valid * @param length the length to append, must be valid * @return this, to enable chaining */ - public StrBuilder appendln(final StringBuffer str, final int startIndex, final int length) { + public StrBuilder appendln(final StringBuilder str, final int startIndex, final int length) { return append(str, startIndex, length).appendNewLine(); } + //----------------------------------------------------------------------- /** - * Appends another string builder followed by a new line to this string builder. - * Appending null will call {@link #appendNull()}. + * Appends the new line string to this string builder. + *

+ * The new line string can be altered using {@link #setNewLineText(String)}. + * This might be used to force the output to always use Unix line endings + * even when on Windows. * - * @param str the string builder to append * @return this, to enable chaining */ - public StrBuilder appendln(final StrBuilder str) { - return append(str).appendNewLine(); + public StrBuilder appendNewLine() { + if (newLine == null) { + append(System.lineSeparator()); + return this; + } + return append(newLine); } /** - * Appends part of a string builder followed by a new line to this string builder. - * Appending null will call {@link #appendNull()}. + * Appends the text representing {@code null} to this string builder. * - * @param str the string to append - * @param startIndex the start index, inclusive, must be valid - * @param length the length to append, must be valid * @return this, to enable chaining */ - public StrBuilder appendln(final StrBuilder str, final int startIndex, final int length) { - return append(str, startIndex, length).appendNewLine(); + public StrBuilder appendNull() { + if (nullText == null) { + return this; + } + return append(nullText); } + //----------------------------------------------------------------------- /** - * Appends a char array followed by a new line to the string builder. - * Appending null will call {@link #appendNull()}. + * Appends the pad character to the builder the specified number of times. 
* - * @param chars the char array to append + * @param length the length to append, negative means no append + * @param padChar the character to append * @return this, to enable chaining */ - public StrBuilder appendln(final char[] chars) { - return append(chars).appendNewLine(); + public StrBuilder appendPadding(final int length, final char padChar) { + if (length >= 0) { + ensureCapacity(size + length); + for (int i = 0; i < length; i++) { + buffer[size++] = padChar; + } + } + return this; } /** - * Appends a char array followed by a new line to the string builder. - * Appending null will call {@link #appendNull()}. + * Appends a separator if the builder is currently non-empty. + * The separator is appended using {@link #append(char)}. + *

+ * This method is useful for adding a separator each time around the + * loop except the first. + *

+     * for (Iterator it = list.iterator(); it.hasNext();){
+     *   appendSeparator(',');
+     *   append(it.next());
+     * }
+     * 
+ * Note that for this simple example, you should use + * {@link #appendWithSeparators(Iterable, String)}. * - * @param chars the char array to append - * @param startIndex the start index, inclusive, must be valid - * @param length the length to append, must be valid + * @param separator the separator to use * @return this, to enable chaining */ - public StrBuilder appendln(final char[] chars, final int startIndex, final int length) { - return append(chars, startIndex, length).appendNewLine(); + public StrBuilder appendSeparator(final char separator) { + if (isNotEmpty()) { + append(separator); + } + return this; } /** - * Appends a boolean value followed by a new line to the string builder. + * Append one of both separators to the builder + * If the builder is currently empty it will append the defaultIfEmpty-separator + * Otherwise it will append the standard-separator * - * @param value the value to append + * The separator is appended using {@link #append(char)}. + * @param standard the separator if builder is not empty + * @param defaultIfEmpty the separator if builder is empty * @return this, to enable chaining */ - public StrBuilder appendln(final boolean value) { - return append(value).appendNewLine(); + public StrBuilder appendSeparator(final char standard, final char defaultIfEmpty) { + if (isNotEmpty()) { + append(standard); + } else { + append(defaultIfEmpty); + } + return this; } /** - * Appends a char value followed by a new line to the string builder. + * Appends a separator to the builder if the loop index is greater than zero. + * The separator is appended using {@link #append(char)}. + *

+ * This method is useful for adding a separator each time around the + * loop except the first. + *

+ *
+     * for (int i = 0; i < list.size(); i++) {
+     *   appendSeparator(",", i);
+     *   append(list.get(i));
+     * }
+     * 
+ * Note that for this simple example, you should use + * {@link #appendWithSeparators(Iterable, String)}. * - * @param ch the value to append + * @param separator the separator to use + * @param loopIndex the loop index * @return this, to enable chaining */ - public StrBuilder appendln(final char ch) { - return append(ch).appendNewLine(); + public StrBuilder appendSeparator(final char separator, final int loopIndex) { + if (loopIndex > 0) { + append(separator); + } + return this; } + //----------------------------------------------------------------------- /** - * Appends an int value followed by a new line to the string builder using String.valueOf. + * Appends a separator if the builder is currently non-empty. + * Appending a null separator will have no effect. + * The separator is appended using {@link #append(String)}. + *

+ * This method is useful for adding a separator each time around the + * loop except the first. + *

+     * for (Iterator it = list.iterator(); it.hasNext();){
+     *   appendSeparator(",");
+     *   append(it.next());
+     * }
+     * 
+ * Note that for this simple example, you should use + * {@link #appendWithSeparators(Iterable, String)}. * - * @param value the value to append + * @param separator the separator to use, null means no separator * @return this, to enable chaining */ - public StrBuilder appendln(final int value) { - return append(value).appendNewLine(); + public StrBuilder appendSeparator(final String separator) { + return appendSeparator(separator, null); } /** - * Appends a long value followed by a new line to the string builder using String.valueOf. + * Appends a separator to the builder if the loop index is greater than zero. + * Appending a null separator will have no effect. + * The separator is appended using {@link #append(String)}. + *

+ * This method is useful for adding a separator each time around the + * loop except the first. + *

+ *
+     * for (int i = 0; i < list.size(); i++) {
+     *   appendSeparator(",", i);
+     *   append(list.get(i));
+     * }
+     * 
+ * Note that for this simple example, you should use + * {@link #appendWithSeparators(Iterable, String)}. * - * @param value the value to append + * @param separator the separator to use, null means no separator + * @param loopIndex the loop index * @return this, to enable chaining */ - public StrBuilder appendln(final long value) { - return append(value).appendNewLine(); + public StrBuilder appendSeparator(final String separator, final int loopIndex) { + if (separator != null && loopIndex > 0) { + append(separator); + } + return this; } /** - * Appends a float value followed by a new line to the string builder using String.valueOf. + * Appends one of both separators to the StrBuilder. + * If the builder is currently empty it will append the defaultIfEmpty-separator + * Otherwise it will append the standard-separator * - * @param value the value to append - * @return this, to enable chaining - */ - public StrBuilder appendln(final float value) { - return append(value).appendNewLine(); - } - - /** - * Appends a double value followed by a new line to the string builder using String.valueOf. + * Appending a null separator will have no effect. + * The separator is appended using {@link #append(String)}. + *

+ * This method is for example useful for constructing queries + *

+     * StrBuilder whereClause = new StrBuilder();
+     * if(searchCommand.getPriority() != null) {
+     *  whereClause.appendSeparator(" and", " where");
+     *  whereClause.append(" priority = ?")
+     * }
+     * if(searchCommand.getComponent() != null) {
+     *  whereClause.appendSeparator(" and", " where");
+     *  whereClause.append(" component = ?")
+     * }
+     * selectClause.append(whereClause)
+     * 
* - * @param value the value to append + * @param standard the separator if builder is not empty, null means no separator + * @param defaultIfEmpty the separator if builder is empty, null means no separator * @return this, to enable chaining */ - public StrBuilder appendln(final double value) { - return append(value).appendNewLine(); + public StrBuilder appendSeparator(final String standard, final String defaultIfEmpty) { + final String str = isEmpty() ? defaultIfEmpty : standard; + if (str != null) { + append(str); + } + return this; } - //----------------------------------------------------------------------- /** - * Appends each item in an array to the builder without any separators. - * Appending a null array will have no effect. - * Each object is appended using {@link #append(Object)}. + * Appends current contents of this {@code StrBuilder} to the + * provided {@link Appendable}. + *

+ * This method tries to avoid doing any extra copies of contents. * - * @param the element type - * @param array the array to append - * @return this, to enable chaining + * @param appendable the appendable to append data to + * @throws IOException if an I/O error occurs + * + * @see #readFrom(Readable) */ - public StrBuilder appendAll(@SuppressWarnings("unchecked") final T... array) { - /* - * @SuppressWarnings used to hide warning about vararg usage. We cannot - * use @SafeVarargs, since this method is not final. Using @SuppressWarnings - * is fine, because it isn't inherited by subclasses, so each subclass must - * vouch for itself whether its use of 'array' is safe. - */ - if (array != null && array.length > 0) { - for (final Object element : array) { - append(element); - } + public void appendTo(final Appendable appendable) throws IOException { + if (appendable instanceof Writer) { + ((Writer) appendable).write(buffer, 0, size); + } else if (appendable instanceof StringBuilder) { + ((StringBuilder) appendable).append(buffer, 0, size); + } else if (appendable instanceof StringBuffer) { + ((StringBuffer) appendable).append(buffer, 0, size); + } else if (appendable instanceof CharBuffer) { + ((CharBuffer) appendable).put(buffer, 0, size); + } else { + appendable.append(this); } - return this; } /** - * Appends each item in an iterable to the builder without any separators. + * Appends an iterable placing separators between each value, but + * not before the first or after the last. * Appending a null iterable will have no effect. * Each object is appended using {@link #append(Object)}. 
* * @param iterable the iterable to append + * @param separator the separator to use, null means no separator * @return this, to enable chaining */ - public StrBuilder appendAll(final Iterable iterable) { + public StrBuilder appendWithSeparators(final Iterable iterable, final String separator) { if (iterable != null) { - for (final Object o : iterable) { - append(o); + final String sep = Objects.toString(separator, StringUtils.EMPTY); + final Iterator it = iterable.iterator(); + while (it.hasNext()) { + append(it.next()); + if (it.hasNext()) { + append(sep); + } } } return this; } /** - * Appends each item in an iterator to the builder without any separators. + * Appends an iterator placing separators between each value, but + * not before the first or after the last. * Appending a null iterator will have no effect. * Each object is appended using {@link #append(Object)}. * * @param it the iterator to append + * @param separator the separator to use, null means no separator * @return this, to enable chaining */ - public StrBuilder appendAll(final Iterator it) { + public StrBuilder appendWithSeparators(final Iterator it, final String separator) { if (it != null) { + final String sep = Objects.toString(separator, StringUtils.EMPTY); while (it.hasNext()) { append(it.next()); + if (it.hasNext()) { + append(sep); + } } } return this; @@ -1217,7 +1406,7 @@ public StrBuilder appendAll(final Iterator it) { */ public StrBuilder appendWithSeparators(final Object[] array, final String separator) { if (array != null && array.length > 0) { - final String sep = Objects.toString(separator, ""); + final String sep = Objects.toString(separator, StringUtils.EMPTY); append(array[0]); for (int i = 1; i < array.length; i++) { append(sep); @@ -1227,1073 +1416,1029 @@ public StrBuilder appendWithSeparators(final Object[] array, final String separa return this; } + //----------------------------------------------------------------------- /** - * Appends an iterable placing separators 
between each value, but - * not before the first or after the last. - * Appending a null iterable will have no effect. - * Each object is appended using {@link #append(Object)}. + * Gets the contents of this builder as a Reader. + *

+ * This method allows the contents of the builder to be read + * using any standard method that expects a Reader. + *

+ * To use, simply create a {@code StrBuilder}, populate it with + * data, call {@code asReader}, and then read away. + *

+ * The internal character array is shared between the builder and the reader. + * This allows you to append to the builder after creating the reader, + * and the changes will be picked up. + * Note however, that no synchronization occurs, so you must perform + * all operations with the builder and the reader in one thread. + *

+ * The returned reader supports marking, and ignores the flush method. * - * @param iterable the iterable to append - * @param separator the separator to use, null means no separator - * @return this, to enable chaining + * @return a reader that reads from this builder */ - public StrBuilder appendWithSeparators(final Iterable iterable, final String separator) { - if (iterable != null) { - final String sep = Objects.toString(separator, ""); - final Iterator it = iterable.iterator(); - while (it.hasNext()) { - append(it.next()); - if (it.hasNext()) { - append(sep); - } - } - } - return this; + public Reader asReader() { + return new StrBuilderReader(); } + //----------------------------------------------------------------------- /** - * Appends an iterator placing separators between each value, but - * not before the first or after the last. - * Appending a null iterator will have no effect. - * Each object is appended using {@link #append(Object)}. + * Creates a tokenizer that can tokenize the contents of this builder. + *

+ * This method allows the contents of this builder to be tokenized. + * The tokenizer will be set up by default to tokenize on space, tab, + * newline and form feed (as per StringTokenizer). These values can be + * changed on the tokenizer class, before retrieving the tokens. + *

+ * The returned tokenizer is linked to this builder. You may intermix + * calls to the builder and tokenizer within certain limits, however + * there is no synchronization. Once the tokenizer has been used once, + * it must be {@link StrTokenizer#reset() reset} to pick up the latest + * changes in the builder. For example: + *

+     * StrBuilder b = new StrBuilder();
+     * b.append("a b ");
+     * StrTokenizer t = b.asTokenizer();
+     * String[] tokens1 = t.getTokenArray();  // returns a,b
+     * b.append("c d ");
+     * String[] tokens2 = t.getTokenArray();  // returns a,b (c and d ignored)
+     * t.reset();              // reset causes builder changes to be picked up
+     * String[] tokens3 = t.getTokenArray();  // returns a,b,c,d
+     * 
+ * In addition to simply intermixing appends and tokenization, you can also + * call the set methods on the tokenizer to alter how it tokenizes. Just + * remember to call reset when you want to pick up builder changes. + *

+ * Calling {@link StrTokenizer#reset(String)} or {@link StrTokenizer#reset(char[])} + * with a non-null value will break the link with the builder. * - * @param it the iterator to append - * @param separator the separator to use, null means no separator - * @return this, to enable chaining + * @return a tokenizer that is linked to this builder */ - public StrBuilder appendWithSeparators(final Iterator it, final String separator) { - if (it != null) { - final String sep = Objects.toString(separator, ""); - while (it.hasNext()) { - append(it.next()); - if (it.hasNext()) { - append(sep); - } - } - } - return this; + public StrTokenizer asTokenizer() { + return new StrBuilderTokenizer(); } //----------------------------------------------------------------------- /** - * Appends a separator if the builder is currently non-empty. - * Appending a null separator will have no effect. - * The separator is appended using {@link #append(String)}. + * Gets this builder as a Writer that can be written to. *

- * This method is useful for adding a separator each time around the - * loop except the first. - *

-     * for (Iterator it = list.iterator(); it.hasNext();){
-     *   appendSeparator(",");
-     *   append(it.next());
-     * }
-     * 
- * Note that for this simple example, you should use - * {@link #appendWithSeparators(Iterable, String)}. + * This method allows you to populate the contents of the builder + * using any standard method that takes a Writer. + *

+ * To use, simply create a {@code StrBuilder}, + * call {@code asWriter}, and populate away. The data is available + * at any time using the methods of the {@code StrBuilder}. + *

+ * The internal character array is shared between the builder and the writer. + * This allows you to intermix calls that append to the builder and + * write using the writer and the changes will occur correctly. + * Note however, that no synchronization occurs, so you must perform + * all operations with the builder and the writer in one thread. + *

+ * The returned writer ignores the close and flush methods. * - * @param separator the separator to use, null means no separator - * @return this, to enable chaining + * @return a writer that populates this builder */ - public StrBuilder appendSeparator(final String separator) { - return appendSeparator(separator, null); + public Writer asWriter() { + return new StrBuilderWriter(); } /** - * Appends one of both separators to the StrBuilder. - * If the builder is currently empty it will append the defaultIfEmpty-separator - * Otherwise it will append the standard-separator - * - * Appending a null separator will have no effect. - * The separator is appended using {@link #append(String)}. - *

- * This method is for example useful for constructing queries - *

-     * StrBuilder whereClause = new StrBuilder();
-     * if(searchCommand.getPriority() != null) {
-     *  whereClause.appendSeparator(" and", " where");
-     *  whereClause.append(" priority = ?")
-     * }
-     * if(searchCommand.getComponent() != null) {
-     *  whereClause.appendSeparator(" and", " where");
-     *  whereClause.append(" component = ?")
-     * }
-     * selectClause.append(whereClause)
-     * 
- * - * @param standard the separator if builder is not empty, null means no separator - * @param defaultIfEmpty the separator if builder is empty, null means no separator - * @return this, to enable chaining + * Implement the {@link Builder} interface. + * @return The builder as a String + * @see #toString() */ - public StrBuilder appendSeparator(final String standard, final String defaultIfEmpty) { - final String str = isEmpty() ? defaultIfEmpty : standard; - if (str != null) { - append(str); - } - return this; + @Override + public String build() { + return toString(); } + //----------------------------------------------------------------------- /** - * Appends a separator if the builder is currently non-empty. - * The separator is appended using {@link #append(char)}. - *

- * This method is useful for adding a separator each time around the - * loop except the first. - *

-     * for (Iterator it = list.iterator(); it.hasNext();){
-     *   appendSeparator(',');
-     *   append(it.next());
-     * }
-     * 
- * Note that for this simple example, you should use - * {@link #appendWithSeparators(Iterable, String)}. + * Gets the current size of the internal character array buffer. * - * @param separator the separator to use - * @return this, to enable chaining + * @return The capacity */ - public StrBuilder appendSeparator(final char separator) { - if (size() > 0) { - append(separator); - } - return this; + public int capacity() { + return buffer.length; } + //----------------------------------------------------------------------- /** - * Append one of both separators to the builder - * If the builder is currently empty it will append the defaultIfEmpty-separator - * Otherwise it will append the standard-separator + * Gets the character at the specified index. * - * The separator is appended using {@link #append(char)}. - * @param standard the separator if builder is not empty - * @param defaultIfEmpty the separator if builder is empty - * @return this, to enable chaining + * @see #setCharAt(int, char) + * @see #deleteCharAt(int) + * @param index the index to retrieve, must be valid + * @return The character at the index + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder appendSeparator(final char standard, final char defaultIfEmpty) { - if (size() > 0) { - append(standard); - } else { - append(defaultIfEmpty); + @Override + public char charAt(final int index) { + if (index < 0 || index >= length()) { + throw new StringIndexOutOfBoundsException(index); } - return this; + return buffer[index]; } + /** - * Appends a separator to the builder if the loop index is greater than zero. - * Appending a null separator will have no effect. - * The separator is appended using {@link #append(String)}. + * Clears the string builder (convenience Collections API style method). *

- * This method is useful for adding a separator each time around the - * loop except the first. - *

- *
-     * for (int i = 0; i < list.size(); i++) {
-     *   appendSeparator(",", i);
-     *   append(list.get(i));
-     * }
-     * 
- * Note that for this simple example, you should use - * {@link #appendWithSeparators(Iterable, String)}. + * This method does not reduce the size of the internal character buffer. + * To do that, call {@code clear()} followed by {@link #minimizeCapacity()}. + *

+ * This method is the same as {@link #setLength(int)} called with zero + * and is provided to match the API of Collections. * - * @param separator the separator to use, null means no separator - * @param loopIndex the loop index * @return this, to enable chaining */ - public StrBuilder appendSeparator(final String separator, final int loopIndex) { - if (separator != null && loopIndex > 0) { - append(separator); - } + public StrBuilder clear() { + size = 0; return this; } + //----------------------------------------------------------------------- /** - * Appends a separator to the builder if the loop index is greater than zero. - * The separator is appended using {@link #append(char)}. - *

- * This method is useful for adding a separator each time around the - * loop except the first. - *

- *
-     * for (int i = 0; i < list.size(); i++) {
-     *   appendSeparator(",", i);
-     *   append(list.get(i));
-     * }
-     * 
- * Note that for this simple example, you should use - * {@link #appendWithSeparators(Iterable, String)}. + * Checks if the string builder contains the specified char. * - * @param separator the separator to use - * @param loopIndex the loop index - * @return this, to enable chaining + * @param ch the character to find + * @return true if the builder contains the character */ - public StrBuilder appendSeparator(final char separator, final int loopIndex) { - if (loopIndex > 0) { - append(separator); + public boolean contains(final char ch) { + final char[] thisBuf = buffer; + for (int i = 0; i < this.size; i++) { + if (thisBuf[i] == ch) { + return true; + } } - return this; + return false; } - //----------------------------------------------------------------------- /** - * Appends the pad character to the builder the specified number of times. + * Checks if the string builder contains the specified string. * - * @param length the length to append, negative means no append - * @param padChar the character to append - * @return this, to enable chaining + * @param str the string to find + * @return true if the builder contains the string */ - public StrBuilder appendPadding(final int length, final char padChar) { - if (length >= 0) { - ensureCapacity(size + length); - for (int i = 0; i < length; i++) { - buffer[size++] = padChar; - } - } - return this; + public boolean contains(final String str) { + return indexOf(str, 0) >= 0; } - //----------------------------------------------------------------------- /** - * Appends an object to the builder padding on the left to a fixed width. - * The toString of the object is used. - * If the object is larger than the length, the left hand side is lost. - * If the object is null, the null text value is used. + * Checks if the string builder contains a string matched using the + * specified matcher. + *

+ * Matchers can be used to perform advanced searching behavior. + * For example you could write a matcher to search for the character + * 'a' followed by a number. * - * @param obj the object to append, null uses null text - * @param width the fixed field width, zero or negative has no effect - * @param padChar the pad character to use + * @param matcher the matcher to use, null returns -1 + * @return true if the matcher finds a match in the builder + */ + public boolean contains(final StrMatcher matcher) { + return indexOf(matcher, 0) >= 0; + } + /** + * Deletes the characters between the two specified indices. + * + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except + * that if too large it is treated as end of string * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder appendFixedWidthPadLeft(final Object obj, final int width, final char padChar) { - if (width > 0) { - ensureCapacity(size + width); - String str = (obj == null ? getNullText() : obj.toString()); - if (str == null) { - str = ""; - } - final int strLen = str.length(); - if (strLen >= width) { - str.getChars(strLen - width, strLen, buffer, size); - } else { - final int padLen = width - strLen; - for (int i = 0; i < padLen; i++) { - buffer[size + i] = padChar; - } - str.getChars(0, strLen, buffer, size + padLen); - } - size += width; + public StrBuilder delete(final int startIndex, int endIndex) { + endIndex = validateRange(startIndex, endIndex); + final int len = endIndex - startIndex; + if (len > 0) { + deleteImpl(startIndex, endIndex, len); } return this; } + //----------------------------------------------------------------------- /** - * Appends an object to the builder padding on the left to a fixed width. - * The String.valueOf of the int value is used. - * If the formatted value is larger than the length, the left hand side is lost. 
+ * Deletes the character wherever it occurs in the builder. * - * @param value the value to append - * @param width the fixed field width, zero or negative has no effect - * @param padChar the pad character to use + * @param ch the character to delete * @return this, to enable chaining */ - public StrBuilder appendFixedWidthPadLeft(final int value, final int width, final char padChar) { - return appendFixedWidthPadLeft(String.valueOf(value), width, padChar); + public StrBuilder deleteAll(final char ch) { + for (int i = 0; i < size; i++) { + if (buffer[i] == ch) { + final int start = i; + while (++i < size) { + if (buffer[i] != ch) { + break; + } + } + final int len = i - start; + deleteImpl(start, i, len); + i -= len; + } + } + return this; } + //----------------------------------------------------------------------- /** - * Appends an object to the builder padding on the right to a fixed length. - * The toString of the object is used. - * If the object is larger than the length, the right hand side is lost. - * If the object is null, null text value is used. + * Deletes the string wherever it occurs in the builder. * - * @param obj the object to append, null uses null text - * @param width the fixed field width, zero or negative has no effect - * @param padChar the pad character to use + * @param str the string to delete, null causes no action * @return this, to enable chaining */ - public StrBuilder appendFixedWidthPadRight(final Object obj, final int width, final char padChar) { - if (width > 0) { - ensureCapacity(size + width); - String str = (obj == null ? 
getNullText() : obj.toString()); - if (str == null) { - str = ""; - } - final int strLen = str.length(); - if (strLen >= width) { - str.getChars(0, width, buffer, size); - } else { - final int padLen = width - strLen; - str.getChars(0, strLen, buffer, size); - for (int i = 0; i < padLen; i++) { - buffer[size + strLen + i] = padChar; - } + public StrBuilder deleteAll(final String str) { + final int len = str == null ? 0 : str.length(); + if (len > 0) { + int index = indexOf(str, 0); + while (index >= 0) { + deleteImpl(index, index + len, len); + index = indexOf(str, index); } - size += width; } return this; } + //----------------------------------------------------------------------- /** - * Appends an object to the builder padding on the right to a fixed length. - * The String.valueOf of the int value is used. - * If the object is larger than the length, the right hand side is lost. + * Deletes all parts of the builder that the matcher matches. + *

+ * Matchers can be used to perform advanced deletion behavior. + * For example you could write a matcher to delete all occurrences + * where the character 'a' is followed by a number. * - * @param value the value to append - * @param width the fixed field width, zero or negative has no effect - * @param padChar the pad character to use + * @param matcher the matcher to use to find the deletion, null causes no action * @return this, to enable chaining */ - public StrBuilder appendFixedWidthPadRight(final int value, final int width, final char padChar) { - return appendFixedWidthPadRight(String.valueOf(value), width, padChar); + public StrBuilder deleteAll(final StrMatcher matcher) { + return replace(matcher, null, 0, size, -1); } - //----------------------------------------------------------------------- /** - * Inserts the string representation of an object into this builder. - * Inserting null will use the stored null text value. + * Deletes the character at the specified index. * - * @param index the index to add at, must be valid - * @param obj the object to insert + * @see #charAt(int) + * @see #setCharAt(int, char) + * @param index the index to delete * @return this, to enable chaining * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder insert(final int index, final Object obj) { - if (obj == null) { - return insert(index, nullText); + public StrBuilder deleteCharAt(final int index) { + if (index < 0 || index >= size) { + throw new StringIndexOutOfBoundsException(index); } - return insert(index, obj.toString()); + deleteImpl(index, index + 1, 1); + return this; } /** - * Inserts the string into this builder. - * Inserting null will use the stored null text value. + * Deletes the character wherever it occurs in the builder. 
* - * @param index the index to add at, must be valid - * @param str the string to insert + * @param ch the character to delete * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder insert(final int index, String str) { - validateIndex(index); - if (str == null) { - str = nullText; - } - if (str != null) { - final int strLen = str.length(); - if (strLen > 0) { - final int newSize = size + strLen; - ensureCapacity(newSize); - System.arraycopy(buffer, index, buffer, index + strLen, size - index); - size = newSize; - str.getChars(0, strLen, buffer, index); + public StrBuilder deleteFirst(final char ch) { + for (int i = 0; i < size; i++) { + if (buffer[i] == ch) { + deleteImpl(i, i + 1, 1); + break; } } return this; } /** - * Inserts the character array into this builder. - * Inserting null will use the stored null text value. + * Deletes the string wherever it occurs in the builder. * - * @param index the index to add at, must be valid - * @param chars the char array to insert + * @param str the string to delete, null causes no action * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder insert(final int index, final char[] chars) { - validateIndex(index); - if (chars == null) { - return insert(index, nullText); - } - final int len = chars.length; + public StrBuilder deleteFirst(final String str) { + final int len = str == null ? 0 : str.length(); if (len > 0) { - ensureCapacity(size + len); - System.arraycopy(buffer, index, buffer, index + len, size - index); - System.arraycopy(chars, 0, buffer, index, len); - size += len; + final int index = indexOf(str, 0); + if (index >= 0) { + deleteImpl(index, index + len, len); + } } return this; } /** - * Inserts part of the character array into this builder. - * Inserting null will use the stored null text value. + * Deletes the first match within the builder using the specified matcher. + *

+ * Matchers can be used to perform advanced deletion behavior. + * For example you could write a matcher to delete + * where the character 'a' is followed by a number. * - * @param index the index to add at, must be valid - * @param chars the char array to insert - * @param offset the offset into the character array to start at, must be valid - * @param length the length of the character array part to copy, must be positive + * @param matcher the matcher to use to find the deletion, null causes no action * @return this, to enable chaining - * @throws IndexOutOfBoundsException if any index is invalid */ - public StrBuilder insert(final int index, final char[] chars, final int offset, final int length) { - validateIndex(index); - if (chars == null) { - return insert(index, nullText); - } - if (offset < 0 || offset > chars.length) { - throw new StringIndexOutOfBoundsException("Invalid offset: " + offset); - } - if (length < 0 || offset + length > chars.length) { - throw new StringIndexOutOfBoundsException("Invalid length: " + length); - } - if (length > 0) { - ensureCapacity(size + length); - System.arraycopy(buffer, index, buffer, index + length, size - index); - System.arraycopy(chars, offset, buffer, index, length); - size += length; - } - return this; + public StrBuilder deleteFirst(final StrMatcher matcher) { + return replace(matcher, null, 0, size, 1); } + //----------------------------------------------------------------------- /** - * Inserts the value into this builder. + * Internal method to delete a range without validation. 
* - * @param index the index to add at, must be valid - * @param value the value to insert - * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid + * @param startIndex the start index, must be valid + * @param endIndex the end index (exclusive), must be valid + * @param len the length, must be valid + * @throws IndexOutOfBoundsException if any index is invalid */ - public StrBuilder insert(int index, final boolean value) { - validateIndex(index); - if (value) { - ensureCapacity(size + 4); - System.arraycopy(buffer, index, buffer, index + 4, size - index); - buffer[index++] = 't'; - buffer[index++] = 'r'; - buffer[index++] = 'u'; - buffer[index] = 'e'; - size += 4; - } else { - ensureCapacity(size + 5); - System.arraycopy(buffer, index, buffer, index + 5, size - index); - buffer[index++] = 'f'; - buffer[index++] = 'a'; - buffer[index++] = 'l'; - buffer[index++] = 's'; - buffer[index] = 'e'; - size += 5; + private void deleteImpl(final int startIndex, final int endIndex, final int len) { + System.arraycopy(buffer, endIndex, buffer, startIndex, size - endIndex); + size -= len; + } + + /** + * Checks whether this builder ends with the specified string. + *

+ * Note that this method handles null input quietly, unlike String. + * + * @param str the string to search for, null returns false + * @return true if the builder ends with the string + */ + public boolean endsWith(final String str) { + if (str == null) { + return false; } - return this; + final int len = str.length(); + if (len == 0) { + return true; + } + if (len > size) { + return false; + } + int pos = size - len; + for (int i = 0; i < len; i++, pos++) { + if (buffer[pos] != str.charAt(i)) { + return false; + } + } + return true; } /** - * Inserts the value into this builder. + * Checks the capacity and ensures that it is at least the size specified. * - * @param index the index to add at, must be valid - * @param value the value to insert + * @param capacity the capacity to ensure * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder insert(final int index, final char value) { - validateIndex(index); - ensureCapacity(size + 1); - System.arraycopy(buffer, index, buffer, index + 1, size - index); - buffer[index] = value; - size++; + public StrBuilder ensureCapacity(final int capacity) { + if (capacity > buffer.length) { + final char[] old = buffer; + buffer = new char[capacity * 2]; + System.arraycopy(old, 0, buffer, 0, size); + } return this; } /** - * Inserts the value into this builder. + * Checks the contents of this builder against another to see if they + * contain the same character content. 
* - * @param index the index to add at, must be valid - * @param value the value to insert - * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid + * @param obj the object to check, null returns false + * @return true if the builders contain the same characters in the same order */ - public StrBuilder insert(final int index, final int value) { - return insert(index, String.valueOf(value)); + @Override + public boolean equals(final Object obj) { + return obj instanceof StrBuilder + && equals((StrBuilder) obj); } /** - * Inserts the value into this builder. + * Checks the contents of this builder against another to see if they + * contain the same character content. * - * @param index the index to add at, must be valid - * @param value the value to insert - * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid + * @param other the object to check, null returns false + * @return true if the builders contain the same characters in the same order */ - public StrBuilder insert(final int index, final long value) { - return insert(index, String.valueOf(value)); + public boolean equals(final StrBuilder other) { + if (this == other) { + return true; + } + if (other == null) { + return false; + } + if (this.size != other.size) { + return false; + } + final char[] thisBuf = this.buffer; + final char[] otherBuf = other.buffer; + for (int i = size - 1; i >= 0; i--) { + if (thisBuf[i] != otherBuf[i]) { + return false; + } + } + return true; } /** - * Inserts the value into this builder. + * Checks the contents of this builder against another to see if they + * contain the same character content ignoring case. 
* - * @param index the index to add at, must be valid - * @param value the value to insert - * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid + * @param other the object to check, null returns false + * @return true if the builders contain the same characters in the same order */ - public StrBuilder insert(final int index, final float value) { - return insert(index, String.valueOf(value)); + public boolean equalsIgnoreCase(final StrBuilder other) { + if (this == other) { + return true; + } + if (this.size != other.size) { + return false; + } + final char[] thisBuf = this.buffer; + final char[] otherBuf = other.buffer; + for (int i = size - 1; i >= 0; i--) { + final char c1 = thisBuf[i]; + final char c2 = otherBuf[i]; + if (c1 != c2 && Character.toUpperCase(c1) != Character.toUpperCase(c2)) { + return false; + } + } + return true; } /** - * Inserts the value into this builder. + * Copies the character array into the specified array. * - * @param index the index to add at, must be valid - * @param value the value to insert - * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid + * @param destination the destination array, null will cause an array to be created + * @return The input array, unless that was null or too small */ - public StrBuilder insert(final int index, final double value) { - return insert(index, String.valueOf(value)); + public char[] getChars(char[] destination) { + final int len = length(); + if (destination == null || destination.length < len) { + destination = new char[len]; + } + System.arraycopy(buffer, 0, destination, 0, len); + return destination; } - //----------------------------------------------------------------------- /** - * Internal method to delete a range without validation. + * Copies the character array into the specified array. 
* - * @param startIndex the start index, must be valid - * @param endIndex the end index (exclusive), must be valid - * @param len the length, must be valid + * @param startIndex first index to copy, inclusive, must be valid + * @param endIndex last index, exclusive, must be valid + * @param destination the destination array, must not be null or too small + * @param destinationIndex the index to start copying in destination + * @throws NullPointerException if the array is null * @throws IndexOutOfBoundsException if any index is invalid */ - private void deleteImpl(final int startIndex, final int endIndex, final int len) { - System.arraycopy(buffer, endIndex, buffer, startIndex, size - endIndex); - size -= len; + public void getChars(final int startIndex, + final int endIndex, + final char[] destination, + final int destinationIndex) { + if (startIndex < 0) { + throw new StringIndexOutOfBoundsException(startIndex); + } + if (endIndex < 0 || endIndex > length()) { + throw new StringIndexOutOfBoundsException(endIndex); + } + if (startIndex > endIndex) { + throw new StringIndexOutOfBoundsException("end < start"); + } + System.arraycopy(buffer, startIndex, destination, destinationIndex, endIndex - startIndex); } + //----------------------------------------------------------------------- /** - * Deletes the characters between the two specified indices. + * Gets the text to be appended when a new line is added. 
* - * @param startIndex the start index, inclusive, must be valid - * @param endIndex the end index, exclusive, must be valid except - * that if too large it is treated as end of string - * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid + * @return The new line text, null means use system default */ - public StrBuilder delete(final int startIndex, int endIndex) { - endIndex = validateRange(startIndex, endIndex); - final int len = endIndex - startIndex; - if (len > 0) { - deleteImpl(startIndex, endIndex, len); - } - return this; + public String getNewLineText() { + return newLine; } //----------------------------------------------------------------------- /** - * Deletes the character wherever it occurs in the builder. + * Gets the text to be appended when null is added. * - * @param ch the character to delete - * @return this, to enable chaining + * @return The null text, null means no append */ - public StrBuilder deleteAll(final char ch) { - for (int i = 0; i < size; i++) { - if (buffer[i] == ch) { - final int start = i; - while (++i < size) { - if (buffer[i] != ch) { - break; - } - } - final int len = i - start; - deleteImpl(start, i, len); - i -= len; - } - } - return this; + public String getNullText() { + return nullText; } /** - * Deletes the character wherever it occurs in the builder. + * Gets a suitable hash code for this builder. * - * @param ch the character to delete - * @return this, to enable chaining + * @return a hash code */ - public StrBuilder deleteFirst(final char ch) { - for (int i = 0; i < size; i++) { - if (buffer[i] == ch) { - deleteImpl(i, i + 1, 1); - break; - } + @Override + public int hashCode() { + final char[] buf = buffer; + int hash = 0; + for (int i = size - 1; i >= 0; i--) { + hash = 31 * hash + buf[i]; } - return this; + return hash; } //----------------------------------------------------------------------- /** - * Deletes the string wherever it occurs in the builder. 
+ * Searches the string builder to find the first reference to the specified char. * - * @param str the string to delete, null causes no action - * @return this, to enable chaining + * @param ch the character to find + * @return The first index of the character, or -1 if not found */ - public StrBuilder deleteAll(final String str) { - final int len = (str == null ? 0 : str.length()); - if (len > 0) { - int index = indexOf(str, 0); - while (index >= 0) { - deleteImpl(index, index + len, len); - index = indexOf(str, index); - } - } - return this; + public int indexOf(final char ch) { + return indexOf(ch, 0); } /** - * Deletes the string wherever it occurs in the builder. + * Searches the string builder to find the first reference to the specified char. * - * @param str the string to delete, null causes no action - * @return this, to enable chaining + * @param ch the character to find + * @param startIndex the index to start at, invalid index rounded to edge + * @return The first index of the character, or -1 if not found */ - public StrBuilder deleteFirst(final String str) { - final int len = (str == null ? 0 : str.length()); - if (len > 0) { - final int index = indexOf(str, 0); - if (index >= 0) { - deleteImpl(index, index + len, len); + public int indexOf(final char ch, int startIndex) { + startIndex = startIndex < 0 ? 0 : startIndex; + if (startIndex >= size) { + return -1; + } + final char[] thisBuf = buffer; + for (int i = startIndex; i < size; i++) { + if (thisBuf[i] == ch) { + return i; } } - return this; + return -1; } - //----------------------------------------------------------------------- /** - * Deletes all parts of the builder that the matcher matches. + * Searches the string builder to find the first reference to the specified string. *

- * Matchers can be used to perform advanced deletion behaviour. - * For example you could write a matcher to delete all occurrences - * where the character 'a' is followed by a number. + * Note that a null input string will return -1, whereas the JDK throws an exception. * - * @param matcher the matcher to use to find the deletion, null causes no action - * @return this, to enable chaining + * @param str the string to find, null returns -1 + * @return The first index of the string, or -1 if not found */ - public StrBuilder deleteAll(final StrMatcher matcher) { - return replace(matcher, null, 0, size, -1); + public int indexOf(final String str) { + return indexOf(str, 0); } /** - * Deletes the first match within the builder using the specified matcher. + * Searches the string builder to find the first reference to the specified + * string starting searching from the given index. *

- * Matchers can be used to perform advanced deletion behaviour. - * For example you could write a matcher to delete - * where the character 'a' is followed by a number. + * Note that a null input string will return -1, whereas the JDK throws an exception. * - * @param matcher the matcher to use to find the deletion, null causes no action - * @return this, to enable chaining + * @param str the string to find, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The first index of the string, or -1 if not found */ - public StrBuilder deleteFirst(final StrMatcher matcher) { - return replace(matcher, null, 0, size, 1); + public int indexOf(final String str, int startIndex) { + startIndex = startIndex < 0 ? 0 : startIndex; + if (str == null || startIndex >= size) { + return -1; + } + final int strLen = str.length(); + if (strLen == 1) { + return indexOf(str.charAt(0), startIndex); + } + if (strLen == 0) { + return startIndex; + } + if (strLen > size) { + return -1; + } + final char[] thisBuf = buffer; + final int len = size - strLen + 1; + outer: + for (int i = startIndex; i < len; i++) { + for (int j = 0; j < strLen; j++) { + if (str.charAt(j) != thisBuf[i + j]) { + continue outer; + } + } + return i; + } + return -1; } - //----------------------------------------------------------------------- /** - * Internal method to delete a range without validation. + * Searches the string builder using the matcher to find the first match. + *

+ * Matchers can be used to perform advanced searching behavior. + * For example you could write a matcher to find the character 'a' + * followed by a number. * - * @param startIndex the start index, must be valid - * @param endIndex the end index (exclusive), must be valid - * @param removeLen the length to remove (endIndex - startIndex), must be valid - * @param insertStr the string to replace with, null means delete range - * @param insertLen the length of the insert string, must be valid - * @throws IndexOutOfBoundsException if any index is invalid + * @param matcher the matcher to use, null returns -1 + * @return The first index matched, or -1 if not found */ - private void replaceImpl(final int startIndex, - final int endIndex, - final int removeLen, - final String insertStr, - final int insertLen) { - final int newSize = size - removeLen + insertLen; - if (insertLen != removeLen) { - ensureCapacity(newSize); - System.arraycopy(buffer, endIndex, buffer, startIndex + insertLen, size - endIndex); - size = newSize; - } - if (insertLen > 0) { - insertStr.getChars(0, insertLen, buffer, startIndex); - } + public int indexOf(final StrMatcher matcher) { + return indexOf(matcher, 0); } /** - * Replaces a portion of the string builder with another string. - * The length of the inserted string does not have to match the removed length. + * Searches the string builder using the matcher to find the first + * match searching from the given index. + *

+ * Matchers can be used to perform advanced searching behavior. + * For example you could write a matcher to find the character 'a' + * followed by a number. * - * @param startIndex the start index, inclusive, must be valid - * @param endIndex the end index, exclusive, must be valid except - * that if too large it is treated as end of string - * @param replaceStr the string to replace with, null means delete range - * @return this, to enable chaining - * @throws IndexOutOfBoundsException if the index is invalid + * @param matcher the matcher to use, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The first index matched, or -1 if not found */ - public StrBuilder replace(final int startIndex, int endIndex, final String replaceStr) { - endIndex = validateRange(startIndex, endIndex); - final int insertLen = (replaceStr == null ? 0 : replaceStr.length()); - replaceImpl(startIndex, endIndex, endIndex - startIndex, replaceStr, insertLen); - return this; + public int indexOf(final StrMatcher matcher, int startIndex) { + startIndex = startIndex < 0 ? 0 : startIndex; + if (matcher == null || startIndex >= size) { + return -1; + } + final int len = size; + final char[] buf = buffer; + for (int i = startIndex; i < len; i++) { + if (matcher.isMatch(buf, i, startIndex, len) > 0) { + return i; + } + } + return -1; } - //----------------------------------------------------------------------- /** - * Replaces the search character with the replace character - * throughout the builder. + * Inserts the value into this builder. 
* - * @param search the search character - * @param replace the replace character + * @param index the index to add at, must be valid + * @param value the value to insert * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder replaceAll(final char search, final char replace) { - if (search != replace) { - for (int i = 0; i < size; i++) { - if (buffer[i] == search) { - buffer[i] = replace; - } - } + public StrBuilder insert(int index, final boolean value) { + validateIndex(index); + if (value) { + ensureCapacity(size + 4); + System.arraycopy(buffer, index, buffer, index + 4, size - index); + buffer[index++] = 't'; + buffer[index++] = 'r'; + buffer[index++] = 'u'; + buffer[index] = 'e'; + size += 4; + } else { + ensureCapacity(size + 5); + System.arraycopy(buffer, index, buffer, index + 5, size - index); + buffer[index++] = 'f'; + buffer[index++] = 'a'; + buffer[index++] = 'l'; + buffer[index++] = 's'; + buffer[index] = 'e'; + size += 5; } return this; } /** - * Replaces the first instance of the search character with the - * replace character in the builder. + * Inserts the value into this builder. 
* - * @param search the search character - * @param replace the replace character + * @param index the index to add at, must be valid + * @param value the value to insert * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder replaceFirst(final char search, final char replace) { - if (search != replace) { - for (int i = 0; i < size; i++) { - if (buffer[i] == search) { - buffer[i] = replace; - break; - } - } - } + public StrBuilder insert(final int index, final char value) { + validateIndex(index); + ensureCapacity(size + 1); + System.arraycopy(buffer, index, buffer, index + 1, size - index); + buffer[index] = value; + size++; return this; } - //----------------------------------------------------------------------- /** - * Replaces the search string with the replace string throughout the builder. + * Inserts the character array into this builder. + * Inserting null will use the stored null text value. * - * @param searchStr the search string, null causes no action to occur - * @param replaceStr the replace string, null is equivalent to an empty string + * @param index the index to add at, must be valid + * @param chars the char array to insert * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder replaceAll(final String searchStr, final String replaceStr) { - final int searchLen = (searchStr == null ? 0 : searchStr.length()); - if (searchLen > 0) { - final int replaceLen = (replaceStr == null ? 
0 : replaceStr.length()); - int index = indexOf(searchStr, 0); - while (index >= 0) { - replaceImpl(index, index + searchLen, searchLen, replaceStr, replaceLen); - index = indexOf(searchStr, index + replaceLen); - } + public StrBuilder insert(final int index, final char[] chars) { + validateIndex(index); + if (chars == null) { + return insert(index, nullText); + } + final int len = chars.length; + if (len > 0) { + ensureCapacity(size + len); + System.arraycopy(buffer, index, buffer, index + len, size - index); + System.arraycopy(chars, 0, buffer, index, len); + size += len; } return this; } /** - * Replaces the first instance of the search string with the replace string. + * Inserts part of the character array into this builder. + * Inserting null will use the stored null text value. * - * @param searchStr the search string, null causes no action to occur - * @param replaceStr the replace string, null is equivalent to an empty string + * @param index the index to add at, must be valid + * @param chars the char array to insert + * @param offset the offset into the character array to start at, must be valid + * @param length the length of the character array part to copy, must be positive * @return this, to enable chaining + * @throws IndexOutOfBoundsException if any index is invalid */ - public StrBuilder replaceFirst(final String searchStr, final String replaceStr) { - final int searchLen = (searchStr == null ? 0 : searchStr.length()); - if (searchLen > 0) { - final int index = indexOf(searchStr, 0); - if (index >= 0) { - final int replaceLen = (replaceStr == null ? 
0 : replaceStr.length()); - replaceImpl(index, index + searchLen, searchLen, replaceStr, replaceLen); - } + public StrBuilder insert(final int index, final char[] chars, final int offset, final int length) { + validateIndex(index); + if (chars == null) { + return insert(index, nullText); + } + if (offset < 0 || offset > chars.length) { + throw new StringIndexOutOfBoundsException("Invalid offset: " + offset); + } + if (length < 0 || offset + length > chars.length) { + throw new StringIndexOutOfBoundsException("Invalid length: " + length); + } + if (length > 0) { + ensureCapacity(size + length); + System.arraycopy(buffer, index, buffer, index + length, size - index); + System.arraycopy(chars, offset, buffer, index, length); + size += length; } return this; } - //----------------------------------------------------------------------- /** - * Replaces all matches within the builder with the replace string. - *

- * Matchers can be used to perform advanced replace behaviour. - * For example you could write a matcher to replace all occurrences - * where the character 'a' is followed by a number. + * Inserts the value into this builder. * - * @param matcher the matcher to use to find the deletion, null causes no action - * @param replaceStr the replace string, null is equivalent to an empty string + * @param index the index to add at, must be valid + * @param value the value to insert * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder replaceAll(final StrMatcher matcher, final String replaceStr) { - return replace(matcher, replaceStr, 0, size, -1); + public StrBuilder insert(final int index, final double value) { + return insert(index, String.valueOf(value)); } /** - * Replaces the first match within the builder with the replace string. - *

- * Matchers can be used to perform advanced replace behaviour. - * For example you could write a matcher to replace - * where the character 'a' is followed by a number. + * Inserts the value into this builder. * - * @param matcher the matcher to use to find the deletion, null causes no action - * @param replaceStr the replace string, null is equivalent to an empty string + * @param index the index to add at, must be valid + * @param value the value to insert * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder replaceFirst(final StrMatcher matcher, final String replaceStr) { - return replace(matcher, replaceStr, 0, size, 1); + public StrBuilder insert(final int index, final float value) { + return insert(index, String.valueOf(value)); } - // ----------------------------------------------------------------------- /** - * Advanced search and replaces within the builder using a matcher. - *

- * Matchers can be used to perform advanced behaviour. - * For example you could write a matcher to delete all occurrences - * where the character 'a' is followed by a number. + * Inserts the value into this builder. * - * @param matcher the matcher to use to find the deletion, null causes no action - * @param replaceStr the string to replace the match with, null is a delete - * @param startIndex the start index, inclusive, must be valid - * @param endIndex the end index, exclusive, must be valid except - * that if too large it is treated as end of string - * @param replaceCount the number of times to replace, -1 for replace all + * @param index the index to add at, must be valid + * @param value the value to insert * @return this, to enable chaining - * @throws IndexOutOfBoundsException if start index is invalid + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder replace( - final StrMatcher matcher, final String replaceStr, - final int startIndex, int endIndex, final int replaceCount) { - endIndex = validateRange(startIndex, endIndex); - return replaceImpl(matcher, replaceStr, startIndex, endIndex, replaceCount); + public StrBuilder insert(final int index, final int value) { + return insert(index, String.valueOf(value)); } /** - * Replaces within the builder using a matcher. - *

- * Matchers can be used to perform advanced behaviour. - * For example you could write a matcher to delete all occurrences - * where the character 'a' is followed by a number. + * Inserts the value into this builder. * - * @param matcher the matcher to use to find the deletion, null causes no action - * @param replaceStr the string to replace the match with, null is a delete - * @param from the start index, must be valid - * @param to the end index (exclusive), must be valid - * @param replaceCount the number of times to replace, -1 for replace all + * @param index the index to add at, must be valid + * @param value the value to insert * @return this, to enable chaining - * @throws IndexOutOfBoundsException if any index is invalid + * @throws IndexOutOfBoundsException if the index is invalid */ - private StrBuilder replaceImpl( - final StrMatcher matcher, final String replaceStr, - final int from, int to, int replaceCount) { - if (matcher == null || size == 0) { - return this; - } - final int replaceLen = (replaceStr == null ? 0 : replaceStr.length()); - for (int i = from; i < to && replaceCount != 0; i++) { - final char[] buf = buffer; - final int removeLen = matcher.isMatch(buf, i, from, to); - if (removeLen > 0) { - replaceImpl(i, i + removeLen, removeLen, replaceStr, replaceLen); - to = to - removeLen + replaceLen; - i = i + replaceLen - 1; - if (replaceCount > 0) { - replaceCount--; - } - } - } - return this; + public StrBuilder insert(final int index, final long value) { + return insert(index, String.valueOf(value)); } //----------------------------------------------------------------------- /** - * Reverses the string builder placing each character in the opposite index. + * Inserts the string representation of an object into this builder. + * Inserting null will use the stored null text value. 
* + * @param index the index to add at, must be valid + * @param obj the object to insert * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder reverse() { - if (size == 0) { - return this; - } - - final int half = size / 2; - final char[] buf = buffer; - for (int leftIdx = 0, rightIdx = size - 1; leftIdx < half; leftIdx++, rightIdx--) { - final char swap = buf[leftIdx]; - buf[leftIdx] = buf[rightIdx]; - buf[rightIdx] = swap; + public StrBuilder insert(final int index, final Object obj) { + if (obj == null) { + return insert(index, nullText); } - return this; + return insert(index, obj.toString()); } - //----------------------------------------------------------------------- /** - * Trims the builder by removing characters less than or equal to a space - * from the beginning and end. + * Inserts the string into this builder. + * Inserting null will use the stored null text value. * + * @param index the index to add at, must be valid + * @param str the string to insert * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public StrBuilder trim() { - if (size == 0) { - return this; - } - int len = size; - final char[] buf = buffer; - int pos = 0; - while (pos < len && buf[pos] <= ' ') { - pos++; - } - while (pos < len && buf[len - 1] <= ' ') { - len--; - } - if (len < size) { - delete(len, size); + public StrBuilder insert(final int index, String str) { + validateIndex(index); + if (str == null) { + str = nullText; } - if (pos > 0) { - delete(0, pos); + if (str != null) { + final int strLen = str.length(); + if (strLen > 0) { + final int newSize = size + strLen; + ensureCapacity(newSize); + System.arraycopy(buffer, index, buffer, index + strLen, size - index); + size = newSize; + str.getChars(0, strLen, buffer, index); + } } return this; } - //----------------------------------------------------------------------- /** - * Checks whether this builder 
starts with the specified string. + * Tests if the string builder is empty (convenience Collections API style method). *

- * Note that this method handles null input quietly, unlike String. + * This method is the same as checking {@link #length()} and is provided to match the + * API of Collections. * - * @param str the string to search for, null returns false - * @return true if the builder starts with the string + * @return {@code true} if the size is {@code 0}. */ - public boolean startsWith(final String str) { - if (str == null) { - return false; - } - final int len = str.length(); - if (len == 0) { - return true; - } - if (len > size) { - return false; - } - for (int i = 0; i < len; i++) { - if (buffer[i] != str.charAt(i)) { - return false; - } - } - return true; + public boolean isEmpty() { + return size == 0; } /** - * Checks whether this builder ends with the specified string. + * Tests if the string builder is not empty (convenience Collections API style method). *

- * Note that this method handles null input quietly, unlike String. + * This method is the same as checking {@link #length()} and is provided to match the + * API of Collections. * - * @param str the string to search for, null returns false - * @return true if the builder ends with the string + * @return {@code true} if the size is greater than {@code 0}. + * @since 1.10.0 */ - public boolean endsWith(final String str) { - if (str == null) { - return false; - } - final int len = str.length(); - if (len == 0) { - return true; - } - if (len > size) { - return false; - } - int pos = size - len; - for (int i = 0; i < len; i++, pos++) { - if (buffer[pos] != str.charAt(i)) { - return false; - } - } - return true; + public boolean isNotEmpty() { + return size > 0; } //----------------------------------------------------------------------- /** - * {@inheritDoc} + * Searches the string builder to find the last reference to the specified char. + * + * @param ch the character to find + * @return The last index of the character, or -1 if not found */ - @Override - public CharSequence subSequence(final int startIndex, final int endIndex) { - if (startIndex < 0) { - throw new StringIndexOutOfBoundsException(startIndex); - } - if (endIndex > size) { - throw new StringIndexOutOfBoundsException(endIndex); - } - if (startIndex > endIndex) { - throw new StringIndexOutOfBoundsException(endIndex - startIndex); - } - return substring(startIndex, endIndex); + public int lastIndexOf(final char ch) { + return lastIndexOf(ch, size - 1); } /** - * Extracts a portion of this string builder as a string. + * Searches the string builder to find the last reference to the specified char. 
* - * @param start the start index, inclusive, must be valid - * @return the new string - * @throws IndexOutOfBoundsException if the index is invalid + * @param ch the character to find + * @param startIndex the index to start at, invalid index rounded to edge + * @return The last index of the character, or -1 if not found */ - public String substring(final int start) { - return substring(start, size); + public int lastIndexOf(final char ch, int startIndex) { + startIndex = startIndex >= size ? size - 1 : startIndex; + if (startIndex < 0) { + return -1; + } + for (int i = startIndex; i >= 0; i--) { + if (buffer[i] == ch) { + return i; + } + } + return -1; } /** - * Extracts a portion of this string builder as a string. + * Searches the string builder to find the last reference to the specified string. *

- * Note: This method treats an endIndex greater than the length of the - * builder as equal to the length of the builder, and continues - * without error, unlike StringBuffer or String. + * Note that a null input string will return -1, whereas the JDK throws an exception. * - * @param startIndex the start index, inclusive, must be valid - * @param endIndex the end index, exclusive, must be valid except - * that if too large it is treated as end of string - * @return the new string - * @throws IndexOutOfBoundsException if the index is invalid + * @param str the string to find, null returns -1 + * @return The last index of the string, or -1 if not found */ - public String substring(final int startIndex, int endIndex) { - endIndex = validateRange(startIndex, endIndex); - return new String(buffer, startIndex, endIndex - startIndex); + public int lastIndexOf(final String str) { + return lastIndexOf(str, size - 1); } /** - * Extracts the leftmost characters from the string builder without - * throwing an exception. + * Searches the string builder to find the last reference to the specified + * string starting searching from the given index. *

- * This method extracts the left length characters from - * the builder. If this many characters are not available, the whole - * builder is returned. Thus the returned string may be shorter than the - * length requested. + * Note that a null input string will return -1, whereas the JDK throws an exception. * - * @param length the number of characters to extract, negative returns empty string - * @return the new string + * @param str the string to find, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The last index of the string, or -1 if not found */ - public String leftString(final int length) { - if (length <= 0) { - return ""; - } else if (length >= size) { - return new String(buffer, 0, size); - } else { - return new String(buffer, 0, length); + public int lastIndexOf(final String str, int startIndex) { + startIndex = startIndex >= size ? size - 1 : startIndex; + if (str == null || startIndex < 0) { + return -1; + } + final int strLen = str.length(); + if (strLen > 0 && strLen <= size) { + if (strLen == 1) { + return lastIndexOf(str.charAt(0), startIndex); + } + + outer: + for (int i = startIndex - strLen + 1; i >= 0; i--) { + for (int j = 0; j < strLen; j++) { + if (str.charAt(j) != buffer[i + j]) { + continue outer; + } + } + return i; + } + + } else if (strLen == 0) { + return startIndex; } + return -1; } /** - * Extracts the rightmost characters from the string builder without + * Searches the string builder using the matcher to find the last match. + *

+ * Matchers can be used to perform advanced searching behavior. + * For example you could write a matcher to find the character 'a' + * followed by a number. + * + * @param matcher the matcher to use, null returns -1 + * @return The last index matched, or -1 if not found + */ + public int lastIndexOf(final StrMatcher matcher) { + return lastIndexOf(matcher, size); + } + + /** + * Searches the string builder using the matcher to find the last + * match searching from the given index. + *

+ * Matchers can be used to perform advanced searching behavior. + * For example you could write a matcher to find the character 'a' + * followed by a number. + * + * @param matcher the matcher to use, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The last index matched, or -1 if not found + */ + public int lastIndexOf(final StrMatcher matcher, int startIndex) { + startIndex = startIndex >= size ? size - 1 : startIndex; + if (matcher == null || startIndex < 0) { + return -1; + } + final char[] buf = buffer; + final int endIndex = startIndex + 1; + for (int i = startIndex; i >= 0; i--) { + if (matcher.isMatch(buf, i, 0, endIndex) > 0) { + return i; + } + } + return -1; + } + + /** + * Extracts the leftmost characters from the string builder without * throwing an exception. *

- * This method extracts the right length characters from + * This method extracts the left {@code length} characters from * the builder. If this many characters are not available, the whole * builder is returned. Thus the returned string may be shorter than the * length requested. * * @param length the number of characters to extract, negative returns empty string - * @return the new string + * @return The new string */ - public String rightString(final int length) { + public String leftString(final int length) { if (length <= 0) { - return ""; + return StringUtils.EMPTY; } else if (length >= size) { return new String(buffer, 0, size); } else { - return new String(buffer, size - length, length); + return new String(buffer, 0, length); } } + //----------------------------------------------------------------------- + /** + * Gets the length of the string builder. + * + * @return The length + */ + @Override + public int length() { + return size; + } + /** * Extracts some characters from the middle of the string builder without * throwing an exception. *

- * This method extracts length characters from the builder + * This method extracts {@code length} characters from the builder * at the specified index. * If the index is negative it is treated as zero. * If the index is greater than the builder size, it is treated as the builder size. @@ -2303,14 +2448,14 @@ public String rightString(final int length) { * * @param index the index to start at, negative means zero * @param length the number of characters to extract, negative returns empty string - * @return the new string + * @return The new string */ public String midString(int index, final int length) { if (index < 0) { index = 0; } if (length <= 0 || index >= size) { - return ""; + return StringUtils.EMPTY; } if (size <= index + length) { return new String(buffer, index, size - index); @@ -2318,483 +2463,512 @@ public String midString(int index, final int length) { return new String(buffer, index, length); } + /** + * Minimizes the capacity to the actual length of the string. + * + * @return this, to enable chaining + */ + public StrBuilder minimizeCapacity() { + if (buffer.length > length()) { + final char[] old = buffer; + buffer = new char[length()]; + System.arraycopy(old, 0, buffer, 0, size); + } + return this; + } + //----------------------------------------------------------------------- /** - * Checks if the string builder contains the specified char. + * If possible, reads chars from the provided {@link Readable} directly into underlying + * character buffer without making extra copies. 
* - * @param ch the character to find - * @return true if the builder contains the character + * @param readable object to read from + * @return The number of characters read + * @throws IOException if an I/O error occurs + * + * @see #appendTo(Appendable) */ - public boolean contains(final char ch) { - final char[] thisBuf = buffer; - for (int i = 0; i < this.size; i++) { - if (thisBuf[i] == ch) { - return true; + public int readFrom(final Readable readable) throws IOException { + final int oldSize = size; + if (readable instanceof Reader) { + final Reader r = (Reader) readable; + ensureCapacity(size + 1); + int read; + while ((read = r.read(buffer, size, buffer.length - size)) != -1) { + size += read; + ensureCapacity(size + 1); + } + } else if (readable instanceof CharBuffer) { + final CharBuffer cb = (CharBuffer) readable; + final int remaining = cb.remaining(); + ensureCapacity(size + remaining); + cb.get(buffer, size, remaining); + size += remaining; + } else { + while (true) { + ensureCapacity(size + 1); + final CharBuffer buf = CharBuffer.wrap(buffer, size, buffer.length - size); + final int read = readable.read(buf); + if (read == -1) { + break; + } + size += read; } } - return false; + return size - oldSize; } /** - * Checks if the string builder contains the specified string. + * Replaces a portion of the string builder with another string. + * The length of the inserted string does not have to match the removed length. 
* - * @param str the string to find - * @return true if the builder contains the string + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except + * that if too large it is treated as end of string + * @param replaceStr the string to replace with, null means delete range + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public boolean contains(final String str) { - return indexOf(str, 0) >= 0; + public StrBuilder replace(final int startIndex, int endIndex, final String replaceStr) { + endIndex = validateRange(startIndex, endIndex); + final int insertLen = replaceStr == null ? 0 : replaceStr.length(); + replaceImpl(startIndex, endIndex, endIndex - startIndex, replaceStr, insertLen); + return this; } + // ----------------------------------------------------------------------- /** - * Checks if the string builder contains a string matched using the - * specified matcher. + * Advanced search and replaces within the builder using a matcher. *

- * Matchers can be used to perform advanced searching behaviour. - * For example you could write a matcher to search for the character - * 'a' followed by a number. + * Matchers can be used to perform advanced behavior. + * For example you could write a matcher to delete all occurrences + * where the character 'a' is followed by a number. * - * @param matcher the matcher to use, null returns -1 - * @return true if the matcher finds a match in the builder + * @param matcher the matcher to use to find the deletion, null causes no action + * @param replaceStr the string to replace the match with, null is a delete + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except + * that if too large it is treated as end of string + * @param replaceCount the number of times to replace, -1 for replace all + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if start index is invalid */ - public boolean contains(final StrMatcher matcher) { - return indexOf(matcher, 0) >= 0; + public StrBuilder replace( + final StrMatcher matcher, final String replaceStr, + final int startIndex, int endIndex, final int replaceCount) { + endIndex = validateRange(startIndex, endIndex); + return replaceImpl(matcher, replaceStr, startIndex, endIndex, replaceCount); } //----------------------------------------------------------------------- /** - * Searches the string builder to find the first reference to the specified char. + * Replaces the search character with the replace character + * throughout the builder. 
* - * @param ch the character to find - * @return the first index of the character, or -1 if not found + * @param search the search character + * @param replace the replace character + * @return this, to enable chaining */ - public int indexOf(final char ch) { - return indexOf(ch, 0); + public StrBuilder replaceAll(final char search, final char replace) { + if (search != replace) { + for (int i = 0; i < size; i++) { + if (buffer[i] == search) { + buffer[i] = replace; + } + } + } + return this; } + //----------------------------------------------------------------------- /** - * Searches the string builder to find the first reference to the specified char. + * Replaces the search string with the replace string throughout the builder. * - * @param ch the character to find - * @param startIndex the index to start at, invalid index rounded to edge - * @return the first index of the character, or -1 if not found + * @param searchStr the search string, null causes no action to occur + * @param replaceStr the replace string, null is equivalent to an empty string + * @return this, to enable chaining */ - public int indexOf(final char ch, int startIndex) { - startIndex = (startIndex < 0 ? 0 : startIndex); - if (startIndex >= size) { - return -1; - } - final char[] thisBuf = buffer; - for (int i = startIndex; i < size; i++) { - if (thisBuf[i] == ch) { - return i; + public StrBuilder replaceAll(final String searchStr, final String replaceStr) { + final int searchLen = searchStr == null ? 0 : searchStr.length(); + if (searchLen > 0) { + final int replaceLen = replaceStr == null ? 
0 : replaceStr.length(); + int index = indexOf(searchStr, 0); + while (index >= 0) { + replaceImpl(index, index + searchLen, searchLen, replaceStr, replaceLen); + index = indexOf(searchStr, index + replaceLen); } } - return -1; + return this; } + //----------------------------------------------------------------------- /** - * Searches the string builder to find the first reference to the specified string. + * Replaces all matches within the builder with the replace string. *

- * Note that a null input string will return -1, whereas the JDK throws an exception. + * Matchers can be used to perform advanced replace behavior. + * For example you could write a matcher to replace all occurrences + * where the character 'a' is followed by a number. * - * @param str the string to find, null returns -1 - * @return the first index of the string, or -1 if not found + * @param matcher the matcher to use to find the deletion, null causes no action + * @param replaceStr the replace string, null is equivalent to an empty string + * @return this, to enable chaining */ - public int indexOf(final String str) { - return indexOf(str, 0); + public StrBuilder replaceAll(final StrMatcher matcher, final String replaceStr) { + return replace(matcher, replaceStr, 0, size, -1); } /** - * Searches the string builder to find the first reference to the specified - * string starting searching from the given index. - *

- * Note that a null input string will return -1, whereas the JDK throws an exception. + * Replaces the first instance of the search character with the + * replace character in the builder. * - * @param str the string to find, null returns -1 - * @param startIndex the index to start at, invalid index rounded to edge - * @return the first index of the string, or -1 if not found + * @param search the search character + * @param replace the replace character + * @return this, to enable chaining */ - public int indexOf(final String str, int startIndex) { - startIndex = (startIndex < 0 ? 0 : startIndex); - if (str == null || startIndex >= size) { - return -1; - } - final int strLen = str.length(); - if (strLen == 1) { - return indexOf(str.charAt(0), startIndex); - } - if (strLen == 0) { - return startIndex; - } - if (strLen > size) { - return -1; - } - final char[] thisBuf = buffer; - final int len = size - strLen + 1; - outer: - for (int i = startIndex; i < len; i++) { - for (int j = 0; j < strLen; j++) { - if (str.charAt(j) != thisBuf[i + j]) { - continue outer; + public StrBuilder replaceFirst(final char search, final char replace) { + if (search != replace) { + for (int i = 0; i < size; i++) { + if (buffer[i] == search) { + buffer[i] = replace; + break; } } - return i; } - return -1; + return this; } /** - * Searches the string builder using the matcher to find the first match. - *

- * Matchers can be used to perform advanced searching behaviour. - * For example you could write a matcher to find the character 'a' - * followed by a number. + * Replaces the first instance of the search string with the replace string. * - * @param matcher the matcher to use, null returns -1 - * @return the first index matched, or -1 if not found + * @param searchStr the search string, null causes no action to occur + * @param replaceStr the replace string, null is equivalent to an empty string + * @return this, to enable chaining */ - public int indexOf(final StrMatcher matcher) { - return indexOf(matcher, 0); + public StrBuilder replaceFirst(final String searchStr, final String replaceStr) { + final int searchLen = searchStr == null ? 0 : searchStr.length(); + if (searchLen > 0) { + final int index = indexOf(searchStr, 0); + if (index >= 0) { + final int replaceLen = replaceStr == null ? 0 : replaceStr.length(); + replaceImpl(index, index + searchLen, searchLen, replaceStr, replaceLen); + } + } + return this; } /** - * Searches the string builder using the matcher to find the first - * match searching from the given index. + * Replaces the first match within the builder with the replace string. *

- * Matchers can be used to perform advanced searching behaviour. - * For example you could write a matcher to find the character 'a' - * followed by a number. + * Matchers can be used to perform advanced replace behavior. + * For example you could write a matcher to replace + * where the character 'a' is followed by a number. * - * @param matcher the matcher to use, null returns -1 - * @param startIndex the index to start at, invalid index rounded to edge - * @return the first index matched, or -1 if not found + * @param matcher the matcher to use to find the deletion, null causes no action + * @param replaceStr the replace string, null is equivalent to an empty string + * @return this, to enable chaining */ - public int indexOf(final StrMatcher matcher, int startIndex) { - startIndex = (startIndex < 0 ? 0 : startIndex); - if (matcher == null || startIndex >= size) { - return -1; - } - final int len = size; - final char[] buf = buffer; - for (int i = startIndex; i < len; i++) { - if (matcher.isMatch(buf, i, startIndex, len) > 0) { - return i; - } - } - return -1; + public StrBuilder replaceFirst(final StrMatcher matcher, final String replaceStr) { + return replace(matcher, replaceStr, 0, size, 1); } //----------------------------------------------------------------------- /** - * Searches the string builder to find the last reference to the specified char. + * Internal method to delete a range without validation. 
* - * @param ch the character to find - * @return the last index of the character, or -1 if not found + * @param startIndex the start index, must be valid + * @param endIndex the end index (exclusive), must be valid + * @param removeLen the length to remove (endIndex - startIndex), must be valid + * @param insertStr the string to replace with, null means delete range + * @param insertLen the length of the insert string, must be valid + * @throws IndexOutOfBoundsException if any index is invalid */ - public int lastIndexOf(final char ch) { - return lastIndexOf(ch, size - 1); + private void replaceImpl(final int startIndex, + final int endIndex, + final int removeLen, + final String insertStr, + final int insertLen) { + final int newSize = size - removeLen + insertLen; + if (insertLen != removeLen) { + ensureCapacity(newSize); + System.arraycopy(buffer, endIndex, buffer, startIndex + insertLen, size - endIndex); + size = newSize; + } + if (insertLen > 0) { + insertStr.getChars(0, insertLen, buffer, startIndex); + } } /** - * Searches the string builder to find the last reference to the specified char. + * Replaces within the builder using a matcher. + *

+ * Matchers can be used to perform advanced behavior. + * For example you could write a matcher to delete all occurrences + * where the character 'a' is followed by a number. * - * @param ch the character to find - * @param startIndex the index to start at, invalid index rounded to edge - * @return the last index of the character, or -1 if not found + * @param matcher the matcher to use to find the deletion, null causes no action + * @param replaceStr the string to replace the match with, null is a delete + * @param from the start index, must be valid + * @param to the end index (exclusive), must be valid + * @param replaceCount the number of times to replace, -1 for replace all + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if any index is invalid */ - public int lastIndexOf(final char ch, int startIndex) { - startIndex = (startIndex >= size ? size - 1 : startIndex); - if (startIndex < 0) { - return -1; + private StrBuilder replaceImpl( + final StrMatcher matcher, final String replaceStr, + final int from, int to, int replaceCount) { + if (matcher == null || size == 0) { + return this; } - for (int i = startIndex; i >= 0; i--) { - if (buffer[i] == ch) { - return i; + final int replaceLen = replaceStr == null ? 0 : replaceStr.length(); + for (int i = from; i < to && replaceCount != 0; i++) { + final char[] buf = buffer; + final int removeLen = matcher.isMatch(buf, i, from, to); + if (removeLen > 0) { + replaceImpl(i, i + removeLen, removeLen, replaceStr, replaceLen); + to = to - removeLen + replaceLen; + i = i + replaceLen - 1; + if (replaceCount > 0) { + replaceCount--; + } } } - return -1; + return this; } + //----------------------------------------------------------------------- /** - * Searches the string builder to find the last reference to the specified string. - *

- * Note that a null input string will return -1, whereas the JDK throws an exception. + * Reverses the string builder placing each character in the opposite index. * - * @param str the string to find, null returns -1 - * @return the last index of the string, or -1 if not found + * @return this, to enable chaining */ - public int lastIndexOf(final String str) { - return lastIndexOf(str, size - 1); + public StrBuilder reverse() { + if (size == 0) { + return this; + } + + final int half = size / 2; + final char[] buf = buffer; + for (int leftIdx = 0, rightIdx = size - 1; leftIdx < half; leftIdx++, rightIdx--) { + final char swap = buf[leftIdx]; + buf[leftIdx] = buf[rightIdx]; + buf[rightIdx] = swap; + } + return this; } /** - * Searches the string builder to find the last reference to the specified - * string starting searching from the given index. + * Extracts the rightmost characters from the string builder without + * throwing an exception. *

- * Note that a null input string will return -1, whereas the JDK throws an exception. + * This method extracts the right {@code length} characters from + * the builder. If this many characters are not available, the whole + * builder is returned. Thus the returned string may be shorter than the + * length requested. * - * @param str the string to find, null returns -1 - * @param startIndex the index to start at, invalid index rounded to edge - * @return the last index of the string, or -1 if not found + * @param length the number of characters to extract, negative returns empty string + * @return The new string */ - public int lastIndexOf(final String str, int startIndex) { - startIndex = (startIndex >= size ? size - 1 : startIndex); - if (str == null || startIndex < 0) { - return -1; - } - final int strLen = str.length(); - if (strLen > 0 && strLen <= size) { - if (strLen == 1) { - return lastIndexOf(str.charAt(0), startIndex); - } - - outer: - for (int i = startIndex - strLen + 1; i >= 0; i--) { - for (int j = 0; j < strLen; j++) { - if (str.charAt(j) != buffer[i + j]) { - continue outer; - } - } - return i; - } - - } else if (strLen == 0) { - return startIndex; + public String rightString(final int length) { + if (length <= 0) { + return StringUtils.EMPTY; + } else if (length >= size) { + return new String(buffer, 0, size); + } else { + return new String(buffer, size - length, length); } - return -1; } /** - * Searches the string builder using the matcher to find the last match. - *

- * Matchers can be used to perform advanced searching behaviour. - * For example you could write a matcher to find the character 'a' - * followed by a number. + * Sets the character at the specified index. * - * @param matcher the matcher to use, null returns -1 - * @return the last index matched, or -1 if not found + * @see #charAt(int) + * @see #deleteCharAt(int) + * @param index the index to set + * @param ch the new character + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid */ - public int lastIndexOf(final StrMatcher matcher) { - return lastIndexOf(matcher, size); + public StrBuilder setCharAt(final int index, final char ch) { + if (index < 0 || index >= length()) { + throw new StringIndexOutOfBoundsException(index); + } + buffer[index] = ch; + return this; } /** - * Searches the string builder using the matcher to find the last - * match searching from the given index. - *

- * Matchers can be used to perform advanced searching behaviour. - * For example you could write a matcher to find the character 'a' - * followed by a number. + * Updates the length of the builder by either dropping the last characters + * or adding filler of Unicode zero. * - * @param matcher the matcher to use, null returns -1 - * @param startIndex the index to start at, invalid index rounded to edge - * @return the last index matched, or -1 if not found + * @param length the length to set to, must be zero or positive + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the length is negative */ - public int lastIndexOf(final StrMatcher matcher, int startIndex) { - startIndex = (startIndex >= size ? size - 1 : startIndex); - if (matcher == null || startIndex < 0) { - return -1; + public StrBuilder setLength(final int length) { + if (length < 0) { + throw new StringIndexOutOfBoundsException(length); } - final char[] buf = buffer; - final int endIndex = startIndex + 1; - for (int i = startIndex; i >= 0; i--) { - if (matcher.isMatch(buf, i, 0, endIndex) > 0) { - return i; + if (length < size) { + size = length; + } else if (length > size) { + ensureCapacity(length); + final int oldEnd = size; + final int newEnd = length; + size = length; + for (int i = oldEnd; i < newEnd; i++) { + buffer[i] = '\0'; } } - return -1; + return this; } - //----------------------------------------------------------------------- /** - * Creates a tokenizer that can tokenize the contents of this builder. - *

- * This method allows the contents of this builder to be tokenized. - * The tokenizer will be setup by default to tokenize on space, tab, - * newline and formfeed (as per StringTokenizer). These values can be - * changed on the tokenizer class, before retrieving the tokens. - *

- * The returned tokenizer is linked to this builder. You may intermix - * calls to the builder and tokenizer within certain limits, however - * there is no synchronization. Once the tokenizer has been used once, - * it must be {@link StrTokenizer#reset() reset} to pickup the latest - * changes in the builder. For example: - *

-     * StrBuilder b = new StrBuilder();
-     * b.append("a b ");
-     * StrTokenizer t = b.asTokenizer();
-     * String[] tokens1 = t.getTokenArray();  // returns a,b
-     * b.append("c d ");
-     * String[] tokens2 = t.getTokenArray();  // returns a,b (c and d ignored)
-     * t.reset();              // reset causes builder changes to be picked up
-     * String[] tokens3 = t.getTokenArray();  // returns a,b,c,d
-     * 
- * In addition to simply intermixing appends and tokenization, you can also - * call the set methods on the tokenizer to alter how it tokenizes. Just - * remember to call reset when you want to pickup builder changes. - *

- * Calling {@link StrTokenizer#reset(String)} or {@link StrTokenizer#reset(char[])} - * with a non-null value will break the link with the builder. + * Sets the text to be appended when a new line is added. * - * @return a tokenizer that is linked to this builder + * @param newLine the new line text, null means use system default + * @return this, to enable chaining */ - public StrTokenizer asTokenizer() { - return new StrBuilderTokenizer(); + public StrBuilder setNewLineText(final String newLine) { + this.newLine = newLine; + return this; } - //----------------------------------------------------------------------- /** - * Gets the contents of this builder as a Reader. - *

- * This method allows the contents of the builder to be read - * using any standard method that expects a Reader. - *

- * To use, simply create a StrBuilder, populate it with - * data, call asReader, and then read away. - *

- * The internal character array is shared between the builder and the reader. - * This allows you to append to the builder after creating the reader, - * and the changes will be picked up. - * Note however, that no synchronization occurs, so you must perform - * all operations with the builder and the reader in one thread. - *

- * The returned reader supports marking, and ignores the flush method. + * Sets the text to be appended when null is added. * - * @return a reader that reads from this builder + * @param nullText the null text, null means no append + * @return this, to enable chaining */ - public Reader asReader() { - return new StrBuilderReader(); + public StrBuilder setNullText(String nullText) { + if (nullText != null && nullText.isEmpty()) { + nullText = null; + } + this.nullText = nullText; + return this; } //----------------------------------------------------------------------- /** - * Gets this builder as a Writer that can be written to. - *

- * This method allows you to populate the contents of the builder - * using any standard method that takes a Writer. - *

- * To use, simply create a StrBuilder, - * call asWriter, and populate away. The data is available - * at any time using the methods of the StrBuilder. - *

- * The internal character array is shared between the builder and the writer. - * This allows you to intermix calls that append to the builder and - * write using the writer and the changes will be occur correctly. - * Note however, that no synchronization occurs, so you must perform - * all operations with the builder and the writer in one thread. + * Gets the length of the string builder. *

- * The returned writer ignores the close and flush methods. + * This method is the same as {@link #length()} and is provided to match the + * API of Collections. * - * @return a writer that populates this builder + * @return The length */ - public Writer asWriter() { - return new StrBuilderWriter(); + public int size() { + return size; } + //----------------------------------------------------------------------- /** - * Appends current contents of this StrBuilder to the - * provided {@link Appendable}. + * Checks whether this builder starts with the specified string. *

- * This method tries to avoid doing any extra copies of contents. - * - * @param appendable the appendable to append data to - * @throws IOException if an I/O error occurs + * Note that this method handles null input quietly, unlike String. * - * @see #readFrom(Readable) + * @param str the string to search for, null returns false + * @return true if the builder starts with the string */ - public void appendTo(final Appendable appendable) throws IOException { - if (appendable instanceof Writer) { - ((Writer) appendable).write(buffer, 0, size); - } else if (appendable instanceof StringBuilder) { - ((StringBuilder) appendable).append(buffer, 0, size); - } else if (appendable instanceof StringBuffer) { - ((StringBuffer) appendable).append(buffer, 0, size); - } else if (appendable instanceof CharBuffer) { - ((CharBuffer) appendable).put(buffer, 0, size); - } else { - appendable.append(this); + public boolean startsWith(final String str) { + if (str == null) { + return false; } - } - - /** - * Checks the contents of this builder against another to see if they - * contain the same character content ignoring case. 
- * - * @param other the object to check, null returns false - * @return true if the builders contain the same characters in the same order - */ - public boolean equalsIgnoreCase(final StrBuilder other) { - if (this == other) { + final int len = str.length(); + if (len == 0) { return true; } - if (this.size != other.size) { + if (len > size) { return false; } - final char[] thisBuf = this.buffer; - final char[] otherBuf = other.buffer; - for (int i = size - 1; i >= 0; i--) { - final char c1 = thisBuf[i]; - final char c2 = otherBuf[i]; - if (c1 != c2 && Character.toUpperCase(c1) != Character.toUpperCase(c2)) { + for (int i = 0; i < len; i++) { + if (buffer[i] != str.charAt(i)) { return false; } } return true; } + //----------------------------------------------------------------------- /** - * Checks the contents of this builder against another to see if they - * contain the same character content. + * {@inheritDoc} + */ + @Override + public CharSequence subSequence(final int startIndex, final int endIndex) { + if (startIndex < 0) { + throw new StringIndexOutOfBoundsException(startIndex); + } + if (endIndex > size) { + throw new StringIndexOutOfBoundsException(endIndex); + } + if (startIndex > endIndex) { + throw new StringIndexOutOfBoundsException(endIndex - startIndex); + } + return substring(startIndex, endIndex); + } + + /** + * Extracts a portion of this string builder as a string. 
* - * @param other the object to check, null returns false - * @return true if the builders contain the same characters in the same order + * @param start the start index, inclusive, must be valid + * @return The new string + * @throws IndexOutOfBoundsException if the index is invalid */ - public boolean equals(final StrBuilder other) { - if (this == other) { - return true; - } - if (other == null) { - return false; - } - if (this.size != other.size) { - return false; - } - final char[] thisBuf = this.buffer; - final char[] otherBuf = other.buffer; - for (int i = size - 1; i >= 0; i--) { - if (thisBuf[i] != otherBuf[i]) { - return false; - } - } - return true; + public String substring(final int start) { + return substring(start, size); } /** - * Checks the contents of this builder against another to see if they - * contain the same character content. + * Extracts a portion of this string builder as a string. + *

+ * Note: This method treats an endIndex greater than the length of the + * builder as equal to the length of the builder, and continues + * without error, unlike StringBuffer or String. * - * @param obj the object to check, null returns false - * @return true if the builders contain the same characters in the same order + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except + * that if too large it is treated as end of string + * @return The new string + * @throws IndexOutOfBoundsException if the index is invalid */ - @Override - public boolean equals(final Object obj) { - return obj instanceof StrBuilder - && equals((StrBuilder) obj); + public String substring(final int startIndex, int endIndex) { + endIndex = validateRange(startIndex, endIndex); + return new String(buffer, startIndex, endIndex - startIndex); + } + + //----------------------------------------------------------------------- + /** + * Copies the builder's character array into a new character array. + * + * @return a new array that represents the contents of the builder + */ + public char[] toCharArray() { + if (size == 0) { + return ArrayUtils.EMPTY_CHAR_ARRAY; + } + final char[] chars = new char[size]; + System.arraycopy(buffer, 0, chars, 0, size); + return chars; } /** - * Gets a suitable hash code for this builder. + * Copies part of the builder's character array into a new character array. 
* - * @return a hash code + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except that + * if too large it is treated as end of string + * @return a new array that holds part of the contents of the builder + * @throws IndexOutOfBoundsException if startIndex is invalid, + * or if endIndex is invalid (but endIndex greater than size is valid) */ - @Override - public int hashCode() { - final char[] buf = buffer; - int hash = 0; - for (int i = size - 1; i >= 0; i--) { - hash = 31 * hash + buf[i]; + public char[] toCharArray(final int startIndex, int endIndex) { + endIndex = validateRange(startIndex, endIndex); + final int len = endIndex - startIndex; + if (len == 0) { + return ArrayUtils.EMPTY_CHAR_ARRAY; } - return hash; + final char[] chars = new char[len]; + System.arraycopy(buffer, startIndex, chars, 0, len); + return chars; } //----------------------------------------------------------------------- @@ -2805,7 +2979,7 @@ public int hashCode() { * Note that unlike StringBuffer, the string version returned is * independent of the string builder. * - * @return the builder as a String + * @return The builder as a String */ @Override public String toString() { @@ -2816,7 +2990,7 @@ public String toString() { * Gets a StringBuffer version of the string builder, creating a * new instance each time the method is called. * - * @return the builder as a StringBuffer + * @return The builder as a StringBuffer */ public StringBuffer toStringBuffer() { return new StringBuffer(size).append(buffer, 0, size); @@ -2826,43 +3000,39 @@ public StringBuffer toStringBuffer() { * Gets a StringBuilder version of the string builder, creating a * new instance each time the method is called. 
* - * @return the builder as a StringBuilder + * @return The builder as a StringBuilder */ public StringBuilder toStringBuilder() { return new StringBuilder(size).append(buffer, 0, size); } - /** - * Implement the {@link Builder} interface. - * @return the builder as a String - * @see #toString() - */ - @Override - public String build() { - return toString(); - } - //----------------------------------------------------------------------- /** - * Validates parameters defining a range of the builder. + * Trims the builder by removing characters less than or equal to a space + * from the beginning and end. * - * @param startIndex the start index, inclusive, must be valid - * @param endIndex the end index, exclusive, must be valid except - * that if too large it is treated as end of string - * @return the new string - * @throws IndexOutOfBoundsException if the index is invalid + * @return this, to enable chaining */ - protected int validateRange(final int startIndex, int endIndex) { - if (startIndex < 0) { - throw new StringIndexOutOfBoundsException(startIndex); + public StrBuilder trim() { + if (size == 0) { + return this; } - if (endIndex > size) { - endIndex = size; + int len = size; + final char[] buf = buffer; + int pos = 0; + while (pos < len && buf[pos] <= ' ') { + pos++; } - if (startIndex > endIndex) { - throw new StringIndexOutOfBoundsException("end < start"); + while (pos < len && buf[len - 1] <= ' ') { + len--; } - return endIndex; + if (len < size) { + delete(len, size); + } + if (pos > 0) { + delete(0, pos); + } + return this; } /** @@ -2879,183 +3049,25 @@ protected void validateIndex(final int index) { //----------------------------------------------------------------------- /** - * Inner class to allow StrBuilder to operate as a tokenizer. - */ - class StrBuilderTokenizer extends StrTokenizer { - - /** - * Default constructor. 
- */ - StrBuilderTokenizer() { - super(); - } - - /** {@inheritDoc} */ - @Override - protected List tokenize(final char[] chars, final int offset, final int count) { - if (chars == null) { - return super.tokenize( - StrBuilder.this.buffer, 0, StrBuilder.this.size()); - } - return super.tokenize(chars, offset, count); - } - - /** {@inheritDoc} */ - @Override - public String getContent() { - final String str = super.getContent(); - if (str == null) { - return StrBuilder.this.toString(); - } - return str; - } - } - - //----------------------------------------------------------------------- - /** - * Inner class to allow StrBuilder to operate as a reader. - */ - class StrBuilderReader extends Reader { - /** The current stream position. */ - private int pos; - /** The last mark position. */ - private int mark; - - /** - * Default constructor. - */ - StrBuilderReader() { - super(); - } - - /** {@inheritDoc} */ - @Override - public void close() { - // do nothing - } - - /** {@inheritDoc} */ - @Override - public int read() { - if (!ready()) { - return -1; - } - return StrBuilder.this.charAt(pos++); - } - - /** {@inheritDoc} */ - @Override - public int read(final char[] b, final int off, int len) { - if (off < 0 || len < 0 || off > b.length - || (off + len) > b.length || (off + len) < 0) { - throw new IndexOutOfBoundsException(); - } - if (len == 0) { - return 0; - } - if (pos >= StrBuilder.this.size()) { - return -1; - } - if (pos + len > size()) { - len = StrBuilder.this.size() - pos; - } - StrBuilder.this.getChars(pos, pos + len, b, off); - pos += len; - return len; - } - - /** {@inheritDoc} */ - @Override - public long skip(long n) { - if (pos + n > StrBuilder.this.size()) { - n = StrBuilder.this.size() - pos; - } - if (n < 0) { - return 0; - } - pos += n; - return n; - } - - /** {@inheritDoc} */ - @Override - public boolean ready() { - return pos < StrBuilder.this.size(); - } - - /** {@inheritDoc} */ - @Override - public boolean markSupported() { - return true; - } - - 
/** {@inheritDoc} */ - @Override - public void mark(final int readAheadLimit) { - mark = pos; - } - - /** {@inheritDoc} */ - @Override - public void reset() { - pos = mark; - } - } - - //----------------------------------------------------------------------- - /** - * Inner class to allow StrBuilder to operate as a writer. + * Validates parameters defining a range of the builder. + * + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except + * that if too large it is treated as end of string + * @return The new string + * @throws IndexOutOfBoundsException if the index is invalid */ - class StrBuilderWriter extends Writer { - - /** - * Default constructor. - */ - StrBuilderWriter() { - super(); - } - - /** {@inheritDoc} */ - @Override - public void close() { - // do nothing - } - - /** {@inheritDoc} */ - @Override - public void flush() { - // do nothing - } - - /** {@inheritDoc} */ - @Override - public void write(final int c) { - StrBuilder.this.append((char) c); - } - - /** {@inheritDoc} */ - @Override - public void write(final char[] cbuf) { - StrBuilder.this.append(cbuf); - } - - /** {@inheritDoc} */ - @Override - public void write(final char[] cbuf, final int off, final int len) { - StrBuilder.this.append(cbuf, off, len); + protected int validateRange(final int startIndex, int endIndex) { + if (startIndex < 0) { + throw new StringIndexOutOfBoundsException(startIndex); } - - /** {@inheritDoc} */ - @Override - public void write(final String str) { - StrBuilder.this.append(str); + if (endIndex > size) { + endIndex = size; } - - /** {@inheritDoc} */ - @Override - public void write(final String str, final int off, final int len) { - StrBuilder.this.append(str, off, len); + if (startIndex > endIndex) { + throw new StringIndexOutOfBoundsException("end < start"); } + return endIndex; } } diff --git a/src/main/java/org/apache/commons/text/StrLookup.java 
b/src/main/java/org/apache/commons/text/StrLookup.java index 5eeed88d8f..12e94313b3 100644 --- a/src/main/java/org/apache/commons/text/StrLookup.java +++ b/src/main/java/org/apache/commons/text/StrLookup.java @@ -19,6 +19,9 @@ import java.util.Map; import java.util.ResourceBundle; +import org.apache.commons.text.lookup.StringLookup; +import org.apache.commons.text.lookup.StringLookupFactory; + /** * Lookup a String key to a String value. *

@@ -33,8 +36,10 @@ * * @param the type of the values supported by the lookup * @since 1.0 + * @deprecated Deprecated as of 1.3, use {@link StringLookupFactory} instead. This class will be removed in 2.0. */ -public abstract class StrLookup { +@Deprecated +public abstract class StrLookup implements StringLookup { /** * Lookup that always returns null. @@ -91,6 +96,7 @@ public static StrLookup mapLookup(final Map map) { * * @param resourceBundle the map of keys to values, may be null * @return a lookup using the map, not null + * @see StringLookupFactory#resourceBundleStringLookup(String) */ public static StrLookup resourceBundleLookup(final ResourceBundle resourceBundle) { return new ResourceBundleLookup(resourceBundle); @@ -101,33 +107,8 @@ public static StrLookup resourceBundleLookup(final ResourceBundle resour * Constructor. */ protected StrLookup() { - super(); } - /** - * Looks up a String key to a String value. - *

- * The internal implementation may use any mechanism to return the value. The simplest implementation is to use a - * Map. However, virtually any implementation is possible. - *

- * For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the - * value on demand from the database Or, a numeric based implementation could be created that treats the key as an - * integer, increments the value and return the result as a string - converting 1 to 2, 15 to 16 etc. - *

- * The {@link #lookup(String)} method always returns a String, regardless of the underlying data, by converting it - * as necessary. For example: - * - *

-     * Map<String, Object> map = new HashMap<String, Object>();
-     * map.put("number", Integer.valueOf(2));
-     * assertEquals("2", StrLookup.mapLookup(map).lookup("number"));
-     * 
- * - * @param key the key to be looked up, may be null - * @return the matching value, null if no match - */ - public abstract String lookup(String key); - // ----------------------------------------------------------------------- /** * Lookup implementation that uses a Map. @@ -154,7 +135,7 @@ static class MapStrLookup extends StrLookup { * If the map is null, then null is returned. The map result object is converted to a string using toString(). * * @param key the key to be looked up, may be null - * @return the matching value, null if no match + * @return The matching value, null if no match */ @Override public String lookup(final String key) { @@ -217,12 +198,11 @@ private static final class SystemPropertiesStrLookup extends StrLookup { */ @Override public String lookup(final String key) { - if (key.length() > 0) { + if (!key.isEmpty()) { try { return System.getProperty(key); } catch (final SecurityException scex) { // Squelched. All lookup(String) will return null. - return null; } } return null; diff --git a/src/main/java/org/apache/commons/text/StrMatcher.java b/src/main/java/org/apache/commons/text/StrMatcher.java index 357118c0a0..9996643c12 100644 --- a/src/main/java/org/apache/commons/text/StrMatcher.java +++ b/src/main/java/org/apache/commons/text/StrMatcher.java @@ -18,6 +18,9 @@ import java.util.Arrays; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.text.matcher.StringMatcherFactory; + /** * A matcher class that can be queried to determine if a character array * portion matches. @@ -26,42 +29,52 @@ * If these do not suffice, you can subclass and implement your own matcher. * * @since 1.0 + * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0. */ +@Deprecated public abstract class StrMatcher { /** * Matches the comma character. */ private static final StrMatcher COMMA_MATCHER = new CharMatcher(','); + /** * Matches the tab character. 
*/ private static final StrMatcher TAB_MATCHER = new CharMatcher('\t'); + /** * Matches the space character. */ private static final StrMatcher SPACE_MATCHER = new CharMatcher(' '); + /** * Matches the same characters as StringTokenizer, - * namely space, tab, newline, formfeed. + * namely space, tab, newline, form feed. */ private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray()); + /** * Matches the String trim() whitespace characters. */ private static final StrMatcher TRIM_MATCHER = new TrimMatcher(); + /** * Matches the double quote character. */ private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\''); + /** * Matches the double quote character. */ private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"'); + /** * Matches the single or double quote character. */ private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray()); + /** * Matches no characters. */ @@ -98,9 +111,9 @@ public static StrMatcher spaceMatcher() { /** * Matches the same characters as StringTokenizer, - * namely space, tab, newline and formfeed. + * namely space, tab, newline and form feed. * - * @return the split matcher + * @return The split matcher */ public static StrMatcher splitMatcher() { return SPLIT_MATCHER; @@ -109,7 +122,7 @@ public static StrMatcher splitMatcher() { /** * Matches the String trim() whitespace characters. * - * @return the trim matcher + * @return The trim matcher */ public static StrMatcher trimMatcher() { return TRIM_MATCHER; @@ -152,7 +165,7 @@ public static StrMatcher noneMatcher() { } /** - * Constructor that creates a matcher from a character. + * Creates a matcher from a character. * * @param ch the character to match, must not be null * @return a new Matcher for the given char @@ -162,13 +175,13 @@ public static StrMatcher charMatcher(final char ch) { } /** - * Constructor that creates a matcher from a set of characters. 
+ * Creates a matcher from a set of characters. * * @param chars the characters to match, null or empty matches nothing * @return a new matcher for the given char[] */ public static StrMatcher charSetMatcher(final char... chars) { - if (chars == null || chars.length == 0) { + if (ArrayUtils.isEmpty(chars)) { return NONE_MATCHER; } if (chars.length == 1) { @@ -178,13 +191,13 @@ public static StrMatcher charSetMatcher(final char... chars) { } /** - * Constructor that creates a matcher from a string representing a set of characters. + * Creates a matcher from a string representing a set of characters. * * @param chars the characters to match, null or empty matches nothing * @return a new Matcher for the given characters */ public static StrMatcher charSetMatcher(final String chars) { - if (chars == null || chars.length() == 0) { + if (chars == null || chars.isEmpty()) { return NONE_MATCHER; } if (chars.length() == 1) { @@ -194,13 +207,13 @@ public static StrMatcher charSetMatcher(final String chars) { } /** - * Constructor that creates a matcher from a string. + * Creates a matcher from a string. * * @param str the string to match, null or empty matches nothing * @return a new Matcher for the given String */ public static StrMatcher stringMatcher(final String str) { - if (str == null || str.length() == 0) { + if (str == null || str.isEmpty()) { return NONE_MATCHER; } return new StringMatcher(str); @@ -211,23 +224,22 @@ public static StrMatcher stringMatcher(final String str) { * Constructor. */ protected StrMatcher() { - super(); } /** - * Returns the number of matching characters, zero for no match. + * Returns the number of matching characters, or zero if there is no match. *

* This method is called to check for a match. - * The parameter pos represents the current position to be - * checked in the string buffer (a character array which must + * The parameter {@code pos} represents the current position to be + * checked in the string {@code buffer} (a character array which must * not be changed). - * The API guarantees that pos is a valid index for buffer. + * The API guarantees that {@code pos} is a valid index for {@code buffer}. *

* The character array may be larger than the active area to be matched. * Only values in the buffer between the specified indices may be accessed. *

* The matching code may check one character or many. - * It may check characters preceding pos as well as those + * It may check characters preceding {@code pos} as well as those * after, so long as no checks exceed the bounds specified. *

* It must return zero for no match, or a positive number if a match was found. @@ -237,28 +249,28 @@ protected StrMatcher() { * @param pos the starting position for the match, valid for buffer * @param bufferStart the first active index in the buffer, valid for buffer * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer - * @return the number of matching characters, zero for no match + * @return The number of matching characters, or zero if there is no match */ public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd); /** - * Returns the number of matching characters, zero for no match. + * Returns the number of matching characters, or zero if there is no match. *

* This method is called to check for a match. - * The parameter pos represents the current position to be - * checked in the string buffer (a character array which must + * The parameter {@code pos} represents the current position to be + * checked in the string {@code buffer} (a character array which must * not be changed). - * The API guarantees that pos is a valid index for buffer. + * The API guarantees that {@code pos} is a valid index for {@code buffer}. *

* The matching code may check one character or many. - * It may check characters preceding pos as well as those after. + * It may check characters preceding {@code pos} as well as those after. *

* It must return zero for no match, or a positive number if a match was found. * The number indicates the number of characters that matched. * * @param buffer the text content to match against, do not change * @param pos the starting position for the match, valid for buffer - * @return the number of matching characters, zero for no match + * @return The number of matching characters, or zero if there is no match */ public int isMatch(final char[] buffer, final int pos) { return isMatch(buffer, pos, 0, buffer.length); @@ -278,19 +290,18 @@ static final class CharSetMatcher extends StrMatcher { * @param chars the characters to match, must not be null */ CharSetMatcher(final char[] chars) { - super(); this.chars = chars.clone(); Arrays.sort(this.chars); } /** - * Returns whether or not the given character matches. + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. * * @param buffer the text content to match against, do not change * @param pos the starting position for the match, valid for buffer * @param bufferStart the first active index in the buffer, valid for buffer * @param bufferEnd the end index of the active buffer, valid for buffer - * @return the number of matching characters, zero for no match + * @return The number of matching characters, or zero if there is no match */ @Override public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { @@ -312,18 +323,17 @@ static final class CharMatcher extends StrMatcher { * @param ch the character to match */ CharMatcher(final char ch) { - super(); this.ch = ch; } /** - * Returns whether or not the given character matches. + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. 
* * @param buffer the text content to match against, do not change * @param pos the starting position for the match, valid for buffer * @param bufferStart the first active index in the buffer, valid for buffer * @param bufferEnd the end index of the active buffer, valid for buffer - * @return the number of matching characters, zero for no match + * @return The number of matching characters, or zero if there is no match */ @Override public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { @@ -345,18 +355,17 @@ static final class StringMatcher extends StrMatcher { * @param str the string to match, must not be null */ StringMatcher(final String str) { - super(); chars = str.toCharArray(); } /** - * Returns whether or not the given text matches the stored string. + * Returns the number of matching characters, or zero if there is no match. * * @param buffer the text content to match against, do not change * @param pos the starting position for the match, valid for buffer * @param bufferStart the first active index in the buffer, valid for buffer * @param bufferEnd the end index of the active buffer, valid for buffer - * @return the number of matching characters, zero for no match + * @return The number of matching characters, or zero if there is no match */ @Override public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) { @@ -386,20 +395,19 @@ public String toString() { static final class NoMatcher extends StrMatcher { /** - * Constructs a new instance of NoMatcher. + * Constructs a new instance of {@code NoMatcher}. */ NoMatcher() { - super(); } /** - * Always returns false. + * Always returns {@code 0}. 
* * @param buffer the text content to match against, do not change * @param pos the starting position for the match, valid for buffer * @param bufferStart the first active index in the buffer, valid for buffer * @param bufferEnd the end index of the active buffer, valid for buffer - * @return the number of matching characters, zero for no match + * @return The number of matching characters, or zero if there is no match */ @Override public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { @@ -414,10 +422,9 @@ public int isMatch(final char[] buffer, final int pos, final int bufferStart, fi static final class TrimMatcher extends StrMatcher { /** - * Constructs a new instance of TrimMatcher. + * Constructs a new instance of {@code TrimMatcher}. */ TrimMatcher() { - super(); } /** @@ -427,7 +434,7 @@ static final class TrimMatcher extends StrMatcher { * @param pos the starting position for the match, valid for buffer * @param bufferStart the first active index in the buffer, valid for buffer * @param bufferEnd the end index of the active buffer, valid for buffer - * @return the number of matching characters, zero for no match + * @return The number of matching characters, or zero if there is no match */ @Override public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { diff --git a/src/main/java/org/apache/commons/text/StrSubstitutor.java b/src/main/java/org/apache/commons/text/StrSubstitutor.java index 7bff411d00..42fee5e587 100644 --- a/src/main/java/org/apache/commons/text/StrSubstitutor.java +++ b/src/main/java/org/apache/commons/text/StrSubstitutor.java @@ -29,7 +29,7 @@ * Substitutes variables within a string by values. *

* This class takes a piece of text and substitutes all the variables within it. - * The default definition of a variable is ${variableName}. + * The default definition of a variable is {@code ${variableName}}. * The prefix and suffix can be changed via constructors and set methods. *

* Variable values are typically resolved from a map, but could also be resolved @@ -44,12 +44,12 @@ * Typical usage of this class follows the following pattern: First an instance is created * and initialized with the map that contains the values for the available variables. * If a prefix and/or suffix for variables should be used other than the default ones, - * the appropriate settings can be performed. After that the replace() + * the appropriate settings can be performed. After that the {@code replace()} * method can be called passing in the source text for interpolation. In the returned * text all variable references (as long as their values are known) will be resolved. * The following example demonstrates this: *

- * Map valuesMap = HashMap();
+ * Map<String, String> valuesMap = new HashMap<>();
  * valuesMap.put("animal", "quick brown fox");
  * valuesMap.put("target", "lazy dog");
  * String templateString = "The ${animal} jumped over the ${target}.";
@@ -69,7 +69,7 @@
  * {@link #setValueDelimiter(char)} or {@link #setValueDelimiter(String)}.
  * The following shows an example with variable default value settings:
  * 
- * Map valuesMap = HashMap();
+ * Map<String, String> valuesMap = new HashMap<>();
  * valuesMap.put("animal", "quick brown fox");
  * valuesMap.put("target", "lazy dog");
  * String templateString = "The ${animal} jumped over the ${target}. ${undefined.number:-1234567890}.";
@@ -96,7 +96,7 @@
  *   The variable ${${name}} must be used.
  * 
* Here only the variable's name referred to in the text should be replaced resulting - * in the text (assuming that the value of the name variable is x): + * in the text (assuming that the value of the {@code name} variable is {@code x}): *
  *   The variable ${x} must be used.
  * 
@@ -114,28 +114,33 @@ *
  * ${jre-${java.specification.version}}
  * 
- * StrSubstitutor supports this recursive substitution in variable + * {@code StrSubstitutor} supports this recursive substitution in variable * names, but it has to be enabled explicitly by setting the * {@link #setEnableSubstitutionInVariables(boolean) enableSubstitutionInVariables} * property to true. *

This class is not thread safe.

* * @since 1.0 + * @deprecated Deprecated as of 1.3, use {@link StringSubstitutor} instead. This class will be removed in 2.0. */ +@Deprecated public class StrSubstitutor { /** * Constant for the default escape character. */ public static final char DEFAULT_ESCAPE = '$'; + /** * Constant for the default variable prefix. */ public static final StrMatcher DEFAULT_PREFIX = StrMatcher.stringMatcher("${"); + /** * Constant for the default variable suffix. */ public static final StrMatcher DEFAULT_SUFFIX = StrMatcher.stringMatcher("}"); + /** * Constant for the default value delimiter of a variable. */ @@ -145,30 +150,37 @@ public class StrSubstitutor { * Stores the escape character. */ private char escapeChar; + /** * Stores the variable prefix. */ private StrMatcher prefixMatcher; + /** * Stores the variable suffix. */ private StrMatcher suffixMatcher; + /** * Stores the default variable value delimiter. */ private StrMatcher valueDelimiterMatcher; + /** * Variable resolution is delegated to an implementor of VariableResolver. */ private StrLookup variableResolver; + /** * The flag whether substitution in variable names is enabled. */ private boolean enableSubstitutionInVariables; + /** * Whether escapes should be preserved. Default is false; */ - private boolean preserveEscapes = false; + private boolean preserveEscapes; + /** * The flag whether substitution in variable values is disabled. 
*/ @@ -182,7 +194,7 @@ public class StrSubstitutor { * @param the type of the values in the map * @param source the source text containing the variables to substitute, null returns null * @param valueMap the map with the values, may be null - * @return the result of the replace operation + * @return The result of the replace operation */ public static String replace(final Object source, final Map valueMap) { return new StrSubstitutor(valueMap).replace(source); @@ -198,7 +210,7 @@ public static String replace(final Object source, final Map value * @param valueMap the map with the values, may be null * @param prefix the prefix of variables, not null * @param suffix the suffix of variables, not null - * @return the result of the replace operation + * @return The result of the replace operation * @throws IllegalArgumentException if the prefix or suffix is null */ public static String replace(final Object source, @@ -214,7 +226,7 @@ public static String replace(final Object source, * * @param source the source text containing the variables to substitute, null returns null * @param valueProperties the properties with values, may be null - * @return the result of the replace operation + * @return The result of the replace operation */ public static String replace(final Object source, final Properties valueProperties) { if (valueProperties == null) { @@ -235,7 +247,7 @@ public static String replace(final Object source, final Properties valueProperti * their matching values from the system properties. * * @param source the source text containing the variables to substitute, null returns null - * @return the result of the replace operation + * @return The result of the replace operation */ public static String replaceSystemProperties(final Object source) { return new StrSubstitutor(StrLookup.systemPropertiesLookup()).replace(source); @@ -392,7 +404,7 @@ public StrSubstitutor( * from the resolver using the given source string as a template. 
* * @param source the string to replace in, null returns null - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final String source) { if (source == null) { @@ -415,7 +427,7 @@ public String replace(final String source) { * @param source the string to replace in, null returns null * @param offset the start offset within the array, must be valid * @param length the length within the array to be processed, must be valid - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final String source, final int offset, final int length) { if (source == null) { @@ -435,7 +447,7 @@ public String replace(final String source, final int offset, final int length) { * The array is not altered by this method. * * @param source the character array to replace in, not altered, null returns null - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final char[] source) { if (source == null) { @@ -457,7 +469,7 @@ public String replace(final char[] source) { * @param source the character array to replace in, not altered, null returns null * @param offset the start offset within the array, must be valid * @param length the length within the array to be processed, must be valid - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final char[] source, final int offset, final int length) { if (source == null) { @@ -475,7 +487,7 @@ public String replace(final char[] source, final int offset, final int length) { * The buffer is not altered by this method. 
* * @param source the buffer to use as a template, not changed, null returns null - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final StringBuffer source) { if (source == null) { @@ -497,7 +509,7 @@ public String replace(final StringBuffer source) { * @param source the buffer to use as a template, not changed, null returns null * @param offset the start offset within the array, must be valid * @param length the length within the array to be processed, must be valid - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final StringBuffer source, final int offset, final int length) { if (source == null) { @@ -514,7 +526,7 @@ public String replace(final StringBuffer source, final int offset, final int len * The source is not altered by this method. * * @param source the buffer to use as a template, not changed, null returns null - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final CharSequence source) { if (source == null) { @@ -534,7 +546,7 @@ public String replace(final CharSequence source) { * @param source the buffer to use as a template, not changed, null returns null * @param offset the start offset within the array, must be valid * @param length the length within the array to be processed, must be valid - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final CharSequence source, final int offset, final int length) { if (source == null) { @@ -552,7 +564,7 @@ public String replace(final CharSequence source, final int offset, final int len * The builder is not altered by this method. 
* * @param source the builder to use as a template, not changed, null returns null - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final StrBuilder source) { if (source == null) { @@ -574,7 +586,7 @@ public String replace(final StrBuilder source) { * @param source the builder to use as a template, not changed, null returns null * @param offset the start offset within the array, must be valid * @param length the length within the array to be processed, must be valid - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final StrBuilder source, final int offset, final int length) { if (source == null) { @@ -589,10 +601,10 @@ public String replace(final StrBuilder source, final int offset, final int lengt /** * Replaces all the occurrences of variables in the given source object with * their matching values from the resolver. The input source object is - * converted to a string using toString and is not altered. + * converted to a string using {@code toString} and is not altered. * * @param source the source to replace in, null returns null - * @return the result of the replace operation + * @return The result of the replace operation */ public String replace(final Object source) { if (source == null) { @@ -747,7 +759,7 @@ protected boolean substitute(final StrBuilder buf, final int offset, final int l * @param offset the start offset within the builder, must be valid * @param length the length within the builder to be processed, must be valid * @param priorVariables the stack keeping track of the replaced variables, may be null - * @return the length change that occurs, unless priorVariables is null when the int + * @return The length change that occurs, unless priorVariables is null when the int * represents a boolean flag as to whether any change occurred. 
*/ private int substitute(final StrBuilder buf, final int offset, final int length, List priorVariables) { @@ -929,7 +941,7 @@ private void checkCyclicSubstitution(final String varName, final List pr * @param buf the buffer where the substitution is occurring, not null * @param startPos the start position of the variable including the prefix, valid * @param endPos the end position of the variable including the suffix, valid - * @return the variable's value or null if the variable is unknown + * @return The variable's value or null if the variable is unknown */ protected String resolveVariable(final String variableName, final StrBuilder buf, @@ -947,7 +959,7 @@ protected String resolveVariable(final String variableName, /** * Returns the escape character. * - * @return the character used for escaping variable references + * @return The character used for escaping variable references */ public char getEscapeChar() { return this.escapeChar; @@ -973,7 +985,7 @@ public void setEscapeChar(final char escapeCharacter) { * start of a variable. This prefix is expressed in terms of a matcher * allowing advanced prefix matches. * - * @return the prefix matcher in use + * @return The prefix matcher in use */ public StrMatcher getVariablePrefixMatcher() { return prefixMatcher; @@ -1034,7 +1046,7 @@ public StrSubstitutor setVariablePrefix(final String prefix) { * end of a variable. This suffix is expressed in terms of a matcher * allowing advanced suffix matches. * - * @return the suffix matcher in use + * @return The suffix matcher in use */ public StrMatcher getVariableSuffixMatcher() { return suffixMatcher; @@ -1091,13 +1103,13 @@ public StrSubstitutor setVariableSuffix(final String suffix) { /** * Gets the variable default value delimiter matcher currently in use. *

- * The variable default value delimiter is the character or characters that delimite the + * The variable default value delimiter is the character or characters that delimit the * variable name and the variable default value. This delimiter is expressed in terms of a matcher * allowing advanced variable default value delimiter matches. *

* If it returns null, then the variable default value resolution is disabled. * - * @return the variable default value delimiter matcher in use, may be null + * @return The variable default value delimiter matcher in use, may be null */ public StrMatcher getValueDelimiterMatcher() { return valueDelimiterMatcher; @@ -1106,11 +1118,11 @@ public StrMatcher getValueDelimiterMatcher() { /** * Sets the variable default value delimiter matcher to use. *

- * The variable default value delimiter is the character or characters that delimite the + * The variable default value delimiter is the character or characters that delimit the * variable name and the variable default value. This delimiter is expressed in terms of a matcher * allowing advanced variable default value delimiter matches. *

- * If the valueDelimiterMatcher is null, then the variable default value resolution + * If the {@code valueDelimiterMatcher} is null, then the variable default value resolution * becomes disabled. * * @param valueDelimiterMatcher variable default value delimiter matcher to use, may be null @@ -1124,7 +1136,7 @@ public StrSubstitutor setValueDelimiterMatcher(final StrMatcher valueDelimiterMa /** * Sets the variable default value delimiter to use. *

- * The variable default value delimiter is the character or characters that delimite the + * The variable default value delimiter is the character or characters that delimit the * variable name and the variable default value. This method allows a single character * variable default value delimiter to be easily set. * @@ -1138,18 +1150,18 @@ public StrSubstitutor setValueDelimiter(final char valueDelimiter) { /** * Sets the variable default value delimiter to use. *

- * The variable default value delimiter is the character or characters that delimite the + * The variable default value delimiter is the character or characters that delimit the * variable name and the variable default value. This method allows a string * variable default value delimiter to be easily set. *

- * If the valueDelimiter is null or empty string, then the variable default + * If the {@code valueDelimiter} is null or empty string, then the variable default * value resolution becomes disabled. * * @param valueDelimiter the variable default value delimiter string to use, may be null or empty * @return this, to enable chaining */ public StrSubstitutor setValueDelimiter(final String valueDelimiter) { - if (valueDelimiter == null || valueDelimiter.length() == 0) { + if (valueDelimiter == null || valueDelimiter.isEmpty()) { setValueDelimiterMatcher(null); return this; } @@ -1161,7 +1173,7 @@ public StrSubstitutor setValueDelimiter(final String valueDelimiter) { /** * Gets the VariableResolver that is used to lookup variables. * - * @return the VariableResolver + * @return The VariableResolver */ public StrLookup getVariableResolver() { return this.variableResolver; @@ -1181,7 +1193,7 @@ public void setVariableResolver(final StrLookup variableResolver) { /** * Returns a flag whether substitution is done in variable names. * - * @return the substitution in variable names flag + * @return The substitution in variable names flag */ public boolean isEnableSubstitutionInVariables() { return enableSubstitutionInVariables; @@ -1191,7 +1203,7 @@ public boolean isEnableSubstitutionInVariables() { * Sets a flag whether substitution is done in variable names. If set to * true, the names of variables can contain other variables which are * processed first before the original variable is evaluated, e.g. - * ${jre-${java.version}}. The default value is false. + * {@code ${jre-${java.version}}}. The default value is false. * * @param enableSubstitutionInVariables the new value of the flag */ @@ -1205,7 +1217,7 @@ public void setEnableSubstitutionInVariables( * true, the values of variables can contain other variables will not be * processed and substituted original variable is evaluated, e.g. *

-     * Map valuesMap = HashMap();
+     * Map<String, String> valuesMap = new HashMap<>();
      * valuesMap.put("name", "Douglas ${surname}");
      * valuesMap.put("surname", "Crockford");
      * String templateString = "Hi ${name}";
@@ -1217,7 +1229,7 @@ public void setEnableSubstitutionInVariables(
      *      Hi Douglas ${surname}
      * 
* - * @return the substitution in variable values flag + * @return The substitution in variable values flag * * @since 1.2 */ @@ -1232,7 +1244,7 @@ public boolean isDisableSubstitutionInValues() { * * @since 1.2 */ - public void setDisableSubstitutionInValues(boolean disableSubstitutionInValues) { + public void setDisableSubstitutionInValues(final boolean disableSubstitutionInValues) { this.disableSubstitutionInValues = disableSubstitutionInValues; } @@ -1240,7 +1252,7 @@ public void setDisableSubstitutionInValues(boolean disableSubstitutionInValues) * Returns the flag controlling whether escapes are preserved during * substitution. * - * @return the preserve escape flag + * @return The preserve escape flag */ public boolean isPreserveEscapes() { return preserveEscapes; @@ -1249,11 +1261,11 @@ public boolean isPreserveEscapes() { /** * Sets a flag controlling whether escapes are preserved during * substitution. If set to true, the escape character is retained - * during substitution (e.g. $${this-is-escaped} remains - * $${this-is-escaped}). If set to false, the escape + * during substitution (e.g. {@code $${this-is-escaped}} remains + * {@code $${this-is-escaped}}). If set to false, the escape * character is removed during substitution (e.g. - * $${this-is-escaped} becomes - * ${this-is-escaped}). The default value is false + * {@code $${this-is-escaped}} becomes + * {@code ${this-is-escaped}}). 
The default value is false * * @param preserveEscapes true if escapes are to be preserved */ diff --git a/src/main/java/org/apache/commons/text/StrTokenizer.java b/src/main/java/org/apache/commons/text/StrTokenizer.java index d4a2c6c7d3..f1e1b7474a 100644 --- a/src/main/java/org/apache/commons/text/StrTokenizer.java +++ b/src/main/java/org/apache/commons/text/StrTokenizer.java @@ -22,6 +22,9 @@ import java.util.ListIterator; import java.util.NoSuchElementException; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; + /** * Tokenizes a string based on delimiters (separators) * and supporting quoting and ignored character concepts. @@ -29,8 +32,8 @@ * This class can split a String into many smaller strings. It aims * to do a similar job to {@link java.util.StringTokenizer StringTokenizer}, * however it offers much more control and flexibility including implementing - * the ListIterator interface. By default, it is set up - * like StringTokenizer. + * the {@code ListIterator} interface. By default, it is set up + * like {@code StringTokenizer}. *

* The input String is split into a number of tokens. * Each token is separated from the next String by a delimiter. @@ -54,11 +57,9 @@ * " a, b , c " - Three tokens "a","b","c" (default CSV processing trims whitespace) * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched) *

- *

* - * This tokenizer has the following properties and options: - * - * + *
+ * * * * @@ -80,7 +81,9 @@ *
StrTokenizer properties and options
PropertyTypeDefault
* * @since 1.0 + * @deprecated Deprecated as of 1.3, use {@link StringTokenizer} instead. This class will be removed in 2.0. */ +@Deprecated public class StrTokenizer implements ListIterator, Cloneable { /** Comma separated values tokenizer internal variable. */ @@ -105,38 +108,14 @@ public class StrTokenizer implements ListIterator, Cloneable { TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); } - /** The text to work on. */ - private char[] chars; - /** The parsed tokens. */ - private String[] tokens; - /** The current iteration position. */ - private int tokenPos; - - /** The delimiter matcher. */ - private StrMatcher delimMatcher = StrMatcher.splitMatcher(); - /** The quote matcher. */ - private StrMatcher quoteMatcher = StrMatcher.noneMatcher(); - /** The ignored matcher. */ - private StrMatcher ignoredMatcher = StrMatcher.noneMatcher(); - /** The trimmer matcher. */ - private StrMatcher trimmerMatcher = StrMatcher.noneMatcher(); - - /** Whether to return empty tokens as null. */ - private boolean emptyAsNull = false; - /** Whether to ignore empty tokens. */ - private boolean ignoreEmptyTokens = true; - - //----------------------------------------------------------------------- - /** - * Returns a clone of CSV_TOKENIZER_PROTOTYPE. + * Returns a clone of {@code CSV_TOKENIZER_PROTOTYPE}. * - * @return a clone of CSV_TOKENIZER_PROTOTYPE. + * @return a clone of {@code CSV_TOKENIZER_PROTOTYPE}. */ private static StrTokenizer getCSVClone() { return (StrTokenizer) CSV_TOKENIZER_PROTOTYPE.clone(); } - /** * Gets a new tokenizer instance which parses Comma Separated Value strings * initializing it with the given input. The default for CSV processing @@ -149,7 +128,6 @@ private static StrTokenizer getCSVClone() { public static StrTokenizer getCSVInstance() { return getCSVClone(); } - /** * Gets a new tokenizer instance which parses Comma Separated Value strings * initializing it with the given input. 
The default for CSV processing @@ -159,7 +137,7 @@ public static StrTokenizer getCSVInstance() { * @param input the text to parse * @return a new tokenizer instance which parses Comma Separated Value strings */ - public static StrTokenizer getCSVInstance(final String input) { + public static StrTokenizer getCSVInstance(final char[] input) { final StrTokenizer tok = getCSVClone(); tok.reset(input); return tok; @@ -174,22 +152,19 @@ public static StrTokenizer getCSVInstance(final String input) { * @param input the text to parse * @return a new tokenizer instance which parses Comma Separated Value strings */ - public static StrTokenizer getCSVInstance(final char[] input) { + public static StrTokenizer getCSVInstance(final String input) { final StrTokenizer tok = getCSVClone(); tok.reset(input); return tok; } - /** - * Returns a clone of TSV_TOKENIZER_PROTOTYPE. + * Returns a clone of {@code TSV_TOKENIZER_PROTOTYPE}. * - * @return a clone of TSV_TOKENIZER_PROTOTYPE. + * @return a clone of {@code TSV_TOKENIZER_PROTOTYPE}. */ private static StrTokenizer getTSVClone() { return (StrTokenizer) TSV_TOKENIZER_PROTOTYPE.clone(); } - - /** * Gets a new tokenizer instance which parses Tab Separated Value strings. * The default for CSV processing will be trim whitespace from both ends @@ -201,7 +176,6 @@ private static StrTokenizer getTSVClone() { public static StrTokenizer getTSVInstance() { return getTSVClone(); } - /** * Gets a new tokenizer instance which parses Tab Separated Value strings. * The default for CSV processing will be trim whitespace from both ends @@ -209,7 +183,7 @@ public static StrTokenizer getTSVInstance() { * @param input the string to parse * @return a new tokenizer instance which parses Tab Separated Value strings. 
*/ - public static StrTokenizer getTSVInstance(final String input) { + public static StrTokenizer getTSVInstance(final char[] input) { final StrTokenizer tok = getTSVClone(); tok.reset(input); return tok; @@ -222,318 +196,381 @@ public static StrTokenizer getTSVInstance(final String input) { * @param input the string to parse * @return a new tokenizer instance which parses Tab Separated Value strings. */ - public static StrTokenizer getTSVInstance(final char[] input) { + public static StrTokenizer getTSVInstance(final String input) { final StrTokenizer tok = getTSVClone(); tok.reset(input); return tok; } + /** The text to work on. */ + private char[] chars; + + //----------------------------------------------------------------------- + + /** The parsed tokens. */ + private String[] tokens; + + /** The current iteration position. */ + private int tokenPos; + + /** The delimiter matcher. */ + private StrMatcher delimMatcher = StrMatcher.splitMatcher(); + + /** The quote matcher. */ + private StrMatcher quoteMatcher = StrMatcher.noneMatcher(); + + /** The ignored matcher. */ + private StrMatcher ignoredMatcher = StrMatcher.noneMatcher(); + + + /** The trimmer matcher. */ + private StrMatcher trimmerMatcher = StrMatcher.noneMatcher(); + + /** Whether to return empty tokens as null. */ + private boolean emptyAsNull; + + /** Whether to ignore empty tokens. */ + private boolean ignoreEmptyTokens = true; //----------------------------------------------------------------------- /** - * Constructs a tokenizer splitting on space, tab, newline and formfeed + * Constructs a tokenizer splitting on space, tab, newline and form feed * as per StringTokenizer, but with no text to tokenize. *

* This constructor is normally used with {@link #reset(String)}. */ public StrTokenizer() { - super(); this.chars = null; } /** - * Constructs a tokenizer splitting on space, tab, newline and formfeed + * Constructs a tokenizer splitting on space, tab, newline and form feed * as per StringTokenizer. * - * @param input the string which is to be parsed + * @param input the string which is to be parsed, not cloned */ - public StrTokenizer(final String input) { - super(); - if (input != null) { - chars = input.toCharArray(); + public StrTokenizer(final char[] input) { + if (input == null) { + this.chars = null; } else { - chars = null; + this.chars = input.clone(); } } /** - * Constructs a tokenizer splitting on the specified delimiter character. + * Constructs a tokenizer splitting on the specified character. * - * @param input the string which is to be parsed - * @param delim the field delimiter character + * @param input the string which is to be parsed, not cloned + * @param delim the field delimiter character */ - public StrTokenizer(final String input, final char delim) { + public StrTokenizer(final char[] input, final char delim) { this(input); setDelimiterChar(delim); } /** - * Constructs a tokenizer splitting on the specified delimiter string. + * Constructs a tokenizer splitting on the specified delimiter character + * and handling quotes using the specified quote character. * - * @param input the string which is to be parsed - * @param delim the field delimiter string + * @param input the string which is to be parsed, not cloned + * @param delim the field delimiter character + * @param quote the field quoted string character */ - public StrTokenizer(final String input, final String delim) { - this(input); - setDelimiterString(delim); + public StrTokenizer(final char[] input, final char delim, final char quote) { + this(input, delim); + setQuoteChar(quote); } /** - * Constructs a tokenizer splitting using the specified delimiter matcher. 
+ * Constructs a tokenizer splitting on the specified string. * - * @param input the string which is to be parsed - * @param delim the field delimiter matcher + * @param input the string which is to be parsed, not cloned + * @param delim the field delimiter string */ - public StrTokenizer(final String input, final StrMatcher delim) { + public StrTokenizer(final char[] input, final String delim) { this(input); - setDelimiterMatcher(delim); + setDelimiterString(delim); } /** - * Constructs a tokenizer splitting on the specified delimiter character - * and handling quotes using the specified quote character. + * Constructs a tokenizer splitting using the specified delimiter matcher. * - * @param input the string which is to be parsed - * @param delim the field delimiter character - * @param quote the field quoted string character + * @param input the string which is to be parsed, not cloned + * @param delim the field delimiter matcher */ - public StrTokenizer(final String input, final char delim, final char quote) { - this(input, delim); - setQuoteChar(quote); + public StrTokenizer(final char[] input, final StrMatcher delim) { + this(input); + setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher * and handling quotes using the specified quote matcher. 
* - * @param input the string which is to be parsed - * @param delim the field delimiter matcher - * @param quote the field quoted string matcher + * @param input the string which is to be parsed, not cloned + * @param delim the field delimiter character + * @param quote the field quoted string character */ - public StrTokenizer(final String input, final StrMatcher delim, final StrMatcher quote) { + public StrTokenizer(final char[] input, final StrMatcher delim, final StrMatcher quote) { this(input, delim); setQuoteMatcher(quote); } /** - * Constructs a tokenizer splitting on space, tab, newline and formfeed + * Constructs a tokenizer splitting on space, tab, newline and form feed * as per StringTokenizer. * - * @param input the string which is to be parsed, not cloned + * @param input the string which is to be parsed */ - public StrTokenizer(final char[] input) { - super(); - if (input == null) { - this.chars = null; + public StrTokenizer(final String input) { + if (input != null) { + chars = input.toCharArray(); } else { - this.chars = input.clone(); + chars = null; } } /** - * Constructs a tokenizer splitting on the specified character. + * Constructs a tokenizer splitting on the specified delimiter character. * - * @param input the string which is to be parsed, not cloned - * @param delim the field delimiter character + * @param input the string which is to be parsed + * @param delim the field delimiter character */ - public StrTokenizer(final char[] input, final char delim) { + public StrTokenizer(final String input, final char delim) { this(input); setDelimiterChar(delim); } /** - * Constructs a tokenizer splitting on the specified string. + * Constructs a tokenizer splitting on the specified delimiter character + * and handling quotes using the specified quote character. 
* - * @param input the string which is to be parsed, not cloned - * @param delim the field delimiter string + * @param input the string which is to be parsed + * @param delim the field delimiter character + * @param quote the field quoted string character */ - public StrTokenizer(final char[] input, final String delim) { - this(input); - setDelimiterString(delim); + public StrTokenizer(final String input, final char delim, final char quote) { + this(input, delim); + setQuoteChar(quote); } /** - * Constructs a tokenizer splitting using the specified delimiter matcher. + * Constructs a tokenizer splitting on the specified delimiter string. * - * @param input the string which is to be parsed, not cloned - * @param delim the field delimiter matcher + * @param input the string which is to be parsed + * @param delim the field delimiter string */ - public StrTokenizer(final char[] input, final StrMatcher delim) { + public StrTokenizer(final String input, final String delim) { this(input); - setDelimiterMatcher(delim); + setDelimiterString(delim); } /** - * Constructs a tokenizer splitting on the specified delimiter character - * and handling quotes using the specified quote character. + * Constructs a tokenizer splitting using the specified delimiter matcher. * - * @param input the string which is to be parsed, not cloned - * @param delim the field delimiter character - * @param quote the field quoted string character + * @param input the string which is to be parsed + * @param delim the field delimiter matcher */ - public StrTokenizer(final char[] input, final char delim, final char quote) { - this(input, delim); - setQuoteChar(quote); + public StrTokenizer(final String input, final StrMatcher delim) { + this(input); + setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher * and handling quotes using the specified quote matcher. 
* - * @param input the string which is to be parsed, not cloned - * @param delim the field delimiter character - * @param quote the field quoted string character + * @param input the string which is to be parsed + * @param delim the field delimiter matcher + * @param quote the field quoted string matcher */ - public StrTokenizer(final char[] input, final StrMatcher delim, final StrMatcher quote) { + public StrTokenizer(final String input, final StrMatcher delim, final StrMatcher quote) { this(input, delim); setQuoteMatcher(quote); } - // API - //----------------------------------------------------------------------- /** - * Gets the number of tokens found in the String. - * - * @return the number of matched tokens + * Unsupported ListIterator operation. + * @param obj this parameter ignored. + * @throws UnsupportedOperationException always */ - public int size() { - checkTokenized(); - return tokens.length; + @Override + public void add(final String obj) { + throw new UnsupportedOperationException("add() is unsupported"); } /** - * Gets the next token from the String. - * Equivalent to {@link #next()} except it returns null rather than - * throwing {@link NoSuchElementException} when no tokens remain. + * Adds a token to a list, paying attention to the parameters we've set. * - * @return the next sequential token, or null when no more tokens are found + * @param list the list to add to + * @param tok the token to add */ - public String nextToken() { - if (hasNext()) { - return tokens[tokenPos++]; + private void addToken(final List list, String tok) { + if (tok == null || tok.isEmpty()) { + if (isIgnoreEmptyTokens()) { + return; + } + if (isEmptyTokenAsNull()) { + tok = null; + } } - return null; + list.add(tok); } + // Implementation + //----------------------------------------------------------------------- /** - * Gets the previous token from the String. 
- * - * @return the previous sequential token, or null when no more tokens are found + * Checks if tokenization has been done, and if not then do it. */ - public String previousToken() { - if (hasPrevious()) { - return tokens[--tokenPos]; + private void checkTokenized() { + if (tokens == null) { + if (chars == null) { + // still call tokenize as subclass may do some work + final List split = tokenize(null, 0, 0); + tokens = split.toArray(ArrayUtils.EMPTY_STRING_ARRAY); + } else { + final List split = tokenize(chars, 0, chars.length); + tokens = split.toArray(ArrayUtils.EMPTY_STRING_ARRAY); + } } - return null; } + //----------------------------------------------------------------------- /** - * Gets a copy of the full token list as an independent modifiable array. + * Creates a new instance of this Tokenizer. The new instance is reset so + * that it will be at the start of the token list. + * If a {@link CloneNotSupportedException} is caught, return {@code null}. * - * @return the tokens as a String array + * @return a new instance of this Tokenizer which has been reset. */ - public String[] getTokenArray() { - checkTokenized(); - return tokens.clone(); - } - - /** - * Gets a copy of the full token list as an independent modifiable list. - * - * @return the tokens as a String array - */ - public List getTokenList() { - checkTokenized(); - final List list = new ArrayList<>(tokens.length); - for (final String element : tokens) { - list.add(element); + @Override + public Object clone() { + try { + return cloneReset(); + } catch (final CloneNotSupportedException ex) { + return null; } - return list; } /** - * Resets this tokenizer, forgetting all parsing and iteration already completed. - *

- * This method allows the same tokenizer to be reused for the same String. + * Creates a new instance of this Tokenizer. The new instance is reset so that + * it will be at the start of the token list. * - * @return this, to enable chaining + * @return a new instance of this Tokenizer which has been reset. + * @throws CloneNotSupportedException if there is a problem cloning */ - public StrTokenizer reset() { - tokenPos = 0; - tokens = null; - return this; + Object cloneReset() throws CloneNotSupportedException { + // this method exists to enable 100% test coverage + final StrTokenizer cloned = (StrTokenizer) super.clone(); + if (cloned.chars != null) { + cloned.chars = cloned.chars.clone(); + } + cloned.reset(); + return cloned; } + //----------------------------------------------------------------------- /** - * Reset this tokenizer, giving it a new input string to parse. - * In this manner you can re-use a tokenizer with the same settings - * on multiple input lines. + * Gets the String content that the tokenizer is parsing. * - * @param input the new string to tokenize, null sets no text to parse - * @return this, to enable chaining + * @return The string content being parsed */ - public StrTokenizer reset(final String input) { - reset(); - if (input != null) { - this.chars = input.toCharArray(); - } else { - this.chars = null; + public String getContent() { + if (chars == null) { + return null; } - return this; + return new String(chars); } + // Delimiter + //----------------------------------------------------------------------- /** - * Reset this tokenizer, giving it a new input string to parse. - * In this manner you can re-use a tokenizer with the same settings - * on multiple input lines. + * Gets the field delimiter matcher. 
* - * @param input the new character array to tokenize, not cloned, null sets no text to parse - * @return this, to enable chaining + * @return The delimiter matcher in use */ - public StrTokenizer reset(final char[] input) { - reset(); - if (input != null) { - this.chars = input.clone(); - } else { - this.chars = null; - } - return this; + public StrMatcher getDelimiterMatcher() { + return this.delimMatcher; } - // ListIterator + // Ignored //----------------------------------------------------------------------- /** - * Checks whether there are any more tokens. + * Gets the ignored character matcher. + *

+ * These characters are ignored when parsing the String, unless they are + * within a quoted region. + * The default value is not to ignore anything. * - * @return true if there are more tokens + * @return The ignored matcher in use */ - @Override - public boolean hasNext() { + public StrMatcher getIgnoredMatcher() { + return ignoredMatcher; + } + + // Quote + //----------------------------------------------------------------------- + /** + * Gets the quote matcher currently in use. + *

+ * The quote character is used to wrap data between the tokens. + * This enables delimiters to be entered as data. + * The default value is '"' (double quote). + * + * @return The quote matcher in use + */ + public StrMatcher getQuoteMatcher() { + return quoteMatcher; + } + + /** + * Gets a copy of the full token list as an independent modifiable array. + * + * @return The tokens as a String array + */ + public String[] getTokenArray() { checkTokenized(); - return tokenPos < tokens.length; + return tokens.clone(); } /** - * Gets the next token. + * Gets a copy of the full token list as an independent modifiable list. * - * @return the next String token - * @throws NoSuchElementException if there are no more elements + * @return The tokens as a String array */ - @Override - public String next() { - if (hasNext()) { - return tokens[tokenPos++]; - } - throw new NoSuchElementException(); + public List getTokenList() { + checkTokenized(); + final List list = new ArrayList<>(tokens.length); + Collections.addAll(list, tokens); + + return list; } + // Trimmer + //----------------------------------------------------------------------- /** - * Gets the index of the next token to return. + * Gets the trimmer character matcher. + *

+ * These characters are trimmed off on each side of the delimiter + * until the token or quote is found. + * The default value is not to trim anything. + * + * @return The trimmer matcher in use + */ + public StrMatcher getTrimmerMatcher() { + return trimmerMatcher; + } + + // ListIterator + //----------------------------------------------------------------------- + /** + * Checks whether there are any more tokens. * - * @return the next token index + * @return true if there are more tokens */ @Override - public int nextIndex() { - return tokenPos; + public boolean hasNext() { + checkTokenized(); + return tokenPos < tokens.length; } /** @@ -547,134 +584,123 @@ public boolean hasPrevious() { return tokenPos > 0; } + //----------------------------------------------------------------------- /** - * Gets the token previous to the last returned token. + * Gets whether the tokenizer currently returns empty tokens as null. + * The default for this property is false. * - * @return the previous token + * @return true if empty tokens are returned as null */ - @Override - public String previous() { - if (hasPrevious()) { - return tokens[--tokenPos]; - } - throw new NoSuchElementException(); + public boolean isEmptyTokenAsNull() { + return this.emptyAsNull; } + //----------------------------------------------------------------------- /** - * Gets the index of the previous token. + * Gets whether the tokenizer currently ignores empty tokens. + * The default for this property is true. * - * @return the previous token index + * @return true if empty tokens are not returned */ - @Override - public int previousIndex() { - return tokenPos - 1; + public boolean isIgnoreEmptyTokens() { + return ignoreEmptyTokens; } /** - * Unsupported ListIterator operation. + * Checks if the characters at the index specified match the quote + * already matched in readNextToken(). 
* - * @throws UnsupportedOperationException always + * @param srcChars the character array being tokenized + * @param pos the position to check for a quote + * @param len the length of the character array being tokenized + * @param quoteStart the start position of the matched quote, 0 if no quoting + * @param quoteLen the length of the matched quote, 0 if no quoting + * @return true if a quote is matched */ - @Override - public void remove() { - throw new UnsupportedOperationException("remove() is unsupported"); + private boolean isQuote(final char[] srcChars, + final int pos, + final int len, + final int quoteStart, + final int quoteLen) { + for (int i = 0; i < quoteLen; i++) { + if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) { + return false; + } + } + return true; } /** - * Unsupported ListIterator operation. - * @param obj this parameter ignored. - * @throws UnsupportedOperationException always + * Gets the next token. + * + * @return The next String token + * @throws NoSuchElementException if there are no more elements */ @Override - public void set(final String obj) { - throw new UnsupportedOperationException("set() is unsupported"); + public String next() { + if (hasNext()) { + return tokens[tokenPos++]; + } + throw new NoSuchElementException(); } /** - * Unsupported ListIterator operation. - * @param obj this parameter ignored. - * @throws UnsupportedOperationException always + * Gets the index of the next token to return. + * + * @return The next token index */ @Override - public void add(final String obj) { - throw new UnsupportedOperationException("add() is unsupported"); + public int nextIndex() { + return tokenPos; } - // Implementation - //----------------------------------------------------------------------- /** - * Checks if tokenization has been done, and if not then do it. + * Gets the next token from the String. 
+ * Equivalent to {@link #next()} except it returns null rather than + * throwing {@link NoSuchElementException} when no tokens remain. + * + * @return The next sequential token, or null when no more tokens are found */ - private void checkTokenized() { - if (tokens == null) { - if (chars == null) { - // still call tokenize as subclass may do some work - final List split = tokenize(null, 0, 0); - tokens = split.toArray(new String[split.size()]); - } else { - final List split = tokenize(chars, 0, chars.length); - tokens = split.toArray(new String[split.size()]); - } + public String nextToken() { + if (hasNext()) { + return tokens[tokenPos++]; } + return null; } /** - * Internal method to performs the tokenization. - *

- * Most users of this class do not need to call this method. This method - * will be called automatically by other (public) methods when required. - *

- * This method exists to allow subclasses to add code before or after the - * tokenization. For example, a subclass could alter the character array, - * offset or count to be parsed, or call the tokenizer multiple times on - * multiple strings. It is also be possible to filter the results. - *

- * StrTokenizer will always pass a zero offset and a count - * equal to the length of the array to this method, however a subclass - * may pass other values, or even an entirely different array. + * Gets the token previous to the last returned token. * - * @param srcChars the character array being tokenized, may be null - * @param offset the start position within the character array, must be valid - * @param count the number of characters to tokenize, must be valid - * @return the modifiable list of String tokens, unmodifiable if null array or zero count + * @return The previous token */ - protected List tokenize(final char[] srcChars, final int offset, final int count) { - if (srcChars == null || count == 0) { - return Collections.emptyList(); + @Override + public String previous() { + if (hasPrevious()) { + return tokens[--tokenPos]; } - final StrBuilder buf = new StrBuilder(); - final List tokenList = new ArrayList<>(); - int pos = offset; - - // loop around the entire buffer - while (pos >= 0 && pos < count) { - // find next token - pos = readNextToken(srcChars, pos, count, buf, tokenList); + throw new NoSuchElementException(); + } - // handle case where end of string is a delimiter - if (pos >= count) { - addToken(tokenList, ""); - } - } - return tokenList; + /** + * Gets the index of the previous token. + * + * @return The previous token index + */ + @Override + public int previousIndex() { + return tokenPos - 1; } /** - * Adds a token to a list, paying attention to the parameters we've set. + * Gets the previous token from the String. 
* - * @param list the list to add to - * @param tok the token to add + * @return The previous sequential token, or null when no more tokens are found */ - private void addToken(final List list, String tok) { - if (tok == null || tok.length() == 0) { - if (isIgnoreEmptyTokens()) { - return; - } - if (isEmptyTokenAsNull()) { - tok = null; - } + public String previousToken() { + if (hasPrevious()) { + return tokens[--tokenPos]; } - list.add(tok); + return null; } /** @@ -685,7 +711,7 @@ private void addToken(final List list, String tok) { * @param len the length of the character array being tokenized * @param workArea a temporary work area * @param tokenList the list of parsed tokens - * @return the starting position of the next field (the character + * @return The starting position of the next field (the character * immediately after the delimiter), or -1 if end of string found */ private int readNextToken(final char[] srcChars, @@ -709,14 +735,14 @@ private int readNextToken(final char[] srcChars, // handle reaching end if (start >= len) { - addToken(tokenList, ""); + addToken(tokenList, StringUtils.EMPTY); return -1; } // handle empty token final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len); if (delimLen > 0) { - addToken(tokenList, ""); + addToken(tokenList, StringUtils.EMPTY); return start + delimLen; } @@ -738,7 +764,7 @@ private int readNextToken(final char[] srcChars, * @param tokenList the list of parsed tokens * @param quoteStart the start position of the matched quote, 0 if no quoting * @param quoteLen the length of the matched quote, 0 if no quoting - * @return the starting position of the next field (the character + * @return The starting position of the next field (the character * immediately after the delimiter, or if end of string found, * then the length of string */ @@ -828,38 +854,82 @@ private int readWithQuotes(final char[] srcChars, final int start, final int len } /** - * Checks if the characters at the index 
specified match the quote - * already matched in readNextToken(). - * - * @param srcChars the character array being tokenized - * @param pos the position to check for a quote - * @param len the length of the character array being tokenized - * @param quoteStart the start position of the matched quote, 0 if no quoting - * @param quoteLen the length of the matched quote, 0 if no quoting - * @return true if a quote is matched + * Unsupported ListIterator operation. + * + * @throws UnsupportedOperationException always + */ + @Override + public void remove() { + throw new UnsupportedOperationException("remove() is unsupported"); + } + + /** + * Resets this tokenizer, forgetting all parsing and iteration already completed. + *

+ * This method allows the same tokenizer to be reused for the same String. + * + * @return this, to enable chaining + */ + public StrTokenizer reset() { + tokenPos = 0; + tokens = null; + return this; + } + + /** + * Reset this tokenizer, giving it a new input string to parse. + * In this manner you can re-use a tokenizer with the same settings + * on multiple input lines. + * + * @param input the new character array to tokenize, not cloned, null sets no text to parse + * @return this, to enable chaining + */ + public StrTokenizer reset(final char[] input) { + reset(); + if (input != null) { + this.chars = input.clone(); + } else { + this.chars = null; + } + return this; + } + + /** + * Reset this tokenizer, giving it a new input string to parse. + * In this manner you can re-use a tokenizer with the same settings + * on multiple input lines. + * + * @param input the new string to tokenize, null sets no text to parse + * @return this, to enable chaining + */ + public StrTokenizer reset(final String input) { + reset(); + if (input != null) { + this.chars = input.toCharArray(); + } else { + this.chars = null; + } + return this; + } + + /** + * Unsupported ListIterator operation. + * @param obj this parameter ignored. + * @throws UnsupportedOperationException always */ - private boolean isQuote(final char[] srcChars, - final int pos, - final int len, - final int quoteStart, - final int quoteLen) { - for (int i = 0; i < quoteLen; i++) { - if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) { - return false; - } - } - return true; + @Override + public void set(final String obj) { + throw new UnsupportedOperationException("set() is unsupported"); } - // Delimiter - //----------------------------------------------------------------------- /** - * Gets the field delimiter matcher. + * Sets the field delimiter character. 
* - * @return the delimiter matcher in use + * @param delim the delimiter character to use + * @return this, to enable chaining */ - public StrMatcher getDelimiterMatcher() { - return this.delimMatcher; + public StrTokenizer setDelimiterChar(final char delim) { + return setDelimiterMatcher(StrMatcher.charMatcher(delim)); } /** @@ -879,16 +949,6 @@ public StrTokenizer setDelimiterMatcher(final StrMatcher delim) { return this; } - /** - * Sets the field delimiter character. - * - * @param delim the delimiter character to use - * @return this, to enable chaining - */ - public StrTokenizer setDelimiterChar(final char delim) { - return setDelimiterMatcher(StrMatcher.charMatcher(delim)); - } - /** * Sets the field delimiter string. * @@ -899,63 +959,29 @@ public StrTokenizer setDelimiterString(final String delim) { return setDelimiterMatcher(StrMatcher.stringMatcher(delim)); } - // Quote - //----------------------------------------------------------------------- - /** - * Gets the quote matcher currently in use. - *

- * The quote character is used to wrap data between the tokens. - * This enables delimiters to be entered as data. - * The default value is '"' (double quote). - * - * @return the quote matcher in use - */ - public StrMatcher getQuoteMatcher() { - return quoteMatcher; - } - /** - * Set the quote matcher to use. - *

- * The quote character is used to wrap data between the tokens. - * This enables delimiters to be entered as data. + * Sets whether the tokenizer should return empty tokens as null. + * The default for this property is false. * - * @param quote the quote matcher to use, null ignored + * @param emptyAsNull whether empty tokens are returned as null * @return this, to enable chaining */ - public StrTokenizer setQuoteMatcher(final StrMatcher quote) { - if (quote != null) { - this.quoteMatcher = quote; - } + public StrTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) { + this.emptyAsNull = emptyAsNull; return this; } /** - * Sets the quote character to use. - *

- * The quote character is used to wrap data between the tokens. - * This enables delimiters to be entered as data. - * - * @param quote the quote character to use - * @return this, to enable chaining - */ - public StrTokenizer setQuoteChar(final char quote) { - return setQuoteMatcher(StrMatcher.charMatcher(quote)); - } - - // Ignored - //----------------------------------------------------------------------- - /** - * Gets the ignored character matcher. + * Set the character to ignore. *

- * These characters are ignored when parsing the String, unless they are + * This character is ignored when parsing the String, unless it is * within a quoted region. - * The default value is not to ignore anything. * - * @return the ignored matcher in use + * @param ignored the ignored character to use + * @return this, to enable chaining */ - public StrMatcher getIgnoredMatcher() { - return ignoredMatcher; + public StrTokenizer setIgnoredChar(final char ignored) { + return setIgnoredMatcher(StrMatcher.charMatcher(ignored)); } /** @@ -975,31 +1001,44 @@ public StrTokenizer setIgnoredMatcher(final StrMatcher ignored) { } /** - * Set the character to ignore. + * Sets whether the tokenizer should ignore and not return empty tokens. + * The default for this property is true. + * + * @param ignoreEmptyTokens whether empty tokens are not returned + * @return this, to enable chaining + */ + public StrTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) { + this.ignoreEmptyTokens = ignoreEmptyTokens; + return this; + } + + /** + * Sets the quote character to use. *

- * This character is ignored when parsing the String, unless it is - * within a quoted region. + * The quote character is used to wrap data between the tokens. + * This enables delimiters to be entered as data. * - * @param ignored the ignored character to use + * @param quote the quote character to use * @return this, to enable chaining */ - public StrTokenizer setIgnoredChar(final char ignored) { - return setIgnoredMatcher(StrMatcher.charMatcher(ignored)); + public StrTokenizer setQuoteChar(final char quote) { + return setQuoteMatcher(StrMatcher.charMatcher(quote)); } - // Trimmer - //----------------------------------------------------------------------- /** - * Gets the trimmer character matcher. + * Set the quote matcher to use. *

- * These characters are trimmed off on each side of the delimiter - * until the token or quote is found. - * The default value is not to trim anything. + * The quote character is used to wrap data between the tokens. + * This enables delimiters to be entered as data. * - * @return the trimmer matcher in use + * @param quote the quote matcher to use, null ignored + * @return this, to enable chaining */ - public StrMatcher getTrimmerMatcher() { - return trimmerMatcher; + public StrTokenizer setQuoteMatcher(final StrMatcher quote) { + if (quote != null) { + this.quoteMatcher = quote; + } + return this; } /** @@ -1018,104 +1057,64 @@ public StrTokenizer setTrimmerMatcher(final StrMatcher trimmer) { return this; } + // API //----------------------------------------------------------------------- /** - * Gets whether the tokenizer currently returns empty tokens as null. - * The default for this property is false. - * - * @return true if empty tokens are returned as null - */ - public boolean isEmptyTokenAsNull() { - return this.emptyAsNull; - } - - /** - * Sets whether the tokenizer should return empty tokens as null. - * The default for this property is false. - * - * @param emptyAsNull whether empty tokens are returned as null - * @return this, to enable chaining - */ - public StrTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) { - this.emptyAsNull = emptyAsNull; - return this; - } - - //----------------------------------------------------------------------- - /** - * Gets whether the tokenizer currently ignores empty tokens. - * The default for this property is true. - * - * @return true if empty tokens are not returned - */ - public boolean isIgnoreEmptyTokens() { - return ignoreEmptyTokens; - } - - /** - * Sets whether the tokenizer should ignore and not return empty tokens. - * The default for this property is true. + * Gets the number of tokens found in the String. 
* - * @param ignoreEmptyTokens whether empty tokens are not returned - * @return this, to enable chaining + * @return The number of matched tokens */ - public StrTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) { - this.ignoreEmptyTokens = ignoreEmptyTokens; - return this; + public int size() { + checkTokenized(); + return tokens.length; } - //----------------------------------------------------------------------- /** - * Gets the String content that the tokenizer is parsing. + * Internal method to perform the tokenization. + *

+ * Most users of this class do not need to call this method. This method + * will be called automatically by other (public) methods when required. + *

+ * This method exists to allow subclasses to add code before or after the + * tokenization. For example, a subclass could alter the character array, + * offset or count to be parsed, or call the tokenizer multiple times on + * multiple strings. It is also possible to filter the results. + *

+ * {@code StrTokenizer} will always pass a zero offset and a count + * equal to the length of the array to this method, however a subclass + * may pass other values, or even an entirely different array. * - * @return the string content being parsed + * @param srcChars the character array being tokenized, may be null + * @param offset the start position within the character array, must be valid + * @param count the number of characters to tokenize, must be valid + * @return The modifiable list of String tokens, unmodifiable if null array or zero count */ - public String getContent() { - if (chars == null) { - return null; + protected List tokenize(final char[] srcChars, final int offset, final int count) { + if (srcChars == null || count == 0) { + return Collections.emptyList(); } - return new String(chars); - } + final StrBuilder buf = new StrBuilder(); + final List tokenList = new ArrayList<>(); + int pos = offset; - //----------------------------------------------------------------------- - /** - * Creates a new instance of this Tokenizer. The new instance is reset so - * that it will be at the start of the token list. - * If a {@link CloneNotSupportedException} is caught, return null. - * - * @return a new instance of this Tokenizer which has been reset. - */ - @Override - public Object clone() { - try { - return cloneReset(); - } catch (final CloneNotSupportedException ex) { - return null; - } - } + // loop around the entire buffer + while (pos >= 0 && pos < count) { + // find next token + pos = readNextToken(srcChars, pos, count, buf, tokenList); - /** - * Creates a new instance of this Tokenizer. The new instance is reset so that - * it will be at the start of the token list. - * - * @return a new instance of this Tokenizer which has been reset. 
- * @throws CloneNotSupportedException if there is a problem cloning - */ - Object cloneReset() throws CloneNotSupportedException { - // this method exists to enable 100% test coverage - final StrTokenizer cloned = (StrTokenizer) super.clone(); - if (cloned.chars != null) { - cloned.chars = cloned.chars.clone(); + // handle case where end of string is a delimiter + if (pos >= count) { + addToken(tokenList, StringUtils.EMPTY); + } } - cloned.reset(); - return cloned; + return tokenList; } //----------------------------------------------------------------------- /** * Gets the String content that the tokenizer is parsing. * - * @return the string content being parsed + * @return The string content being parsed */ @Override public String toString() { diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java b/src/main/java/org/apache/commons/text/StringEscapeUtils.java index cd15a8ac99..7daaace325 100644 --- a/src/main/java/org/apache/commons/text/StringEscapeUtils.java +++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java @@ -16,6 +16,12 @@ */ package org.apache.commons.text; +import java.io.IOException; +import java.io.Writer; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.translate.AggregateTranslator; import org.apache.commons.text.translate.CharSequenceTranslator; @@ -29,18 +35,14 @@ import org.apache.commons.text.translate.UnicodeUnescaper; import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover; -import java.io.IOException; -import java.io.Writer; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - /** - *

Escapes and unescapes {@code String}s for - * Java, Java Script, HTML and XML.

- * - *

#ThreadSafe#

+ *

+ * Escapes and unescapes {@code String}s for Java, Java Script, HTML and XML. + *

* + *

+ * #ThreadSafe# + *

* *

* This code has been adapted from Apache Commons Lang 3.5. @@ -108,7 +110,7 @@ public class StringEscapeUtils { ESCAPE_JSON = new AggregateTranslator( new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), - JavaUnicodeEscaper.outsideOf(32, 0x7f) + JavaUnicodeEscaper.outsideOf(32, 0x7e) ); } @@ -248,8 +250,8 @@ public class StringEscapeUtils { escapeXsiMap.put("'", "\\'"); escapeXsiMap.put(" ", "\\ "); escapeXsiMap.put("\t", "\\\t"); - escapeXsiMap.put("\r\n", ""); - escapeXsiMap.put("\n", ""); + escapeXsiMap.put("\r\n", StringUtils.EMPTY); + escapeXsiMap.put("\n", StringUtils.EMPTY); escapeXsiMap.put("*", "\\*"); escapeXsiMap.put("?", "\\?"); escapeXsiMap.put("[", "\\["); @@ -277,7 +279,7 @@ public class StringEscapeUtils { unescapeJavaMap.put("\\\\", "\\"); unescapeJavaMap.put("\\\"", "\""); unescapeJavaMap.put("\\'", "'"); - unescapeJavaMap.put("\\", ""); + unescapeJavaMap.put("\\", StringUtils.EMPTY); UNESCAPE_JAVA = new AggregateTranslator( new OctalUnescaper(), // .between('\1', '\377'), new UnicodeUnescaper(), @@ -302,14 +304,7 @@ public class StringEscapeUtils { * object allows the Json unescaping functionality to be used * as the foundation for a custom translator. */ - public static final CharSequenceTranslator UNESCAPE_JSON; - static { - UNESCAPE_JSON = new AggregateTranslator( - new OctalUnescaper(), // .between('\1', '\377'), - new UnicodeUnescaper(), - new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE) - ); - } + public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; /** * Translator object for unescaping escaped HTML 3.0. 
@@ -383,7 +378,7 @@ static class XsiUnescaper extends CharSequenceTranslator { private static final char BACKSLASH = '\\'; @Override - public int translate(final CharSequence input, final int index, final Writer out) throws IOException { + public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { if (index != 0) { throw new IllegalStateException("XsiUnescaper should never reach the [1] index"); @@ -397,12 +392,12 @@ public int translate(final CharSequence input, final int index, final Writer out final int pos = s.indexOf(BACKSLASH, searchOffset); if (pos == -1) { if (segmentStart < s.length()) { - out.write(s.substring(segmentStart)); + writer.write(s.substring(segmentStart)); } break; } if (pos > segmentStart) { - out.write(s.substring(segmentStart, pos)); + writer.write(s.substring(segmentStart, pos)); } segmentStart = pos + 1; searchOffset = pos + 2; @@ -425,7 +420,6 @@ public int translate(final CharSequence input, final int index, final Writer out * instance to operate.

*/ public StringEscapeUtils() { - super(); } /** @@ -490,7 +484,7 @@ public Builder append(final String input) { /** *

Return the escaped string.

* - * @return the escaped string + * @return The escaped string */ @Override public String toString() { @@ -544,7 +538,7 @@ public static final String escapeJava(final String input) { *

The only difference between Java strings and EcmaScript strings * is that in EcmaScript, a single quote and forward-slash (/) are escaped.

* - *

Note that EcmaScript is best known by the JavaScript and ActionScript dialects.

+ *

Note that EcmaScript is best known by the JavaScript and ActionScript dialects.

* *

Example:

*
@@ -577,7 +571,7 @@ public static final String escapeEcmaScript(final String input) {
      * 

The only difference between Java strings and Json strings * is that in Json, forward-slash (/) is escaped.

* - *

See http://www.ietf.org/rfc/rfc4627.txt for further details.

+ *

See http://www.ietf.org/rfc/rfc4627.txt for further details.

* *

Example:

*
@@ -643,15 +637,15 @@ public static final String unescapeJson(final String input) {
      * 

* For example: *

- *

"bread" & "butter"

+ *

{@code "bread" & "butter"}

* becomes: *

- * &quot;bread&quot; &amp; &quot;butter&quot;. + * {@code &quot;bread&quot; &amp; &quot;butter&quot;}. *

* *

Supports all known HTML 4.0 entities, including funky accents. * Note that the commonly used apostrophe escape character (&apos;) - * is not a legal entity and so is not supported).

+ * is not a legal entity and so is not supported).

* * @param input the {@code String} to escape, may be null * @return a new escaped {@code String}, {@code null} if null string input @@ -668,7 +662,7 @@ public static final String escapeHtml4(final String input) { /** *

Escapes the characters in a {@code String} using HTML entities.

- *

Supports only the HTML 3.0 entities.

+ *

Supports only the HTML 3.0 entities.

* * @param input the {@code String} to escape, may be null * @return a new escaped {@code String}, {@code null} if null string input @@ -779,7 +773,7 @@ public static String escapeXml11(final String input) { * Does not support DTDs or external entities.

* *

Note that numerical \\u Unicode codes are unescaped to their respective - * Unicode characters. This may change in future releases.

+ * Unicode characters. This may change in future releases.

* * @param input the {@code String} to unescape, may be null * @return a new unescaped {@code String}, {@code null} if null string input @@ -808,7 +802,7 @@ public static final String unescapeXml(final String input) { * RFC 4180. * * @param input the input CSV column String, may be null - * @return the input String, enclosed in double quotes if the value contains a comma, + * @return The input String, enclosed in double quotes if the value contains a comma, * newline or double quote, {@code null} if null string input */ public static final String escapeCsv(final String input) { @@ -816,14 +810,14 @@ public static final String escapeCsv(final String input) { } /** - *

Returns a {@code String} value for an unescaped CSV column.

+ *

Returns a {@code String} value for an unescaped CSV column.

* *

If the value is enclosed in double quotes, and contains a comma, newline * or double quote, then quotes are removed. *

* *

Any double quote escaped characters (a pair of double quotes) are unescaped - * to just one double quote.

+ * to just one double quote.

* *

If the value is not enclosed in double quotes, or is and does not contain a * comma, newline or double quote, then the String value is returned unchanged.

@@ -832,7 +826,7 @@ public static final String escapeCsv(final String input) { * RFC 4180. * * @param input the input CSV column String, may be null - * @return the input String, with enclosing double quotes removed and embedded double + * @return The input String, with enclosing double quotes removed and embedded double * quotes unescaped, {@code null} if null string input */ public static final String unescapeCsv(final String input) { diff --git a/src/main/java/org/apache/commons/text/StringSubstitutor.java b/src/main/java/org/apache/commons/text/StringSubstitutor.java new file mode 100644 index 0000000000..64b9f36bd2 --- /dev/null +++ b/src/main/java/org/apache/commons/text/StringSubstitutor.java @@ -0,0 +1,1466 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.text; + +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Properties; + +import org.apache.commons.lang3.Validate; +import org.apache.commons.text.lookup.StringLookup; +import org.apache.commons.text.lookup.StringLookupFactory; +import org.apache.commons.text.matcher.StringMatcher; +import org.apache.commons.text.matcher.StringMatcherFactory; + +/** + * Substitutes variables within a string by values. + *

+ * This class takes a piece of text and substitutes all the variables within it. The default definition of a variable is + * {@code ${variableName}}. The prefix and suffix can be changed via constructors and set methods. + *

+ *

+ * Variable values are typically resolved from a map, but could also be resolved from system properties, or by supplying + * a custom variable resolver. + *

+ *

Using System Properties

+ *

+ * The simplest example is to use this class to replace Java System properties. For example: + *

+ * + *
+ * StringSubstitutor
+ *     .replaceSystemProperties("You are running with java.version = ${java.version} and os.name = ${os.name}.");
+ * 
+ * + *

Using a Custom Map

+ *

+ * Typical usage of this class follows the following pattern: + *

+ *
    + *
  • Create and initialize a StringSubstitutor with the map that contains the values for the variables you want to + * make available.
  • + *
  • Optionally set attributes like variable prefix, variable suffix, default value delimiter, and so on.
  • + *
  • Call the {@code replace()} method with the source text for interpolation.
  • + *
  • The returned text contains all variable references (as long as their values are known) as resolved.
  • + *
+ *

+ * For example: + *

+ * + *
+ * // Build map
+ * Map<String, String> valuesMap = new HashMap<>();
+ * valuesMap.put("animal", "quick brown fox");
+ * valuesMap.put("target", "lazy dog");
+ * String templateString = "The ${animal} jumped over the ${target}.";
+ *
+ * // Build StringSubstitutor
+ * StringSubstitutor sub = new StringSubstitutor(valuesMap);
+ *
+ * // Replace
+ * String resolvedString = sub.replace(templateString);
+ * 
+ * + *

+ * yielding: + *

+ * + *
+ * "The quick brown fox jumped over the lazy dog."
+ * 
+ * + *

Providing Default Values

+ *

+ * You can set a default value for unresolved variables. The default value for a variable can be appended to the + * variable name after the variable default value delimiter. The default value of the variable default value delimiter + * is ":-", as in bash and other *nix shells. + *

+ *

+ * You can set the variable value delimiter with {@link #setValueDelimiterMatcher(StringMatcher)}, + * {@link #setValueDelimiter(char)} or {@link #setValueDelimiter(String)}. + *

+ *

+ * For example: + *

+ * + *
+ * // Build map
+ * Map<String, String> valuesMap = new HashMap<>();
+ * valuesMap.put("animal", "quick brown fox");
+ * valuesMap.put("target", "lazy dog");
+ * String templateString = "The ${animal} jumped over the ${target} ${undefined.number:-1234567890} times.";
+ *
+ * // Build StringSubstitutor
+ * StringSubstitutor sub = new StringSubstitutor(valuesMap);
+ *
+ * // Replace
+ * String resolvedString = sub.replace(templateString);
+ * 
+ * + *

+ * yielding: + *

+ * + *
+ * "The quick brown fox jumped over the lazy dog 1234567890 times."
+ * 
+ * + *

+ * {@code StringSubstitutor} supports throwing exceptions for unresolved variables, you enable this by calling + * {@link #setEnableUndefinedVariableException(boolean)} with {@code true}. + *

+ * + *

Reusing Instances

+ *

+ * Static shortcut methods cover the most common use cases. If multiple replace operations are to be performed, creating + * and reusing an instance of this class will be more efficient. + *

+ * + *

Using Interpolation

+ *

+ * The default interpolator lets you use string lookups like: + *

+ * + *
+ * final StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
+ * interpolator.setEnableSubstitutionInVariables(true); // Allows for nested $'s.
+ * final String text = interpolator.replace("Base64 Decoder:        ${base64Decoder:SGVsbG9Xb3JsZCE=}\n"
+ *     + "Base64 Encoder:        ${base64Encoder:HelloWorld!}\n"
+ *     + "Java Constant:         ${const:java.awt.event.KeyEvent.VK_ESCAPE}\n"
+ *     + "Date:                  ${date:yyyy-MM-dd}\n" + "DNS:                   ${dns:address|apache.org}\n"
+ *     + "Environment Variable:  ${env:USERNAME}\n"
+ *     + "File Content:          ${file:UTF-8:src/test/resources/document.properties}\n"
+ *     + "Java:                  ${java:version}\n" + "Localhost:             ${localhost:canonical-name}\n"
+ *     + "Properties File:       ${properties:src/test/resources/document.properties::mykey}\n"
+ *     + "Resource Bundle:       ${resourceBundle:org.example.testResourceBundleLookup:mykey}\n"
+ *     + "Script:                ${script:javascript:3 + 4}\n" + "System Property:       ${sys:user.dir}\n"
+ *     + "URL Decoder:           ${urlDecoder:Hello%20World%21}\n"
+ *     + "URL Encoder:           ${urlEncoder:Hello World!}\n"
+ *     + "URL Content (HTTP):    ${url:UTF-8:http://www.apache.org}\n"
+ *     + "URL Content (HTTPS):   ${url:UTF-8:https://www.apache.org}\n"
+ *     + "URL Content (File):    ${url:UTF-8:file:///${sys:user.dir}/src/test/resources/document.properties}\n"
+ *     + "XML XPath:             ${xml:src/test/resources/document.xml:/root/path/to/node}\n");
+ * 
+ *

+ * For documentation of each lookup, see {@link StringLookupFactory}. + *

+ * + *

Using Recursive Variable Replacement

+ *

+ * Variable replacement can work recursively by calling {@link #setEnableSubstitutionInVariables(boolean)} with + * {@code true}. If a variable value contains a variable then that variable will also be replaced. Cyclic replacements + * are detected and will throw an exception. + *

+ *

+ * You can get the replace result to contain a variable prefix. For example: + *

+ * + *
+ * "The variable ${${name}} must be used."
+ * 
+ * + *

+ * If the value of the "name" variable is "x", then only the variable "name" is replaced resulting in: + *

+ * + *
+ * "The variable ${x} must be used."
+ * 
+ * + *

+ * To achieve this effect there are two possibilities: Either set a different prefix and suffix for variables which do + * not conflict with the result text you want to produce. The other possibility is to use the escape character, by + * default '$'. If this character is placed before a variable reference, this reference is ignored and won't be + * replaced. For example: + *

+ * + *
+ * "The variable $${${name}} must be used."
+ * 
+ *

+ * In some complex scenarios you might even want to perform substitution in the names of variables, for instance + *

+ * + *
+ * ${jre-${java.specification.version}}
+ * 
+ * + *

+ * {@code StringSubstitutor} supports this recursive substitution in variable names, but it has to be enabled explicitly + * by calling {@link #setEnableSubstitutionInVariables(boolean)} with {@code true}. + *

+ * + *

Thread Safety

+ *

+ * This class is not thread safe. + *

+ * + * @since 1.3 + */ +public class StringSubstitutor { + + /** + * The low-level result of a substitution. + * + * @since 1.9 + */ + private static final class Result { + + /** Whether the buffer is altered. */ + public final boolean altered; + + /** The length of change. */ + public final int lengthChange; + + private Result(final boolean altered, final int lengthChange) { + this.altered = altered; + this.lengthChange = lengthChange; + } + + @Override + public String toString() { + return "Result [altered=" + altered + ", lengthChange=" + lengthChange + "]"; + } + } + + /** + * Constant for the default escape character. + */ + public static final char DEFAULT_ESCAPE = '$'; + + /** + * The default variable default separator. + * + * @since 1.5. + */ + public static final String DEFAULT_VAR_DEFAULT = ":-"; + + /** + * The default variable end separator. + * + * @since 1.5. + */ + public static final String DEFAULT_VAR_END = "}"; + + /** + * The default variable start separator. + * + * @since 1.5. + */ + public static final String DEFAULT_VAR_START = "${"; + + /** + * Constant for the default variable prefix. + */ + public static final StringMatcher DEFAULT_PREFIX = StringMatcherFactory.INSTANCE.stringMatcher(DEFAULT_VAR_START); + + /** + * Constant for the default variable suffix. + */ + public static final StringMatcher DEFAULT_SUFFIX = StringMatcherFactory.INSTANCE.stringMatcher(DEFAULT_VAR_END); + + /** + * Constant for the default value delimiter of a variable. + */ + public static final StringMatcher DEFAULT_VALUE_DELIMITER = StringMatcherFactory.INSTANCE + .stringMatcher(DEFAULT_VAR_DEFAULT); + + /** + * Creates a new instance using the interpolator string lookup + * {@link StringLookupFactory#interpolatorStringLookup()}. + *

+ * This StringSubstitutor lets you perform substitutions like: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace(
+     *   "OS name: ${sys:os.name}, " + "3 + 4 = ${script:javascript:3 + 4}");
+     * 
+ * + * @return a new instance using the interpolator string lookup. + * @see StringLookupFactory#interpolatorStringLookup() + * @since 1.8 + */ + public static StringSubstitutor createInterpolator() { + return new StringSubstitutor(StringLookupFactory.INSTANCE.interpolatorStringLookup()); + } + + /** + * Replaces all the occurrences of variables in the given source object with their matching values from the map. + * + * @param the type of the values in the map + * @param source the source text containing the variables to substitute, null returns null + * @param valueMap the map with the values, may be null + * @return The result of the replace operation + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public static String replace(final Object source, final Map valueMap) { + return new StringSubstitutor(valueMap).replace(source); + } + + /** + * Replaces all the occurrences of variables in the given source object with their matching values from the map. + * This method allows to specify a custom variable prefix and suffix + * + * @param the type of the values in the map + * @param source the source text containing the variables to substitute, null returns null + * @param valueMap the map with the values, may be null + * @param prefix the prefix of variables, not null + * @param suffix the suffix of variables, not null + * @return The result of the replace operation + * @throws IllegalArgumentException if the prefix or suffix is null + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public static String replace(final Object source, final Map valueMap, final String prefix, + final String suffix) { + return new StringSubstitutor(valueMap, prefix, suffix).replace(source); + } + + /** + * Replaces all the occurrences of variables in the given source object with their matching values from the + * properties. 
+ * + * @param source the source text containing the variables to substitute, null returns null + * @param valueProperties the properties with values, may be null + * @return The result of the replace operation + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public static String replace(final Object source, final Properties valueProperties) { + if (valueProperties == null) { + return source.toString(); + } + final Map valueMap = new HashMap<>(); + final Enumeration propNames = valueProperties.propertyNames(); + while (propNames.hasMoreElements()) { + final String propName = (String) propNames.nextElement(); + final String propValue = valueProperties.getProperty(propName); + valueMap.put(propName, propValue); + } + return StringSubstitutor.replace(source, valueMap); + } + + /** + * Replaces all the occurrences of variables in the given source object with their matching values from the system + * properties. + * + * @param source the source text containing the variables to substitute, null returns null + * @return The result of the replace operation + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public static String replaceSystemProperties(final Object source) { + return new StringSubstitutor(StringLookupFactory.INSTANCE.systemPropertyStringLookup()).replace(source); + } + + /** + * The flag whether substitution in variable values is disabled. + */ + private boolean disableSubstitutionInValues; + + /** + * The flag whether substitution in variable names is enabled. + */ + private boolean enableSubstitutionInVariables; + + /** + * The flag whether exception should be thrown on undefined variable. + */ + private boolean enableUndefinedVariableException; + + /** + * Stores the escape character. + */ + private char escapeChar; + + /** + * Stores the variable prefix. 
+ */ + private StringMatcher prefixMatcher; + + /** + * Whether escapes should be preserved. Default is false; + */ + private boolean preserveEscapes; + + /** + * Stores the variable suffix. + */ + private StringMatcher suffixMatcher; + + /** + * Stores the default variable value delimiter. + */ + private StringMatcher valueDelimiterMatcher; + + /** + * Variable resolution is delegated to an implementor of {@link StringLookup}. + */ + private StringLookup variableResolver; + + /** + * Creates a new instance with defaults for variable prefix and suffix and the escaping character. + */ + public StringSubstitutor() { + this((StringLookup) null, DEFAULT_PREFIX, DEFAULT_SUFFIX, DEFAULT_ESCAPE); + } + + /** + * Creates a new instance and initializes it. Uses defaults for variable prefix and suffix and the escaping + * character. + * + * @param the type of the values in the map + * @param valueMap the map with the variables' values, may be null + */ + public StringSubstitutor(final Map valueMap) { + this(StringLookupFactory.INSTANCE.mapStringLookup(valueMap), DEFAULT_PREFIX, DEFAULT_SUFFIX, DEFAULT_ESCAPE); + } + + /** + * Creates a new instance and initializes it. Uses a default escaping character. + * + * @param the type of the values in the map + * @param valueMap the map with the variables' values, may be null + * @param prefix the prefix for variables, not null + * @param suffix the suffix for variables, not null + * @throws IllegalArgumentException if the prefix or suffix is null + */ + public StringSubstitutor(final Map valueMap, final String prefix, final String suffix) { + this(StringLookupFactory.INSTANCE.mapStringLookup(valueMap), prefix, suffix, DEFAULT_ESCAPE); + } + + /** + * Creates a new instance and initializes it. 
+ * + * @param the type of the values in the map + * @param valueMap the map with the variables' values, may be null + * @param prefix the prefix for variables, not null + * @param suffix the suffix for variables, not null + * @param escape the escape character + * @throws IllegalArgumentException if the prefix or suffix is null + */ + public StringSubstitutor(final Map valueMap, final String prefix, final String suffix, + final char escape) { + this(StringLookupFactory.INSTANCE.mapStringLookup(valueMap), prefix, suffix, escape); + } + + /** + * Creates a new instance and initializes it. + * + * @param the type of the values in the map + * @param valueMap the map with the variables' values, may be null + * @param prefix the prefix for variables, not null + * @param suffix the suffix for variables, not null + * @param escape the escape character + * @param valueDelimiter the variable default value delimiter, may be null + * @throws IllegalArgumentException if the prefix or suffix is null + */ + public StringSubstitutor(final Map valueMap, final String prefix, final String suffix, + final char escape, final String valueDelimiter) { + this(StringLookupFactory.INSTANCE.mapStringLookup(valueMap), prefix, suffix, escape, valueDelimiter); + } + + /** + * Creates a new instance and initializes it. + * + * @param variableResolver the variable resolver, may be null + */ + public StringSubstitutor(final StringLookup variableResolver) { + this(variableResolver, DEFAULT_PREFIX, DEFAULT_SUFFIX, DEFAULT_ESCAPE); + } + + /** + * Creates a new instance and initializes it. 
+ * + * @param variableResolver the variable resolver, may be null + * @param prefix the prefix for variables, not null + * @param suffix the suffix for variables, not null + * @param escape the escape character + * @throws IllegalArgumentException if the prefix or suffix is null + */ + public StringSubstitutor(final StringLookup variableResolver, final String prefix, final String suffix, + final char escape) { + this.setVariableResolver(variableResolver); + this.setVariablePrefix(prefix); + this.setVariableSuffix(suffix); + this.setEscapeChar(escape); + this.setValueDelimiterMatcher(DEFAULT_VALUE_DELIMITER); + } + + /** + * Creates a new instance and initializes it. + * + * @param variableResolver the variable resolver, may be null + * @param prefix the prefix for variables, not null + * @param suffix the suffix for variables, not null + * @param escape the escape character + * @param valueDelimiter the variable default value delimiter string, may be null + * @throws IllegalArgumentException if the prefix or suffix is null + */ + public StringSubstitutor(final StringLookup variableResolver, final String prefix, final String suffix, + final char escape, final String valueDelimiter) { + this.setVariableResolver(variableResolver); + this.setVariablePrefix(prefix); + this.setVariableSuffix(suffix); + this.setEscapeChar(escape); + this.setValueDelimiter(valueDelimiter); + } + + /** + * Creates a new instance and initializes it. 
+ * + * @param variableResolver the variable resolver, may be null + * @param prefixMatcher the prefix for variables, not null + * @param suffixMatcher the suffix for variables, not null + * @param escape the escape character + * @throws IllegalArgumentException if the prefix or suffix is null + */ + public StringSubstitutor(final StringLookup variableResolver, final StringMatcher prefixMatcher, + final StringMatcher suffixMatcher, final char escape) { + this(variableResolver, prefixMatcher, suffixMatcher, escape, DEFAULT_VALUE_DELIMITER); + } + + /** + * Creates a new instance and initializes it. + * + * @param variableResolver the variable resolver, may be null + * @param prefixMatcher the prefix for variables, not null + * @param suffixMatcher the suffix for variables, not null + * @param escape the escape character + * @param valueDelimiterMatcher the variable default value delimiter matcher, may be null + * @throws IllegalArgumentException if the prefix or suffix is null + */ + public StringSubstitutor(final StringLookup variableResolver, final StringMatcher prefixMatcher, + final StringMatcher suffixMatcher, final char escape, final StringMatcher valueDelimiterMatcher) { + this.setVariableResolver(variableResolver); + this.setVariablePrefixMatcher(prefixMatcher); + this.setVariableSuffixMatcher(suffixMatcher); + this.setEscapeChar(escape); + this.setValueDelimiterMatcher(valueDelimiterMatcher); + } + + /** + * Creates a new instance based on the given StringSubstitutor. + * + * @param other The StringSubstitutor is use as the source. 
+ * @since 1.9 + */ + public StringSubstitutor(final StringSubstitutor other) { + disableSubstitutionInValues = other.isDisableSubstitutionInValues(); + enableSubstitutionInVariables = other.isEnableSubstitutionInVariables(); + enableUndefinedVariableException = other.isEnableUndefinedVariableException(); + escapeChar = other.getEscapeChar(); + prefixMatcher = other.getVariablePrefixMatcher(); + preserveEscapes = other.isPreserveEscapes(); + suffixMatcher = other.getVariableSuffixMatcher(); + valueDelimiterMatcher = other.getValueDelimiterMatcher(); + variableResolver = other.getStringLookup(); + } + + /** + * Checks if the specified variable is already in the stack (list) of variables. + * + * @param varName the variable name to check + * @param priorVariables the list of prior variables + */ + private void checkCyclicSubstitution(final String varName, final List priorVariables) { + if (!priorVariables.contains(varName)) { + return; + } + final TextStringBuilder buf = new TextStringBuilder(256); + buf.append("Infinite loop in property interpolation of "); + buf.append(priorVariables.remove(0)); + buf.append(": "); + buf.appendWithSeparators(priorVariables, "->"); + throw new IllegalStateException(buf.toString()); + } + + // Escape + /** + * Returns the escape character. + * + * @return The character used for escaping variable references + */ + public char getEscapeChar() { + return this.escapeChar; + } + + /** + * Gets the StringLookup that is used to lookup variables. + * + * @return The StringLookup + */ + public StringLookup getStringLookup() { + return this.variableResolver; + } + + /** + * Gets the variable default value delimiter matcher currently in use. + *

+ * The variable default value delimiter is the character or characters that delimit the variable name and the + * variable default value. This delimiter is expressed in terms of a matcher allowing advanced variable default + * value delimiter matches. + *

+ *

+ * If it returns null, then the variable default value resolution is disabled. + * + * @return The variable default value delimiter matcher in use, may be null + */ + public StringMatcher getValueDelimiterMatcher() { + return valueDelimiterMatcher; + } + + /** + * Gets the variable prefix matcher currently in use. + *

+ * The variable prefix is the character or characters that identify the start of a variable. This prefix is + * expressed in terms of a matcher allowing advanced prefix matches. + *

+ * + * @return The prefix matcher in use + */ + public StringMatcher getVariablePrefixMatcher() { + return prefixMatcher; + } + + /** + * Gets the variable suffix matcher currently in use. + *

+ * The variable suffix is the character or characters that identify the end of a variable. This suffix is expressed + * in terms of a matcher allowing advanced suffix matches. + *

+ * + * @return The suffix matcher in use + */ + public StringMatcher getVariableSuffixMatcher() { + return suffixMatcher; + } + + /** + * Returns a flag whether substitution is disabled in variable values. If set to true, variables contained in the + * values of other variables are not processed and substituted when the original variable is evaluated, e.g. + * + *
+     * Map<String, String> valuesMap = new HashMap<>();
+     * valuesMap.put("name", "Douglas ${surname}");
+     * valuesMap.put("surname", "Crockford");
+     * String templateString = "Hi ${name}";
+     * StringSubstitutor sub = new StringSubstitutor(valuesMap);
+     * sub.setDisableSubstitutionInValues(true);
+     * String resolvedString = sub.replace(templateString);
+     * 
+ * + * yielding: + * + *
+     *      Hi Douglas ${surname}
+     * 
+ * + * @return The substitution in variable values flag + */ + public boolean isDisableSubstitutionInValues() { + return disableSubstitutionInValues; + } + + /** + * Returns a flag whether substitution is done in variable names. + * + * @return The substitution in variable names flag + */ + public boolean isEnableSubstitutionInVariables() { + return enableSubstitutionInVariables; + } + + /** + * Returns a flag whether an exception can be thrown upon an undefined variable. + * + * @return The fail on undefined variable flag + */ + public boolean isEnableUndefinedVariableException() { + return enableUndefinedVariableException; + } + + /** + * Returns the flag controlling whether escapes are preserved during substitution. + * + * @return The preserve escape flag + */ + public boolean isPreserveEscapes() { + return preserveEscapes; + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * array as a template. The array is not altered by this method. + * + * @param source the character array to replace in, not altered, null returns null + * @return The result of the replace operation + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public String replace(final char[] source) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(source.length).append(source); + substitute(buf, 0, source.length); + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * array as a template. The array is not altered by this method. + *

+ * Only the specified portion of the array will be processed. The rest of the array is not processed, and is not + * returned. + *

+ * + * @param source the character array to replace in, not altered, null returns null + * @param offset the start offset within the array, must be valid + * @param length the length within the array to be processed, must be valid + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final char[] source, final int offset, final int length) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length); + substitute(buf, 0, length); + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source as + * a template. The source is not altered by this method. + * + * @param source the buffer to use as a template, not changed, null returns null + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final CharSequence source) { + if (source == null) { + return null; + } + return replace(source, 0, source.length()); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source as + * a template. The source is not altered by this method. + *

+ * Only the specified portion of the buffer will be processed. The rest of the buffer is not processed, and is not + * returned. + *

+ * + * @param source the buffer to use as a template, not changed, null returns null + * @param offset the start offset within the array, must be valid + * @param length the length within the array to be processed, must be valid + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final CharSequence source, final int offset, final int length) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(length).append(source.toString(), offset, length); + substitute(buf, 0, length); + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables in the given source object with their matching values from the + * resolver. The input source object is converted to a string using {@code toString} and is not altered. + * + * @param source the source to replace in, null returns null + * @return The result of the replace operation + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public String replace(final Object source) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder().append(source); + substitute(buf, 0, buf.length()); + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * string as a template. 
+ * + * @param source the string to replace in, null returns null + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final String source) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(source); + if (!substitute(buf, 0, source.length())) { + return source; + } + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * string as a template. + *

+ * Only the specified portion of the string will be processed. The rest of the string is not processed, and is not + * returned. + *

+ * + * @param source the string to replace in, null returns null + * @param offset the start offset within the source, must be valid + * @param length the length within the source to be processed, must be valid + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final String source, final int offset, final int length) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length); + if (!substitute(buf, 0, length)) { + return source.substring(offset, offset + length); + } + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * buffer as a template. The buffer is not altered by this method. + * + * @param source the buffer to use as a template, not changed, null returns null + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final StringBuffer source) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(source.length()).append(source); + substitute(buf, 0, buf.length()); + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * buffer as a template. The buffer is not altered by this method. + *

+ * Only the specified portion of the buffer will be processed. The rest of the buffer is not processed, and is not + * returned. + *

+ * + * @param source the buffer to use as a template, not changed, null returns null + * @param offset the start offset within the source, must be valid + * @param length the length within the source to be processed, must be valid + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final StringBuffer source, final int offset, final int length) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length); + substitute(buf, 0, length); + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * builder as a template. The builder is not altered by this method. + * + * @param source the builder to use as a template, not changed, null returns null + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final TextStringBuilder source) { + if (source == null) { + return null; + } + final TextStringBuilder builder = new TextStringBuilder(source.length()).append(source); + substitute(builder, 0, builder.length()); + return builder.toString(); + } + + /** + * Replaces all the occurrences of variables with their matching values from the resolver using the given source + * builder as a template. The builder is not altered by this method. + *

+ * Only the specified portion of the builder will be processed. The rest of the builder is not processed, and is not + * returned. + *

+ * + * @param source the builder to use as a template, not changed, null returns null + * @param offset the start offset within the source, must be valid + * @param length the length within the source to be processed, must be valid + * @return The result of the replace operation + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + */ + public String replace(final TextStringBuilder source, final int offset, final int length) { + if (source == null) { + return null; + } + final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length); + substitute(buf, 0, length); + return buf.toString(); + } + + /** + * Replaces all the occurrences of variables within the given source buffer with their matching values from the + * resolver. The buffer is updated with the result. + * + * @param source the buffer to replace in, updated, null returns false + * @return true if altered + */ + public boolean replaceIn(final StringBuffer source) { + if (source == null) { + return false; + } + return replaceIn(source, 0, source.length()); + } + + /** + * Replaces all the occurrences of variables within the given source buffer with their matching values from the + * resolver. The buffer is updated with the result. + *

+ * Only the specified portion of the buffer will be processed. The rest of the buffer is not processed, but it is + * not deleted. + *

+ * + * @param source the buffer to replace in, updated, null returns false + * @param offset the start offset within the source, must be valid + * @param length the length within the source to be processed, must be valid + * @return true if altered + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public boolean replaceIn(final StringBuffer source, final int offset, final int length) { + if (source == null) { + return false; + } + final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length); + if (!substitute(buf, 0, length)) { + return false; + } + source.replace(offset, offset + length, buf.toString()); + return true; + } + + /** + * Replaces all the occurrences of variables within the given source builder with their matching values from the + * resolver. The builder is updated with the result. + * + * @param source the builder to replace in, updated, null returns false + * @return true if altered + */ + public boolean replaceIn(final StringBuilder source) { + if (source == null) { + return false; + } + return replaceIn(source, 0, source.length()); + } + + /** + * Replaces all the occurrences of variables within the given source builder with their matching values from the + * resolver. The builder is updated with the result. + *

+ * Only the specified portion of the builder will be processed. The rest of the builder is not processed, but it is + * not deleted. + *

+ * + * @param source the builder to replace in, updated, null returns false + * @param offset the start offset within the source, must be valid + * @param length the length within the source to be processed, must be valid + * @return true if altered + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public boolean replaceIn(final StringBuilder source, final int offset, final int length) { + if (source == null) { + return false; + } + final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length); + if (!substitute(buf, 0, length)) { + return false; + } + source.replace(offset, offset + length, buf.toString()); + return true; + } + + /** + * Replaces all the occurrences of variables within the given source builder with their matching values from the + * resolver. + * + * @param source the builder to replace in, updated, null returns false + * @return true if altered + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public boolean replaceIn(final TextStringBuilder source) { + if (source == null) { + return false; + } + return substitute(source, 0, source.length()); + } + + /** + * Replaces all the occurrences of variables within the given source builder with their matching values from the + * resolver. + *

+ * Only the specified portion of the builder will be processed. The rest of the builder is not processed, but it is + * not deleted. + *

+ * + * @param source the builder to replace in, updated, null returns false + * @param offset the start offset within the source, must be valid + * @param length the length within the source to be processed, must be valid + * @return true if altered + * @throws IllegalArgumentException if a variable is not found and enableUndefinedVariableException is true + */ + public boolean replaceIn(final TextStringBuilder source, final int offset, final int length) { + if (source == null) { + return false; + } + return substitute(source, offset, length); + } + + /** + * Internal method that resolves the value of a variable. + *

+ * Most users of this class do not need to call this method. This method is called automatically by the substitution + * process. + *

+ *

+ * Writers of subclasses can override this method if they need to alter how each substitution occurs. The method is + * passed the variable's name and must return the corresponding value. This implementation uses the + * {@link #getStringLookup()} with the variable's name as the key. + *

+ * + * @param variableName the name of the variable, not null + * @param buf the buffer where the substitution is occurring, not null + * @param startPos the start position of the variable including the prefix, valid + * @param endPos the end position of the variable including the suffix, valid + * @return The variable's value or null if the variable is unknown + */ + protected String resolveVariable(final String variableName, final TextStringBuilder buf, final int startPos, + final int endPos) { + final StringLookup resolver = getStringLookup(); + if (resolver == null) { + return null; + } + return resolver.lookup(variableName); + } + + /** + * Sets a flag controlling whether substitution in variable values (recursive substitution) is disabled. + * + * @param disableSubstitutionInValues true if substitution in variable values is disabled + * @return this, to enable chaining + */ + public StringSubstitutor setDisableSubstitutionInValues(final boolean disableSubstitutionInValues) { + this.disableSubstitutionInValues = disableSubstitutionInValues; + return this; + } + + /** + * Sets a flag whether substitution is done in variable names. If set to true, the names of variables can + * contain other variables which are processed first before the original variable is evaluated, e.g. + * {@code ${jre-${java.version}}}. The default value is false. + * + * @param enableSubstitutionInVariables the new value of the flag + * @return this, to enable chaining + */ + public StringSubstitutor setEnableSubstitutionInVariables(final boolean enableSubstitutionInVariables) { + this.enableSubstitutionInVariables = enableSubstitutionInVariables; + return this; + } + + /** + * Sets a flag whether an exception should be thrown if any variable is undefined. 
+ * + * @param failOnUndefinedVariable true if an exception should be thrown on an undefined variable + * @return this, to enable chaining + */ + public StringSubstitutor setEnableUndefinedVariableException(final boolean failOnUndefinedVariable) { + this.enableUndefinedVariableException = failOnUndefinedVariable; + return this; + } + + /** + * Sets the escape character. If this character is placed before a variable reference in the source text, this + * variable will be ignored. + * + * @param escapeCharacter the escape character (0 for disabling escaping) + * @return this, to enable chaining + */ + public StringSubstitutor setEscapeChar(final char escapeCharacter) { + this.escapeChar = escapeCharacter; + return this; + } + + /** + * Sets a flag controlling whether escapes are preserved during substitution. If set to true, the escape + * character is retained during substitution (e.g. {@code $${this-is-escaped}} remains {@code $${this-is-escaped}}). + * If set to false, the escape character is removed during substitution (e.g. {@code $${this-is-escaped}} + * becomes {@code ${this-is-escaped}}). The default value is false. + * + * @param preserveEscapes true if escapes are to be preserved + * @return this, to enable chaining + */ + public StringSubstitutor setPreserveEscapes(final boolean preserveEscapes) { + this.preserveEscapes = preserveEscapes; + return this; + } + + /** + * Sets the variable default value delimiter to use. + *

+ * The variable default value delimiter is the character or characters that delimit the variable name and the + * variable default value. This method allows a single character variable default value delimiter to be easily set. + *

+ * + * @param valueDelimiter the variable default value delimiter character to use + * @return this, to enable chaining + */ + public StringSubstitutor setValueDelimiter(final char valueDelimiter) { + return setValueDelimiterMatcher(StringMatcherFactory.INSTANCE.charMatcher(valueDelimiter)); + } + + /** + * Sets the variable default value delimiter to use. + *

+ * The variable default value delimiter is the character or characters that delimit the variable name and the + * variable default value. This method allows a string variable default value delimiter to be easily set. + *

+ *

+ * If the {@code valueDelimiter} is null or empty string, then the variable default value resolution becomes + * disabled. + *

+ * + * @param valueDelimiter the variable default value delimiter string to use, may be null or empty + * @return this, to enable chaining + */ + public StringSubstitutor setValueDelimiter(final String valueDelimiter) { + if (valueDelimiter == null || valueDelimiter.isEmpty()) { + setValueDelimiterMatcher(null); + return this; + } + return setValueDelimiterMatcher(StringMatcherFactory.INSTANCE.stringMatcher(valueDelimiter)); + } + + /** + * Sets the variable default value delimiter matcher to use. + *

+ * The variable default value delimiter is the character or characters that delimit the variable name and the + * variable default value. This delimiter is expressed in terms of a matcher allowing advanced variable default + * value delimiter matches. + *

+ *

+ * If the {@code valueDelimiterMatcher} is null, then the variable default value resolution becomes disabled. + *

+ * + * @param valueDelimiterMatcher variable default value delimiter matcher to use, may be null + * @return this, to enable chaining + */ + public StringSubstitutor setValueDelimiterMatcher(final StringMatcher valueDelimiterMatcher) { + this.valueDelimiterMatcher = valueDelimiterMatcher; + return this; + } + + /** + * Sets the variable prefix to use. + *

+ * The variable prefix is the character or characters that identify the start of a variable. This method allows a + * single character prefix to be easily set. + *

+ * + * @param prefix the prefix character to use + * @return this, to enable chaining + */ + public StringSubstitutor setVariablePrefix(final char prefix) { + return setVariablePrefixMatcher(StringMatcherFactory.INSTANCE.charMatcher(prefix)); + } + + /** + * Sets the variable prefix to use. + *

+ * The variable prefix is the character or characters that identify the start of a variable. This method allows a + * string prefix to be easily set. + *

+ * + * @param prefix the prefix for variables, not null + * @return this, to enable chaining + * @throws IllegalArgumentException if the prefix is null + */ + public StringSubstitutor setVariablePrefix(final String prefix) { + Validate.isTrue(prefix != null, "Variable prefix must not be null!"); + return setVariablePrefixMatcher(StringMatcherFactory.INSTANCE.stringMatcher(prefix)); + } + + /** + * Sets the variable prefix matcher to use. + *

+ * The variable prefix is the character or characters that identify the start of a variable. This prefix is + * expressed in terms of a matcher allowing advanced prefix matches. + *

+ * + * @param prefixMatcher the prefix matcher to use, not null + * @return this, to enable chaining + * @throws IllegalArgumentException if the prefix matcher is null + */ + public StringSubstitutor setVariablePrefixMatcher(final StringMatcher prefixMatcher) { + Validate.isTrue(prefixMatcher != null, "Variable prefix matcher must not be null!"); + this.prefixMatcher = prefixMatcher; + return this; + } + + /** + * Sets the {@link StringLookup} that is used to lookup variables. + * + * @param variableResolver the StringLookup to use, stored as given + * @return this, to enable chaining + */ + public StringSubstitutor setVariableResolver(final StringLookup variableResolver) { + this.variableResolver = variableResolver; + return this; + } + + /** + * Sets the variable suffix to use. + *

+ * The variable suffix is the character or characters that identify the end of a variable. This method allows a + * single character suffix to be easily set. + *

+ * + * @param suffix the suffix character to use + * @return this, to enable chaining + */ + public StringSubstitutor setVariableSuffix(final char suffix) { + return setVariableSuffixMatcher(StringMatcherFactory.INSTANCE.charMatcher(suffix)); + } + + /** + * Sets the variable suffix to use. + *

+ * The variable suffix is the character or characters that identify the end of a variable. This method allows a + * string suffix to be easily set. + *

+ * + * @param suffix the suffix for variables, not null + * @return this, to enable chaining + * @throws IllegalArgumentException if the suffix is null + */ + public StringSubstitutor setVariableSuffix(final String suffix) { + Validate.isTrue(suffix != null, "Variable suffix must not be null!"); + return setVariableSuffixMatcher(StringMatcherFactory.INSTANCE.stringMatcher(suffix)); + } + + /** + * Sets the variable suffix matcher to use. + *

+ * The variable suffix is the character or characters that identify the end of a variable. This suffix is expressed + * in terms of a matcher allowing advanced suffix matches. + *

+ * + * @param suffixMatcher the suffix matcher to use, must not be null + * @return this, to enable chaining + * @throws IllegalArgumentException if the suffix matcher is null + */ + public StringSubstitutor setVariableSuffixMatcher(final StringMatcher suffixMatcher) { + Validate.isTrue(suffixMatcher != null, "Variable suffix matcher must not be null!"); + this.suffixMatcher = suffixMatcher; + return this; + } + + /** + * Internal method that substitutes the variables. + *

+ * Most users of this class do not need to call this method. This method will be called automatically by another + * (public) method. + *

+ *

+ * Writers of subclasses can override this method if they need access to the substitution process at the start or + * end. + *

+ * + * @param builder the string builder to substitute into, not null + * @param offset the start offset within the builder, must be valid + * @param length the length within the builder to be processed, must be valid + * @return true if altered + */ + protected boolean substitute(final TextStringBuilder builder, final int offset, final int length) { + return substitute(builder, offset, length, null).altered; + } + + /** + * Recursive handler for multiple levels of interpolation. This is the main interpolation method, which resolves the + * values of all variable references contained in the passed in text. + * + * @param builder the string builder to substitute into, not null + * @param offset the start offset within the builder, must be valid + * @param length the length within the builder to be processed, must be valid + * @param priorVariables the stack keeping track of the replaced variables, may be null + * @return The result. + * @throws IllegalArgumentException if variable is not found when its allowed to throw exception + * @since 1.9 + */ + private Result substitute(final TextStringBuilder builder, final int offset, final int length, + List priorVariables) { + Objects.requireNonNull(builder, "builder"); + final StringMatcher prefixMatcher = getVariablePrefixMatcher(); + final StringMatcher suffixMatcher = getVariableSuffixMatcher(); + final char escapeCh = getEscapeChar(); + final StringMatcher valueDelimMatcher = getValueDelimiterMatcher(); + final boolean substitutionInVariablesEnabled = isEnableSubstitutionInVariables(); + final boolean substitutionInValuesDisabled = isDisableSubstitutionInValues(); + final boolean undefinedVariableException = isEnableUndefinedVariableException(); + final boolean preserveEscapes = isPreserveEscapes(); + + boolean altered = false; + int lengthChange = 0; + int bufEnd = offset + length; + int pos = offset; + int escPos = -1; + outer: while (pos < bufEnd) { + final int startMatchLen = prefixMatcher.isMatch(builder, pos, 
offset, bufEnd); + if (startMatchLen == 0) { + pos++; + } else { + // found variable start marker + if (pos > offset && builder.charAt(pos - 1) == escapeCh) { + // escape detected + if (preserveEscapes) { + // keep escape + pos++; + continue; + } + // mark esc ch for deletion if we find a complete variable + escPos = pos - 1; + } + // find suffix + int startPos = pos; + pos += startMatchLen; + int endMatchLen = 0; + int nestedVarCount = 0; + while (pos < bufEnd) { + if (substitutionInVariablesEnabled && prefixMatcher.isMatch(builder, pos, offset, bufEnd) != 0) { + // found a nested variable start + endMatchLen = prefixMatcher.isMatch(builder, pos, offset, bufEnd); + nestedVarCount++; + pos += endMatchLen; + continue; + } + + endMatchLen = suffixMatcher.isMatch(builder, pos, offset, bufEnd); + if (endMatchLen == 0) { + pos++; + } else { + // found variable end marker + if (nestedVarCount == 0) { + if (escPos >= 0) { + // delete escape + builder.deleteCharAt(escPos); + escPos = -1; + lengthChange--; + altered = true; + bufEnd--; + pos = startPos + 1; + startPos--; + continue outer; + } + // get var name + String varNameExpr = builder.midString(startPos + startMatchLen, + pos - startPos - startMatchLen); + if (substitutionInVariablesEnabled) { + final TextStringBuilder bufName = new TextStringBuilder(varNameExpr); + substitute(bufName, 0, bufName.length()); + varNameExpr = bufName.toString(); + } + pos += endMatchLen; + final int endPos = pos; + + String varName = varNameExpr; + String varDefaultValue = null; + + if (valueDelimMatcher != null) { + final char[] varNameExprChars = varNameExpr.toCharArray(); + int valueDelimiterMatchLen = 0; + for (int i = 0; i < varNameExprChars.length; i++) { + // if there's any nested variable when nested variable substitution disabled, + // then stop resolving name and default value. 
+ if (!substitutionInVariablesEnabled && prefixMatcher.isMatch(varNameExprChars, i, i, + varNameExprChars.length) != 0) { + break; + } + if (valueDelimMatcher.isMatch(varNameExprChars, i, 0, + varNameExprChars.length) != 0) { + valueDelimiterMatchLen = valueDelimMatcher.isMatch(varNameExprChars, i, 0, + varNameExprChars.length); + varName = varNameExpr.substring(0, i); + varDefaultValue = varNameExpr.substring(i + valueDelimiterMatchLen); + break; + } + } + } + + // on the first call initialize priorVariables + if (priorVariables == null) { + priorVariables = new ArrayList<>(); + priorVariables.add(builder.midString(offset, length)); + } + + // handle cyclic substitution + checkCyclicSubstitution(varName, priorVariables); + priorVariables.add(varName); + + // resolve the variable + String varValue = resolveVariable(varName, builder, startPos, endPos); + if (varValue == null) { + varValue = varDefaultValue; + } + if (varValue != null) { + final int varLen = varValue.length(); + builder.replace(startPos, endPos, varValue); + altered = true; + int change = 0; + if (!substitutionInValuesDisabled) { // recursive replace + change = substitute(builder, startPos, varLen, priorVariables).lengthChange; + } + change = change + varLen - (endPos - startPos); + pos += change; + bufEnd += change; + lengthChange += change; + } else if (undefinedVariableException) { + throw new IllegalArgumentException( + String.format("Cannot resolve variable '%s' (enableSubstitutionInVariables=%s).", + varName, substitutionInVariablesEnabled)); + } + + // remove variable from the cyclic stack + priorVariables.remove(priorVariables.size() - 1); + break; + } + nestedVarCount--; + pos += endMatchLen; + } + } + } + } + return new Result(altered, lengthChange); + } +} diff --git a/src/main/java/org/apache/commons/text/StringTokenizer.java b/src/main/java/org/apache/commons/text/StringTokenizer.java new file mode 100644 index 0000000000..9cd9922391 --- /dev/null +++ 
b/src/main/java/org/apache/commons/text/StringTokenizer.java @@ -0,0 +1,1135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.matcher.StringMatcher; +import org.apache.commons.text.matcher.StringMatcherFactory; + +/** + * Tokenizes a string based on delimiters (separators) and supporting quoting and ignored character concepts. + *

+ * This class can split a String into many smaller strings. It aims to do a similar job to + * {@link java.util.StringTokenizer StringTokenizer}, however it offers much more control and flexibility including + * implementing the {@code ListIterator} interface. By default, it is set up like {@code StringTokenizer}. + *

+ * The input String is split into a number of tokens. Each token is separated from the next String by a + * delimiter. One or more delimiter characters must be specified. + *

+ * Each token may be surrounded by quotes. The quote matcher specifies the quote character(s). A quote may be + * escaped within a quoted section by duplicating itself. + *

+ * Between each token and the delimiter are potentially characters that need trimming. The trimmer matcher + * specifies these characters. One usage might be to trim whitespace characters. + *

+ * At any point outside the quotes there might potentially be invalid characters. The ignored matcher specifies + * these characters to be removed. One usage might be to remove new line characters. + *

+ * Empty tokens may be removed or returned as null. + * + *

+ * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
+ * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)
+ * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
+ * 
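+ * + * <table> + * <caption>StringTokenizer properties and options</caption>
+ * <tr> + * <th>Property</th> + * <th>Type</th> + * <th>Default</th> + * </tr>
+ * <tr> + * <td>delim</td> + * <td>CharSetMatcher</td> + * <td>{ \t\n\r\f}</td> + * </tr>
+ * <tr> + * <td>quote</td> + * <td>NoneMatcher</td> + * <td>{}</td> + * </tr>
+ * <tr> + * <td>ignore</td> + * <td>NoneMatcher</td> + * <td>{}</td> + * </tr>
+ * <tr> + * <td>emptyTokenAsNull</td> + * <td>boolean</td> + * <td>false</td> + * </tr>
+ * <tr> + * <td>ignoreEmptyTokens</td> + * <td>boolean</td> + * <td>true</td> + * </tr>
+ * </table>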
+ * + * @since 1.3 + */ +public class StringTokenizer implements ListIterator, Cloneable { + + /** Prototype tokenizer for comma separated values; the CSV factory methods hand out clones of it. */ + private static final StringTokenizer CSV_TOKENIZER_PROTOTYPE; + /** Prototype tokenizer for tab separated values; the TSV factory methods hand out clones of it. */ + private static final StringTokenizer TSV_TOKENIZER_PROTOTYPE; + static { + CSV_TOKENIZER_PROTOTYPE = new StringTokenizer(); + CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.commaMatcher()); + CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); + CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); + CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); + + TSV_TOKENIZER_PROTOTYPE = new StringTokenizer(); + TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.tabMatcher()); + TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); + TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); + TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); + } + + /** + * Returns a clone of {@code CSV_TOKENIZER_PROTOTYPE}. + * + * @return a clone of {@code CSV_TOKENIZER_PROTOTYPE}. + */ + private static StringTokenizer getCSVClone() { + return (StringTokenizer) CSV_TOKENIZER_PROTOTYPE.clone(); + } + /** + * Gets a new tokenizer instance which parses Comma Separated Value strings, with no text to parse set yet. + * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the + * setTrimmerMatcher method). + *

+ * You must call a "reset" method to set the string which you want to parse. + *

+ * + * @return a new tokenizer instance which parses Comma Separated Value strings + */ + public static StringTokenizer getCSVInstance() { + return getCSVClone(); + } + /** + * Gets a new tokenizer instance which parses Comma Separated Value strings initializing it with the given input. + * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the + * setTrimmerMatcher method). + * + * @param input + * the text to parse + * @return a new tokenizer instance which parses Comma Separated Value strings + */ + public static StringTokenizer getCSVInstance(final char[] input) { + return getCSVClone().reset(input); + } + + /** + * Gets a new tokenizer instance which parses Comma Separated Value strings initializing it with the given input. + * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the + * setTrimmerMatcher method). + * + * @param input + * the text to parse + * @return a new tokenizer instance which parses Comma Separated Value strings + */ + public static StringTokenizer getCSVInstance(final String input) { + return getCSVClone().reset(input); + } + /** + * Returns a clone of {@code TSV_TOKENIZER_PROTOTYPE}. + * + * @return a clone of {@code TSV_TOKENIZER_PROTOTYPE}. + */ + private static StringTokenizer getTSVClone() { + return (StringTokenizer) TSV_TOKENIZER_PROTOTYPE.clone(); + } + /** + * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for TSV processing is to + * trim whitespace from both ends (which can be overridden with the setTrimmerMatcher method). + *

+ * You must call a "reset" method to set the string which you want to parse. + *

+ * + * @return a new tokenizer instance which parses Tab Separated Value strings. + */ + public static StringTokenizer getTSVInstance() { + return getTSVClone(); + } + /** + * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for TSV processing is to + * trim whitespace from both ends (which can be overridden with the setTrimmerMatcher method). + * + * @param input + * the string to parse + * @return a new tokenizer instance which parses Tab Separated Value strings. + */ + public static StringTokenizer getTSVInstance(final char[] input) { + return getTSVClone().reset(input); + } + + /** + * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for TSV processing is to + * trim whitespace from both ends (which can be overridden with the setTrimmerMatcher method). + * + * @param input + * the string to parse + * @return a new tokenizer instance which parses Tab Separated Value strings. + */ + public static StringTokenizer getTSVInstance(final String input) { + return getTSVClone().reset(input); + } + /** The text to work on, or null when no text has been set. */ + private char[] chars; + + /** The parsed tokens, or null while the text has not been tokenized yet. */ + private String[] tokens; + + /** The current iteration position, an index into the parsed tokens. */ + private int tokenPos; + + /** The delimiter matcher, initially the split matcher. */ + private StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.splitMatcher(); + + /** The quote matcher, initially the none matcher (no quoting). */ + private StringMatcher quoteMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); + + /** The ignored matcher, initially the none matcher (nothing ignored). */ + private StringMatcher ignoredMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); + + /** The trimmer matcher, initially the none matcher (nothing trimmed). */ + private StringMatcher trimmerMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); + + /** Whether to return empty tokens as null. */ + private boolean emptyAsNull; + + /** Whether to ignore empty tokens. 
*/ + private boolean ignoreEmptyTokens = true; + + /** + * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer, but with no text to + * tokenize. + *

+ * This constructor is normally used with {@link #reset(String)}. + *

+ */ + public StringTokenizer() { + this.chars = null; + } + + /** + * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer. + * + * @param input + * the characters to be parsed; the array is cloned, null sets no text to parse + */ + public StringTokenizer(final char[] input) { + this.chars = input != null ? input.clone() : null; + } + + /** + * Constructs a tokenizer splitting on the specified character. + * + * @param input + * the characters to be parsed; the array is cloned, null sets no text to parse + * @param delim + * the field delimiter character + */ + public StringTokenizer(final char[] input, final char delim) { + this(input); + setDelimiterChar(delim); + } + + /** + * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified + * quote character. + * + * @param input + * the characters to be parsed; the array is cloned, null sets no text to parse + * @param delim + * the field delimiter character + * @param quote + * the field quoted string character + */ + public StringTokenizer(final char[] input, final char delim, final char quote) { + this(input, delim); + setQuoteChar(quote); + } + + /** + * Constructs a tokenizer splitting on the specified string. + * + * @param input + * the characters to be parsed; the array is cloned, null sets no text to parse + * @param delim + * the field delimiter string + */ + public StringTokenizer(final char[] input, final String delim) { + this(input); + setDelimiterString(delim); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher. + * + * @param input + * the characters to be parsed; the array is cloned, null sets no text to parse + * @param delim + * the field delimiter matcher + */ + public StringTokenizer(final char[] input, final StringMatcher delim) { + this(input); + setDelimiterMatcher(delim); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified + * quote matcher. 
+ * + * @param input + * the characters to be parsed; the array is cloned, null sets no text to parse + * @param delim + * the field delimiter matcher + * @param quote + * the field quoted string matcher + */ + public StringTokenizer(final char[] input, final StringMatcher delim, final StringMatcher quote) { + this(input, delim); + setQuoteMatcher(quote); + } + + /** + * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer. + * + * @param input + * the string which is to be parsed, null sets no text to parse + */ + public StringTokenizer(final String input) { + this.chars = input != null ? input.toCharArray() : null; + } + + /** + * Constructs a tokenizer splitting on the specified delimiter character. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter character + */ + public StringTokenizer(final String input, final char delim) { + this(input); + setDelimiterChar(delim); + } + + /** + * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified + * quote character. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter character + * @param quote + * the field quoted string character + */ + public StringTokenizer(final String input, final char delim, final char quote) { + this(input, delim); + setQuoteChar(quote); + } + + /** + * Constructs a tokenizer splitting on the specified delimiter string. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter string + */ + public StringTokenizer(final String input, final String delim) { + this(input); + setDelimiterString(delim); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher. 
+ * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter matcher + */ + public StringTokenizer(final String input, final StringMatcher delim) { + this(input); + setDelimiterMatcher(delim); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified + * quote matcher. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter matcher + * @param quote + * the field quoted string matcher + */ + public StringTokenizer(final String input, final StringMatcher delim, final StringMatcher quote) { + this(input, delim); + setQuoteMatcher(quote); + } + + /** + * Unsupported ListIterator operation. + * + * @param obj + * this parameter ignored. + * @throws UnsupportedOperationException + * always + */ + @Override + public void add(final String obj) { + throw new UnsupportedOperationException("add() is unsupported"); + } + + /** + * Adds a token to a list, paying attention to the parameters we've set. + * + * @param list + * the list to add to + * @param tok + * the token to add + */ + private void addToken(final List list, String tok) { + if (tok == null || tok.isEmpty()) { + if (isIgnoreEmptyTokens()) { + return; + } + if (isEmptyTokenAsNull()) { + tok = null; + } + } + list.add(tok); + } + + /** + * Checks if tokenization has been done, and if not then do it. + */ + private void checkTokenized() { + if (tokens == null) { + final List split; + if (chars == null) { + // still call tokenize as subclass may do some work + split = tokenize(null, 0, 0); + } else { + split = tokenize(chars, 0, chars.length); + } + tokens = split.toArray(ArrayUtils.EMPTY_STRING_ARRAY); + } + } + + /** + * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token + * list. If a {@link CloneNotSupportedException} is caught, return {@code null}. 
+ * + * @return a new instance of this Tokenizer which has been reset. + */ + @Override + public Object clone() { + try { + return cloneReset(); + } catch (final CloneNotSupportedException ex) { + return null; + } + } + + /** + * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token + * list. + * + * @return a new instance of this Tokenizer which has been reset. + * @throws CloneNotSupportedException + * if there is a problem cloning + */ + Object cloneReset() throws CloneNotSupportedException { + // this method exists to enable 100% test coverage + final StringTokenizer cloned = (StringTokenizer) super.clone(); + if (cloned.chars != null) { + cloned.chars = cloned.chars.clone(); + } + cloned.reset(); + return cloned; + } + + /** + * Gets the String content that the tokenizer is parsing. + * + * @return The string content being parsed + */ + public String getContent() { + if (chars == null) { + return null; + } + return new String(chars); + } + + /** + * Gets the field delimiter matcher. + * + * @return The delimiter matcher in use + */ + public StringMatcher getDelimiterMatcher() { + return this.delimMatcher; + } + + /** + * Gets the ignored character matcher. + *

+ * These characters are ignored when parsing the String, unless they are within a quoted region. The default value + * is not to ignore anything. + *

+ * + * @return The ignored matcher in use + */ + public StringMatcher getIgnoredMatcher() { + return ignoredMatcher; + } + + /** + * Gets the quote matcher currently in use. + *

+ * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. The + * default is not to quote anything (a none matcher); the CSV and TSV tokenizers use '"' (double quote). + *

+ * + * @return The quote matcher in use + */ + public StringMatcher getQuoteMatcher() { + return quoteMatcher; + } + + /** + * Gets a copy of the full token list as an independent modifiable array. + * + * @return The tokens as a String array + */ + public String[] getTokenArray() { + checkTokenized(); + return tokens.clone(); + } + + /** + * Gets a copy of the full token list as an independent modifiable list. + * The tokens are copied into a new list, so adding to, removing from or overwriting the returned list never + * changes the tokens held by this tokenizer. + * + * @return The tokens as a new modifiable List + */ + public List getTokenList() { + checkTokenized(); + return new ArrayList<>(Arrays.asList(tokens)); + } + + /** + * Gets the trimmer character matcher. + *

+ * These characters are trimmed off on each side of the delimiter until the token or quote is found. The default + * value is not to trim anything. + *

+ * + * @return The trimmer matcher in use + */ + public StringMatcher getTrimmerMatcher() { + return trimmerMatcher; + } + + /** + * Checks whether there are any more tokens. + * + * @return true if there are more tokens + */ + @Override + public boolean hasNext() { + checkTokenized(); + return tokenPos < tokens.length; + } + + /** + * Checks whether there are any previous tokens that can be iterated to. + * + * @return true if there are previous tokens + */ + @Override + public boolean hasPrevious() { + checkTokenized(); + return tokenPos > 0; + } + + /** + * Gets whether the tokenizer currently returns empty tokens as null. The default for this property is false. + * + * @return true if empty tokens are returned as null + */ + public boolean isEmptyTokenAsNull() { + return this.emptyAsNull; + } + + /** + * Gets whether the tokenizer currently ignores empty tokens. The default for this property is true. + * + * @return true if empty tokens are not returned + */ + public boolean isIgnoreEmptyTokens() { + return ignoreEmptyTokens; + } + + /** + * Checks if the characters at the index specified match the quote already matched in readNextToken(). + * + * @param srcChars + * the character array being tokenized + * @param pos + * the position to check for a quote + * @param len + * the length of the character array being tokenized + * @param quoteStart + * the start position of the matched quote, 0 if no quoting + * @param quoteLen + * the length of the matched quote, 0 if no quoting + * @return true if a quote is matched + */ + private boolean isQuote(final char[] srcChars, final int pos, final int len, final int quoteStart, + final int quoteLen) { + for (int i = 0; i < quoteLen; i++) { + if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) { + return false; + } + } + return true; + } + + /** + * Gets the next token. 
+ * + * @return The next String token + * @throws NoSuchElementException + * if there are no more elements + */ + @Override + public String next() { + if (hasNext()) { + return tokens[tokenPos++]; + } + throw new NoSuchElementException(); + } + + /** + * Gets the index of the next token to return. + * + * @return The next token index + */ + @Override + public int nextIndex() { + return tokenPos; + } + + /** + * Gets the next token from the String. Equivalent to {@link #next()} except it returns null rather than throwing + * {@link NoSuchElementException} when no tokens remain. + * + * @return The next sequential token, or null when no more tokens are found + */ + public String nextToken() { + if (hasNext()) { + return tokens[tokenPos++]; + } + return null; + } + + /** + * Gets the token previous to the last returned token. + * + * @return The previous token + */ + @Override + public String previous() { + if (hasPrevious()) { + return tokens[--tokenPos]; + } + throw new NoSuchElementException(); + } + + /** + * Gets the index of the previous token. + * + * @return The previous token index + */ + @Override + public int previousIndex() { + return tokenPos - 1; + } + + /** + * Gets the previous token from the String. + * + * @return The previous sequential token, or null when no more tokens are found + */ + public String previousToken() { + if (hasPrevious()) { + return tokens[--tokenPos]; + } + return null; + } + + /** + * Reads character by character through the String to get the next token. 
+ * + * @param srcChars + * the character array being tokenized + * @param start + * the first character of field + * @param len + * the length of the character array being tokenized + * @param workArea + * a temporary work area + * @param tokenList + * the list of parsed tokens + * @return The starting position of the next field (the character immediately after the delimiter), or -1 if end of + * string found + */ + private int readNextToken(final char[] srcChars, int start, final int len, final TextStringBuilder workArea, + final List tokenList) { + // skip all leading whitespace, unless it is the + // field delimiter or the quote character + while (start < len) { + final int removeLen = Math.max(getIgnoredMatcher().isMatch(srcChars, start, start, len), + getTrimmerMatcher().isMatch(srcChars, start, start, len)); + if (removeLen == 0 || getDelimiterMatcher().isMatch(srcChars, start, start, len) > 0 + || getQuoteMatcher().isMatch(srcChars, start, start, len) > 0) { + break; + } + start += removeLen; + } + + // handle reaching end + if (start >= len) { + addToken(tokenList, StringUtils.EMPTY); + return -1; + } + + // handle empty token + final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len); + if (delimLen > 0) { + addToken(tokenList, StringUtils.EMPTY); + return start + delimLen; + } + + // handle found token + final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, len); + if (quoteLen > 0) { + return readWithQuotes(srcChars, start + quoteLen, len, workArea, tokenList, start, quoteLen); + } + return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0); + } + + /** + * Reads a possibly quoted string token. 
+ * + * @param srcChars + * the character array being tokenized + * @param start + * the first character of field + * @param len + * the length of the character array being tokenized + * @param workArea + * a temporary work area + * @param tokenList + * the list of parsed tokens + * @param quoteStart + * the start position of the matched quote, 0 if no quoting + * @param quoteLen + * the length of the matched quote, 0 if no quoting + * @return The starting position of the next field (the character immediately after the delimiter, or if end of + * string found, then the length of string + */ + private int readWithQuotes(final char[] srcChars, final int start, final int len, final TextStringBuilder workArea, + final List tokenList, final int quoteStart, final int quoteLen) { + // Loop until we've found the end of the quoted + // string or the end of the input + workArea.clear(); + int pos = start; + boolean quoting = quoteLen > 0; + int trimStart = 0; + + while (pos < len) { + // quoting mode can occur several times throughout a string + // we must switch between quoting and non-quoting until we + // encounter a non-quoted delimiter, or end of string + if (quoting) { + // In quoting mode + + // If we've found a quote character, see if it's + // followed by a second quote. If so, then we need + // to actually put the quote character into the token + // rather than end the token. 
+ if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) { + if (isQuote(srcChars, pos + quoteLen, len, quoteStart, quoteLen)) { + // matched pair of quotes, thus an escaped quote + workArea.append(srcChars, pos, quoteLen); + pos += quoteLen * 2; + trimStart = workArea.size(); + continue; + } + + // end of quoting + quoting = false; + pos += quoteLen; + continue; + } + + // copy regular character from inside quotes + workArea.append(srcChars[pos++]); + trimStart = workArea.size(); + + } else { + // Not in quoting mode + + // check for delimiter, and thus end of token + final int delimLen = getDelimiterMatcher().isMatch(srcChars, pos, start, len); + if (delimLen > 0) { + // return condition when end of token found + addToken(tokenList, workArea.substring(0, trimStart)); + return pos + delimLen; + } + + // check for quote, and thus back into quoting mode + if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, quoteLen)) { + quoting = true; + pos += quoteLen; + continue; + } + + // check for ignored (outside quotes), and ignore + final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, pos, start, len); + if (ignoredLen > 0) { + pos += ignoredLen; + continue; + } + + // check for trimmed character + // don't yet know if its at the end, so copy to workArea + // use trimStart to keep track of trim at the end + final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, pos, start, len); + if (trimmedLen > 0) { + workArea.append(srcChars, pos, trimmedLen); + pos += trimmedLen; + continue; + } + + // copy regular character from outside quotes + workArea.append(srcChars[pos++]); + trimStart = workArea.size(); + } + } + + // return condition when end of string found + addToken(tokenList, workArea.substring(0, trimStart)); + return -1; + } + + /** + * Unsupported ListIterator operation. 
+ * + * @throws UnsupportedOperationException + * always + */ + @Override + public void remove() { + throw new UnsupportedOperationException("remove() is unsupported"); + } + + /** + * Resets this tokenizer, forgetting all parsing and iteration already completed. + *

+     * This method allows the same tokenizer to be reused for the same String.
+     *
+     * @return this, to enable chaining
+     */
+    public StringTokenizer reset() {
+        // rewind the iteration position and drop the parsed tokens, i.e. return to the "not tokenized yet" state
+        tokenPos = 0;
+        tokens = null;
+        return this;
+    }
+
+    /**
+     * Reset this tokenizer, giving it a new character array to parse. In this manner you can re-use a tokenizer with
+     * the same settings on multiple input lines.
+     *
+     * @param input
+     *            the new character array to tokenize, null sets no text to parse; a non-null array is cloned, so
+     *            later changes to the caller's array are not seen by this tokenizer
+     * @return this, to enable chaining
+     */
+    public StringTokenizer reset(final char[] input) {
+        reset();
+        this.chars = input != null ? input.clone() : null;
+        return this;
+    }
+
+    /**
+     * Reset this tokenizer, giving it a new input string to parse. In this manner you can re-use a tokenizer with the
+     * same settings on multiple input lines.
+     *
+     * @param input
+     *            the new string to tokenize, null sets no text to parse
+     * @return this, to enable chaining
+     */
+    public StringTokenizer reset(final String input) {
+        reset();
+        this.chars = input != null ? input.toCharArray() : null;
+        return this;
+    }
+
+    /**
+     * Unsupported ListIterator operation.
+     *
+     * @param obj
+     *            this parameter ignored.
+     * @throws UnsupportedOperationException
+     *             always
+     */
+    @Override
+    public void set(final String obj) {
+        throw new UnsupportedOperationException("set() is unsupported");
+    }
+
+    /**
+     * Sets the field delimiter character.
+     *
+     * @param delim
+     *            the delimiter character to use
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setDelimiterChar(final char delim) {
+        return setDelimiterMatcher(StringMatcherFactory.INSTANCE.charMatcher(delim));
+    }
+
+    /**
+     * Sets the field delimiter matcher.
+     *

+ * The delimiter is used to separate one token from another. + *

+ * + * @param delim + * the delimiter matcher to use + * @return this, to enable chaining + */ + public StringTokenizer setDelimiterMatcher(final StringMatcher delim) { + this.delimMatcher = delim == null ? StringMatcherFactory.INSTANCE.noneMatcher() : delim; + return this; + } + + /** + * Sets the field delimiter string. + * + * @param delim + * the delimiter string to use + * @return this, to enable chaining + */ + public StringTokenizer setDelimiterString(final String delim) { + return setDelimiterMatcher(StringMatcherFactory.INSTANCE.stringMatcher(delim)); + } + + /** + * Sets whether the tokenizer should return empty tokens as null. The default for this property is false. + * + * @param emptyAsNull + * whether empty tokens are returned as null + * @return this, to enable chaining + */ + public StringTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) { + this.emptyAsNull = emptyAsNull; + return this; + } + + /** + * Set the character to ignore. + *

+ * This character is ignored when parsing the String, unless it is within a quoted region. + *

+ * + * @param ignored + * the ignored character to use + * @return this, to enable chaining + */ + public StringTokenizer setIgnoredChar(final char ignored) { + return setIgnoredMatcher(StringMatcherFactory.INSTANCE.charMatcher(ignored)); + } + + /** + * Set the matcher for characters to ignore. + *

+ * These characters are ignored when parsing the String, unless they are within a quoted region. + *

+ * + * @param ignored + * the ignored matcher to use, null ignored + * @return this, to enable chaining + */ + public StringTokenizer setIgnoredMatcher(final StringMatcher ignored) { + if (ignored != null) { + this.ignoredMatcher = ignored; + } + return this; + } + + /** + * Sets whether the tokenizer should ignore and not return empty tokens. The default for this property is true. + * + * @param ignoreEmptyTokens + * whether empty tokens are not returned + * @return this, to enable chaining + */ + public StringTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) { + this.ignoreEmptyTokens = ignoreEmptyTokens; + return this; + } + + /** + * Sets the quote character to use. + *

+ * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. + *

+ * + * @param quote + * the quote character to use + * @return this, to enable chaining + */ + public StringTokenizer setQuoteChar(final char quote) { + return setQuoteMatcher(StringMatcherFactory.INSTANCE.charMatcher(quote)); + } + + /** + * Set the quote matcher to use. + *

+ * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. + *

+ * + * @param quote + * the quote matcher to use, null ignored + * @return this, to enable chaining + */ + public StringTokenizer setQuoteMatcher(final StringMatcher quote) { + if (quote != null) { + this.quoteMatcher = quote; + } + return this; + } + + /** + * Sets the matcher for characters to trim. + *

+     * These characters are trimmed off on each side of the delimiter until the token or quote is found.
+     *
+     * @param trimmer
+     *            the trimmer matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setTrimmerMatcher(final StringMatcher trimmer) {
+        // a null argument leaves the current trimmer matcher untouched
+        if (trimmer != null) {
+            this.trimmerMatcher = trimmer;
+        }
+        return this;
+    }
+
+    /**
+     * Gets the number of tokens found in the String.
+     *
+     * @return The number of matched tokens
+     */
+    public int size() {
+        checkTokenized();
+        return tokens.length;
+    }
+
+    /**
+     * Internal method that performs the tokenization.
+     *

+ * Most users of this class do not need to call this method. This method will be called automatically by other + * (public) methods when required. + *

+ *

+     * This method exists to allow subclasses to add code before or after the tokenization. For example, a subclass
+     * could alter the character array, offset or count to be parsed, or call the tokenizer multiple times on multiple
+     * strings. It is also possible to filter the results.
+     *

+ *

+     * {@code StringTokenizer} will always pass a zero offset and a count equal to the length of the array to this
+     * method, however a subclass may pass other values, or even an entirely different array.
+     *

+     *
+     * @param srcChars
+     *            the character array being tokenized, may be null
+     * @param offset
+     *            the start position within the character array, must be valid
+     * @param count
+     *            the number of characters to tokenize, must be valid
+     * @return The modifiable list of String tokens, unmodifiable if null array or zero count
+     */
+    protected List tokenize(final char[] srcChars, final int offset, final int count) {
+        if (srcChars == null || count == 0) {
+            return Collections.emptyList();
+        }
+        final TextStringBuilder buf = new TextStringBuilder();
+        final List tokenList = new ArrayList<>();
+        int pos = offset;
+
+        // loop around the entire buffer
+        // NOTE: pos is compared against count directly, so count acts as the exclusive end position here;
+        // that equals "offset + count" only when offset is 0
+        while (pos >= 0 && pos < count) {
+            // find next token
+            pos = readNextToken(srcChars, pos, count, buf, tokenList);
+
+            // handle case where end of string is a delimiter
+            if (pos >= count) {
+                addToken(tokenList, StringUtils.EMPTY);
+            }
+        }
+        return tokenList;
+    }
+
+    /**
+     * Returns a short description of this tokenizer's state.
+     *
+     * @return {@code "StringTokenizer[not tokenized yet]"} while no tokens have been parsed, otherwise
+     *         {@code "StringTokenizer"} followed by the token list
+     */
+    @Override
+    public String toString() {
+        if (tokens == null) {
+            return "StringTokenizer[not tokenized yet]";
+        }
+        return "StringTokenizer" + getTokenList();
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/text/TextRandomProvider.java b/src/main/java/org/apache/commons/text/TextRandomProvider.java
index 98c5f2d13c..e0d5fb313b 100644
--- a/src/main/java/org/apache/commons/text/TextRandomProvider.java
+++ b/src/main/java/org/apache/commons/text/TextRandomProvider.java
@@ -20,7 +20,7 @@
  *

* TextRandomProvider implementations are used by {@link RandomStringGenerator} * as a source of randomness. It is highly recommended that the - * Apache Commons RNG + * Apache Commons RNG * library be used to provide the random number generation. *

* diff --git a/src/main/java/org/apache/commons/text/TextStringBuilder.java b/src/main/java/org/apache/commons/text/TextStringBuilder.java new file mode 100644 index 0000000000..6a4bde8563 --- /dev/null +++ b/src/main/java/org/apache/commons/text/TextStringBuilder.java @@ -0,0 +1,3116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text; + +import java.io.IOException; +import java.io.Reader; +import java.io.Serializable; +import java.io.Writer; +import java.nio.CharBuffer; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.matcher.StringMatcher; + +/** + * Builds a string from constituent parts providing a more flexible and powerful API than StringBuffer. + *

+ * The main differences from StringBuffer/StringBuilder are: + *

+ *
    + *
  • Not synchronized
  • + *
  • Not final
  • + *
  • Subclasses have direct access to character array
  • + *
  • Additional methods + *
      + *
    • appendWithSeparators - adds an array of values, with a separator
    • + *
    • appendPadding - adds a length padding characters
    • + *
    • appendFixedLength - adds a fixed width field to the builder
    • + *
    • toCharArray/getChars - simpler ways to get a range of the character array
    • + *
    • delete - delete char or string
    • + *
    • replace - search and replace for a char or string
    • + *
    • leftString/rightString/midString - substring without exceptions
    • + *
    • contains - whether the builder contains a char or string
    • + *
    • size/clear/isEmpty - collections style API methods
    • + *
    + *
  • + *
  • Views + *
      + *
    • asTokenizer - uses the internal buffer as the source of a StrTokenizer
    • + *
    • asReader - uses the internal buffer as the source of a Reader
    • + *
    • asWriter - allows a Writer to write directly to the internal buffer
    • + *
    + *
  • + *
+ *

+ * The aim has been to provide an API that mimics very closely what StringBuffer provides, but with additional methods. + * It should be noted that some edge cases, with invalid indices or null input, have been altered - see individual + * methods. The biggest of these changes is that by default, null will not output the text 'null'. This can be + * controlled by a property, {@link #setNullText(String)}. + *

+ *

+ * This class is called {@code TextStringBuilder} instead of {@code StringBuilder} to avoid clashing with + * {@link java.lang.StringBuilder}. + *

+ *
+ * @since 1.3
+ */
+public class TextStringBuilder implements CharSequence, Appendable, Serializable, Builder {
+
+    /**
+     * Inner class to allow TextStringBuilder to operate as a reader.
+     * <p>
+     * The reader reads straight from the enclosing builder, so it observes the builder's current size on every call.
+     */
+    class TextStringBuilderReader extends Reader {
+
+        /** The last mark position. */
+        private int mark;
+
+        /** The current stream position. */
+        private int pos;
+
+        /**
+         * Default constructor.
+         */
+        TextStringBuilderReader() {
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public void close() {
+            // do nothing
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public void mark(final int readAheadLimit) {
+            // the read-ahead limit is irrelevant: the whole content stays available in the builder
+            mark = pos;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public boolean markSupported() {
+            return true;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int read() {
+            if (!ready()) {
+                return -1;
+            }
+            return TextStringBuilder.this.charAt(pos++);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int read(final char[] b, final int off, int len) {
+            // "off + len < 0" catches int overflow of the sum
+            if (off < 0 || len < 0 || off > b.length || off + len > b.length || off + len < 0) {
+                throw new IndexOutOfBoundsException();
+            }
+            if (len == 0) {
+                return 0;
+            }
+            if (pos >= TextStringBuilder.this.size()) {
+                return -1;
+            }
+            // clamp the request to what is left in the builder
+            if (pos + len > size()) {
+                len = TextStringBuilder.this.size() - pos;
+            }
+            TextStringBuilder.this.getChars(pos, pos + len, b, off);
+            pos += len;
+            return len;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public boolean ready() {
+            return pos < TextStringBuilder.this.size();
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public void reset() {
+            pos = mark;
+        }
+
+        /**
+         * Skips up to {@code n} characters, never moving past the end of the builder.
+         *
+         * @param n the number of characters to skip; negative values skip nothing
+         * @return the number of characters actually skipped, 0 if {@code n} is negative or nothing is left
+         */
+        @Override
+        public long skip(long n) {
+            // Compare against the remaining count instead of computing "pos + n": the sum overflows for very
+            // large n (e.g. Long.MAX_VALUE) once pos > 0, which would bypass the clamp and corrupt pos.
+            final long remaining = (long) TextStringBuilder.this.size() - pos;
+            if (n > remaining) {
+                n = remaining;
+            }
+            if (n < 0) {
+                return 0;
+            }
+            // n now fits in an int because it is bounded by the builder size
+            pos += n;
+            return n;
+        }
+    }
+
+    /**
+     * Inner class to allow TextStringBuilder to operate as a tokenizer.
+     */
+    class TextStringBuilderTokenizer extends StringTokenizer {
+
+        /**
+         * Default constructor.
+ */ + TextStringBuilderTokenizer() { + } + + /** {@inheritDoc} */ + @Override + public String getContent() { + final String str = super.getContent(); + if (str == null) { + return TextStringBuilder.this.toString(); + } + return str; + } + + /** {@inheritDoc} */ + @Override + protected List tokenize(final char[] chars, final int offset, final int count) { + if (chars == null) { + return super.tokenize(TextStringBuilder.this.getBuffer(), 0, TextStringBuilder.this.size()); + } + return super.tokenize(chars, offset, count); + } + } + + /** + * Inner class to allow StrBuilder to operate as a writer. + */ + class TextStringBuilderWriter extends Writer { + + /** + * Default constructor. + */ + TextStringBuilderWriter() { + } + + /** {@inheritDoc} */ + @Override + public void close() { + // do nothing + } + + /** {@inheritDoc} */ + @Override + public void flush() { + // do nothing + } + + /** {@inheritDoc} */ + @Override + public void write(final char[] cbuf) { + TextStringBuilder.this.append(cbuf); + } + + /** {@inheritDoc} */ + @Override + public void write(final char[] cbuf, final int off, final int len) { + TextStringBuilder.this.append(cbuf, off, len); + } + + /** {@inheritDoc} */ + @Override + public void write(final int c) { + TextStringBuilder.this.append((char) c); + } + + /** {@inheritDoc} */ + @Override + public void write(final String str) { + TextStringBuilder.this.append(str); + } + + /** {@inheritDoc} */ + @Override + public void write(final String str, final int off, final int len) { + TextStringBuilder.this.append(str, off, len); + } + } + + /** The space character. */ + private static final char SPACE = ' '; + + /** + * The extra capacity for new builders. + */ + static final int CAPACITY = 32; + + /** + * End-Of-Stream. + */ + private static final int EOS = -1; + + /** + * The size of the string {@code "false"}. + */ + private static final int FALSE_STRING_SIZE = Boolean.FALSE.toString().length(); + + /** + * Required for serialization support. 
+ * + * @see java.io.Serializable + */ + private static final long serialVersionUID = 1L; + + /** + * The size of the string {@code "true"}. + */ + private static final int TRUE_STRING_SIZE = Boolean.TRUE.toString().length(); + + /** + * Constructs an instance from a reference to a character array. Changes to the input chars are reflected in this + * instance until the internal buffer needs to be reallocated. Using a reference to an array allows the instance to + * be initialized without copying the input array. + * + * @param initialBuffer The initial array that will back the new builder. + * @return A new instance. + * @since 1.9 + */ + public static TextStringBuilder wrap(final char[] initialBuffer) { + Objects.requireNonNull(initialBuffer, "initialBuffer"); + return new TextStringBuilder(initialBuffer, initialBuffer.length); + } + + /** + * Constructs an instance from a reference to a character array. Changes to the input chars are reflected in this + * instance until the internal buffer needs to be reallocated. Using a reference to an array allows the instance to + * be initialized without copying the input array. + * + * @param initialBuffer The initial array that will back the new builder. + * @param length The length of the subarray to be used; must be non-negative and no larger than + * {@code initialBuffer.length}. The new builder's size will be set to {@code length}. + * @return A new instance. + * @since 1.9 + */ + public static TextStringBuilder wrap(final char[] initialBuffer, final int length) { + return new TextStringBuilder(initialBuffer, length); + } + + /** Internal data storage. */ + private char[] buffer; + + /** The new line. */ + private String newLine; + + /** The null text. */ + private String nullText; + + /** Incremented when the buffer is reallocated. */ + private int reallocations; + + /** Current size of the buffer. */ + private int size; + + /** + * Constructor that creates an empty builder initial capacity 32 characters. 
+ */ + public TextStringBuilder() { + this(CAPACITY); + } + + /** + * Constructs an instance from a reference to a character array. + * + * @param initialBuffer a reference to a character array, must not be null. + * @param length The length of the subarray to be used; must be non-negative and no larger than + * {@code initialBuffer.length}. The new builder's size will be set to {@code length}. + * @throws NullPointerException If {@code initialBuffer} is null. + * @throws IllegalArgumentException if {@code length} is bad. + */ + private TextStringBuilder(final char[] initialBuffer, final int length) { + this.buffer = Objects.requireNonNull(initialBuffer, "initialBuffer"); + if (length < 0 || length > initialBuffer.length) { + throw new IllegalArgumentException("initialBuffer.length=" + initialBuffer.length + ", length=" + length); + } + this.size = length; + } + + /** + * Constructs an instance from a character sequence, allocating 32 extra characters for growth. + * + * @param seq the string to copy, null treated as blank string + * @since 1.9 + */ + public TextStringBuilder(final CharSequence seq) { + this(StringUtils.length(seq) + CAPACITY); + if (seq != null) { + append(seq); + } + } + + /** + * Constructs an instance with the specified initial capacity. + * + * @param initialCapacity the initial capacity, zero or less will be converted to 32 + */ + public TextStringBuilder(final int initialCapacity) { + buffer = new char[initialCapacity <= 0 ? CAPACITY : initialCapacity]; + } + + /** + * Constructs an instance from a string, allocating 32 extra characters for growth. + * + * @param str the string to copy, null treated as blank string + */ + public TextStringBuilder(final String str) { + this(StringUtils.length(str) + CAPACITY); + if (str != null) { + append(str); + } + } + + /** + * Appends a boolean value to the string builder. 
+ * + * @param value the value to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final boolean value) { + if (value) { + ensureCapacity(size + TRUE_STRING_SIZE); + appendTrue(size); + } else { + ensureCapacity(size + FALSE_STRING_SIZE); + appendFalse(size); + } + return this; + } + + /** + * Appends a char value to the string builder. + * + * @param ch the value to append + * @return this, to enable chaining + */ + @Override + public TextStringBuilder append(final char ch) { + final int len = length(); + ensureCapacity(len + 1); + buffer[size++] = ch; + return this; + } + + /** + * Appends a char array to the string builder. Appending null will call {@link #appendNull()}. + * + * @param chars the char array to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final char[] chars) { + if (chars == null) { + return appendNull(); + } + final int strLen = chars.length; + if (strLen > 0) { + final int len = length(); + ensureCapacity(len + strLen); + System.arraycopy(chars, 0, buffer, len, strLen); + size += strLen; + } + return this; + } + + /** + * Appends a char array to the string builder. Appending null will call {@link #appendNull()}. 
+     *
+     * @param chars the char array to append
+     * @param startIndex the start index, inclusive, must be valid
+     * @param length the length to append, must be valid
+     * @return this, to enable chaining
+     * @throws StringIndexOutOfBoundsException if {@code startIndex} is not in the range
+     *         {@code 0 <= startIndex <= chars.length}, or if {@code length} is negative or
+     *         {@code startIndex + length} exceeds {@code chars.length}
+     */
+    public TextStringBuilder append(final char[] chars, final int startIndex, final int length) {
+        if (chars == null) {
+            return appendNull();
+        }
+        if (startIndex < 0 || startIndex > chars.length) {
+            // report the offending startIndex (the message previously printed length by mistake)
+            throw new StringIndexOutOfBoundsException("Invalid startIndex: " + startIndex);
+        }
+        // written as a subtraction so that a huge length cannot overflow "startIndex + length" and slip through
+        if (length < 0 || length > chars.length - startIndex) {
+            throw new StringIndexOutOfBoundsException("Invalid length: " + length);
+        }
+        if (length > 0) {
+            final int len = length();
+            ensureCapacity(len + length);
+            System.arraycopy(chars, startIndex, buffer, len, length);
+            size += length;
+        }
+        return this;
+    }
+
+    /**
+     * Appends the contents of a char buffer to this string builder. Appending null will call {@link #appendNull()}.
+     *
+     * @param str the char buffer to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder append(final CharBuffer str) {
+        return append(str, 0, StringUtils.length(str));
+    }
+
+    /**
+     * Appends the contents of a char buffer to this string builder. Appending null will call {@link #appendNull()}.
+ * + * @param buf the char buffer to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining + */ + public TextStringBuilder append(final CharBuffer buf, final int startIndex, final int length) { + if (buf == null) { + return appendNull(); + } + if (buf.hasArray()) { + final int totalLength = buf.remaining(); + if (startIndex < 0 || startIndex > totalLength) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + if (length < 0 || startIndex + length > totalLength) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + final int len = length(); + ensureCapacity(len + length); + System.arraycopy(buf.array(), buf.arrayOffset() + buf.position() + startIndex, buffer, len, length); + size += length; + } else { + append(buf.toString(), startIndex, length); + } + return this; + } + + /** + * Appends a CharSequence to this string builder. Appending null will call {@link #appendNull()}. + * + * @param seq the CharSequence to append + * @return this, to enable chaining + */ + @Override + public TextStringBuilder append(final CharSequence seq) { + if (seq == null) { + return appendNull(); + } + if (seq instanceof TextStringBuilder) { + return append((TextStringBuilder) seq); + } + if (seq instanceof StringBuilder) { + return append((StringBuilder) seq); + } + if (seq instanceof StringBuffer) { + return append((StringBuffer) seq); + } + if (seq instanceof CharBuffer) { + return append((CharBuffer) seq); + } + return append(seq.toString()); + } + + /** + * Appends part of a CharSequence to this string builder. Appending null will call {@link #appendNull()}. 
+ * + * @param seq the CharSequence to append + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid + * @return this, to enable chaining + */ + @Override + public TextStringBuilder append(final CharSequence seq, final int startIndex, final int endIndex) { + if (seq == null) { + return appendNull(); + } + if (endIndex <= 0) { + throw new StringIndexOutOfBoundsException("endIndex must be valid"); + } + if (startIndex >= endIndex) { + throw new StringIndexOutOfBoundsException("endIndex must be greater than startIndex"); + } + return append(seq.toString(), startIndex, endIndex - startIndex); + } + + /** + * Appends a double value to the string builder using {@code String.valueOf}. + * + * @param value the value to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final double value) { + return append(String.valueOf(value)); + } + + /** + * Appends a float value to the string builder using {@code String.valueOf}. + * + * @param value the value to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final float value) { + return append(String.valueOf(value)); + } + + /** + * Appends an int value to the string builder using {@code String.valueOf}. + * + * @param value the value to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final int value) { + return append(String.valueOf(value)); + } + + /** + * Appends a long value to the string builder using {@code String.valueOf}. + * + * @param value the value to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final long value) { + return append(String.valueOf(value)); + } + + /** + * Appends an object to this string builder. Appending null will call {@link #appendNull()}. 
+ * + * @param obj the object to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final Object obj) { + if (obj == null) { + return appendNull(); + } + if (obj instanceof CharSequence) { + return append((CharSequence) obj); + } + return append(obj.toString()); + } + + /** + * Appends a string to this string builder. Appending null will call {@link #appendNull()}. + * + * @param str the string to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final String str) { + return append(str, 0, StringUtils.length(str)); + } + + /** + * Appends part of a string to this string builder. Appending null will call {@link #appendNull()}. + * + * @param str the string to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining + */ + public TextStringBuilder append(final String str, final int startIndex, final int length) { + if (str == null) { + return appendNull(); + } + if (startIndex < 0 || startIndex > str.length()) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + if (length < 0 || startIndex + length > str.length()) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + if (length > 0) { + final int len = length(); + ensureCapacity(len + length); + str.getChars(startIndex, startIndex + length, buffer, len); + size += length; + } + return this; + } + + /** + * Calls {@link String#format(String, Object...)} and appends the result. + * + * @param format the format string + * @param objs the objects to use in the format string + * @return {@code this} to enable chaining + * @see String#format(String, Object...) + */ + public TextStringBuilder append(final String format, final Object... objs) { + return append(String.format(format, objs)); + } + + /** + * Appends a string buffer to this string builder. Appending null will call {@link #appendNull()}. 
+ * + * @param str the string buffer to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final StringBuffer str) { + return append(str, 0, StringUtils.length(str)); + } + + /** + * Appends part of a string buffer to this string builder. Appending null will call {@link #appendNull()}. + * + * @param str the string to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining + */ + public TextStringBuilder append(final StringBuffer str, final int startIndex, final int length) { + if (str == null) { + return appendNull(); + } + if (startIndex < 0 || startIndex > str.length()) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + if (length < 0 || startIndex + length > str.length()) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + if (length > 0) { + final int len = length(); + ensureCapacity(len + length); + str.getChars(startIndex, startIndex + length, buffer, len); + size += length; + } + return this; + } + + /** + * Appends a StringBuilder to this string builder. Appending null will call {@link #appendNull()}. + * + * @param str the StringBuilder to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final StringBuilder str) { + return append(str, 0, StringUtils.length(str)); + } + + /** + * Appends part of a StringBuilder to this string builder. Appending null will call {@link #appendNull()}. 
+ * + * @param str the StringBuilder to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining + */ + public TextStringBuilder append(final StringBuilder str, final int startIndex, final int length) { + if (str == null) { + return appendNull(); + } + if (startIndex < 0 || startIndex > str.length()) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + if (length < 0 || startIndex + length > str.length()) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + if (length > 0) { + final int len = length(); + ensureCapacity(len + length); + str.getChars(startIndex, startIndex + length, buffer, len); + size += length; + } + return this; + } + + /** + * Appends another string builder to this string builder. Appending null will call {@link #appendNull()}. + * + * @param str the string builder to append + * @return this, to enable chaining + */ + public TextStringBuilder append(final TextStringBuilder str) { + return append(str, 0, StringUtils.length(str)); + } + + /** + * Appends part of a string builder to this string builder. Appending null will call {@link #appendNull()}. 
+ * + * @param str the string to append + * @param startIndex the start index, inclusive, must be valid + * @param length the length to append, must be valid + * @return this, to enable chaining + */ + public TextStringBuilder append(final TextStringBuilder str, final int startIndex, final int length) { + if (str == null) { + return appendNull(); + } + if (startIndex < 0 || startIndex > str.length()) { + throw new StringIndexOutOfBoundsException("startIndex must be valid"); + } + if (length < 0 || startIndex + length > str.length()) { + throw new StringIndexOutOfBoundsException("length must be valid"); + } + if (length > 0) { + final int len = length(); + ensureCapacity(len + length); + str.getChars(startIndex, startIndex + length, buffer, len); + size += length; + } + return this; + } + + /** + * Appends each item in an iterable to the builder without any separators. Appending a null iterable will have no + * effect. Each object is appended using {@link #append(Object)}. + * + * @param iterable the iterable to append + * @return this, to enable chaining + */ + public TextStringBuilder appendAll(final Iterable iterable) { + if (iterable != null) { + for (final Object o : iterable) { + append(o); + } + } + return this; + } + + /** + * Appends each item in an iterator to the builder without any separators. Appending a null iterator will have no + * effect. Each object is appended using {@link #append(Object)}. + * + * @param it the iterator to append + * @return this, to enable chaining + */ + public TextStringBuilder appendAll(final Iterator it) { + if (it != null) { + while (it.hasNext()) { + append(it.next()); + } + } + return this; + } + + /** + * Appends each item in an array to the builder without any separators. Appending a null array will have no effect. + * Each object is appended using {@link #append(Object)}. 
+     *
+     * @param <T> the element type
+     * @param array the array to append
+     * @return this, to enable chaining
+     */
+    public <T> TextStringBuilder appendAll(@SuppressWarnings("unchecked") final T... array) {
+        /*
+         * @SuppressWarnings used to hide warning about vararg usage. We cannot use @SafeVarargs, since this method is
+         * not final. Using @SuppressWarnings is fine, because it isn't inherited by subclasses, so each subclass must
+         * vouch for itself whether its use of 'array' is safe.
+         */
+        if (array != null && array.length > 0) {
+            for (final Object element : array) {
+                append(element);
+            }
+        }
+        return this;
+    }
+
+    /**
+     * Appends {@code "false"} by writing its characters directly into the buffer.
+     * The caller must have ensured capacity for {@code FALSE_STRING_SIZE} more characters.
+     *
+     * @param index the buffer position at which to start writing
+     */
+    private void appendFalse(int index) {
+        buffer[index++] = 'f';
+        buffer[index++] = 'a';
+        buffer[index++] = 'l';
+        buffer[index++] = 's';
+        buffer[index] = 'e';
+        size += FALSE_STRING_SIZE;
+    }
+
+    /**
+     * Appends an int value to the builder padding on the left to a fixed width. The {@code String.valueOf} of the
+     * {@code int} value is used. If the formatted value is larger than the length, the left hand side is lost.
+     *
+     * @param value the value to append
+     * @param width the fixed field width, zero or negative has no effect
+     * @param padChar the pad character to use
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendFixedWidthPadLeft(final int value, final int width, final char padChar) {
+        return appendFixedWidthPadLeft(String.valueOf(value), width, padChar);
+    }
+
+    /**
+     * Appends an object to the builder padding on the left to a fixed width. The {@code toString} of the object is
+     * used. If the object is larger than the length, the left hand side is lost. If the object is null, the null text
+     * value is used.
+ * + * @param obj the object to append, null uses null text + * @param width the fixed field width, zero or negative has no effect + * @param padChar the pad character to use + * @return this, to enable chaining + */ + public TextStringBuilder appendFixedWidthPadLeft(final Object obj, final int width, final char padChar) { + if (width > 0) { + ensureCapacity(size + width); + String str = obj == null ? getNullText() : obj.toString(); + if (str == null) { + str = StringUtils.EMPTY; + } + final int strLen = str.length(); + if (strLen >= width) { + str.getChars(strLen - width, strLen, buffer, size); + } else { + final int padLen = width - strLen; + for (int i = 0; i < padLen; i++) { + buffer[size + i] = padChar; + } + str.getChars(0, strLen, buffer, size + padLen); + } + size += width; + } + return this; + } + + /** + * Appends an object to the builder padding on the right to a fixed length. The {@code String.valueOf} of the + * {@code int} value is used. If the object is larger than the length, the right hand side is lost. + * + * @param value the value to append + * @param width the fixed field width, zero or negative has no effect + * @param padChar the pad character to use + * @return this, to enable chaining + */ + public TextStringBuilder appendFixedWidthPadRight(final int value, final int width, final char padChar) { + return appendFixedWidthPadRight(String.valueOf(value), width, padChar); + } + + /** + * Appends an object to the builder padding on the right to a fixed length. The {@code toString} of the object is + * used. If the object is larger than the length, the right hand side is lost. If the object is null, null text + * value is used. 
+ * + * @param obj the object to append, null uses null text + * @param width the fixed field width, zero or negative has no effect + * @param padChar the pad character to use + * @return this, to enable chaining + */ + public TextStringBuilder appendFixedWidthPadRight(final Object obj, final int width, final char padChar) { + if (width > 0) { + ensureCapacity(size + width); + String str = obj == null ? getNullText() : obj.toString(); + if (str == null) { + str = StringUtils.EMPTY; + } + final int strLen = str.length(); + if (strLen >= width) { + str.getChars(0, width, buffer, size); + } else { + final int padLen = width - strLen; + str.getChars(0, strLen, buffer, size); + for (int i = 0; i < padLen; i++) { + buffer[size + strLen + i] = padChar; + } + } + size += width; + } + return this; + } + + /** + * Appends a boolean value followed by a new line to the string builder. + * + * @param value the value to append + * @return this, to enable chaining + */ + public TextStringBuilder appendln(final boolean value) { + return append(value).appendNewLine(); + } + + /** + * Appends a char value followed by a new line to the string builder. + * + * @param ch the value to append + * @return this, to enable chaining + */ + public TextStringBuilder appendln(final char ch) { + return append(ch).appendNewLine(); + } + + /** + * Appends a char array followed by a new line to the string builder. Appending null will call + * {@link #appendNull()}. + * + * @param chars the char array to append + * @return this, to enable chaining + */ + public TextStringBuilder appendln(final char[] chars) { + return append(chars).appendNewLine(); + } + + /** + * Appends a char array followed by a new line to the string builder. Appending null will call + * {@link #appendNull()}. 
+     *
+     * @param chars the char array to append
+     * @param startIndex the start index, inclusive, must be valid
+     * @param length the length to append, must be valid
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final char[] chars, final int startIndex, final int length) {
+        return append(chars, startIndex, length).appendNewLine();
+    }
+
+    /**
+     * Appends a double value followed by a new line to the string builder using {@code String.valueOf}.
+     * Implemented as {@code append(value)} chained with {@link #appendNewLine()}.
+     *
+     * @param value the value to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final double value) {
+        return append(value).appendNewLine();
+    }
+
+    /**
+     * Appends a float value followed by a new line to the string builder using {@code String.valueOf}.
+     * Implemented as {@code append(value)} chained with {@link #appendNewLine()}.
+     *
+     * @param value the value to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final float value) {
+        return append(value).appendNewLine();
+    }
+
+    /**
+     * Appends an int value followed by a new line to the string builder using {@code String.valueOf}.
+     * Implemented as {@code append(value)} chained with {@link #appendNewLine()}.
+     *
+     * @param value the value to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final int value) {
+        return append(value).appendNewLine();
+    }
+
+    /**
+     * Appends a long value followed by a new line to the string builder using {@code String.valueOf}.
+     * Implemented as {@code append(value)} chained with {@link #appendNewLine()}.
+     *
+     * @param value the value to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final long value) {
+        return append(value).appendNewLine();
+    }
+
+    // -----------------------------------------------------------------------
+    /**
+     * Appends an object followed by a new line to this string builder. Appending null will call {@link #appendNull()}.
+     * Implemented as {@code append(obj)} chained with {@link #appendNewLine()}.
+     *
+     * @param obj the object to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final Object obj) {
+        return append(obj).appendNewLine();
+    }
+
+    /**
+     * Appends a string followed by a new line to this string builder. Appending null will call {@link #appendNull()}.
+     * Implemented as {@code append(str)} chained with {@link #appendNewLine()}.
+     *
+     * @param str the string to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final String str) {
+        return append(str).appendNewLine();
+    }
+
+    /**
+     * Appends part of a string followed by a new line to this string builder. Appending null will call
+     * {@link #appendNull()}.
+     *
+     * @param str the string to append
+     * @param startIndex the start index, inclusive, must be valid
+     * @param length the length to append, must be valid
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final String str, final int startIndex, final int length) {
+        return append(str, startIndex, length).appendNewLine();
+    }
+
+    /**
+     * Calls {@link String#format(String, Object...)} and appends the result, followed by a new line.
+     *
+     * @param format the format string
+     * @param objs the objects to use in the format string
+     * @return {@code this} to enable chaining
+     * @see String#format(String, Object...)
+     */
+    public TextStringBuilder appendln(final String format, final Object... objs) {
+        return append(format, objs).appendNewLine();
+    }
+
+    /**
+     * Appends a string buffer followed by a new line to this string builder. Appending null will call
+     * {@link #appendNull()}.
+     *
+     * @param str the string buffer to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final StringBuffer str) {
+        return append(str).appendNewLine();
+    }
+
+    /**
+     * Appends part of a string buffer followed by a new line to this string builder. Appending null will call
+     * {@link #appendNull()}.
+     *
+     * @param str the string buffer to append
+     * @param startIndex the start index, inclusive, must be valid
+     * @param length the length to append, must be valid
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final StringBuffer str, final int startIndex, final int length) {
+        return append(str, startIndex, length).appendNewLine();
+    }
+
+    /**
+     * Appends a string builder followed by a new line to this string builder. Appending null will call
+     * {@link #appendNull()}.
+     *
+     * @param str the string builder to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final StringBuilder str) {
+        return append(str).appendNewLine();
+    }
+
+    /**
+     * Appends part of a string builder followed by a new line to this string builder. Appending null will call
+     * {@link #appendNull()}.
+     *
+     * @param str the string builder to append
+     * @param startIndex the start index, inclusive, must be valid
+     * @param length the length to append, must be valid
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final StringBuilder str, final int startIndex, final int length) {
+        return append(str, startIndex, length).appendNewLine();
+    }
+
+    /**
+     * Appends another string builder followed by a new line to this string builder. Appending null will call
+     * {@link #appendNull()}.
+     *
+     * @param str the string builder to append
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final TextStringBuilder str) {
+        return append(str).appendNewLine();
+    }
+
+    /**
+     * Appends part of a string builder followed by a new line to this string builder. Appending null will call
+     * {@link #appendNull()}.
+     *
+     * @param str the string builder to append
+     * @param startIndex the start index, inclusive, must be valid
+     * @param length the length to append, must be valid
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder appendln(final TextStringBuilder str, final int startIndex, final int length) {
+        return append(str, startIndex, length).appendNewLine();
+    }
+
+    /**
+     * Appends the new line string to this string builder.
+     *

+ * The new line string can be altered using {@link #setNewLineText(String)}. This might be used to force the output + * to always use Unix line endings even when on Windows. + * + * @return this, to enable chaining + */ + public TextStringBuilder appendNewLine() { + if (newLine == null) { + append(System.lineSeparator()); + return this; + } + return append(newLine); + } + + /** + * Appends the text representing {@code null} to this string builder. + * + * @return this, to enable chaining + */ + public TextStringBuilder appendNull() { + if (nullText == null) { + return this; + } + return append(nullText); + } + + /** + * Appends the pad character to the builder the specified number of times. + * + * @param length the length to append, negative means no append + * @param padChar the character to append + * @return this, to enable chaining + */ + public TextStringBuilder appendPadding(final int length, final char padChar) { + if (length >= 0) { + ensureCapacity(size + length); + for (int i = 0; i < length; i++) { + buffer[size++] = padChar; + } + } + return this; + } + + /** + * Appends a separator if the builder is currently non-empty. The separator is appended using {@link #append(char)}. + *

+ * This method is useful for adding a separator each time around the loop except the first. + *

+ * + *
+     * for (Iterator it = list.iterator(); it.hasNext();) {
+     *     appendSeparator(',');
+     *     append(it.next());
+     * }
+     * 
+ * + *

+ * Note that for this simple example, you should use {@link #appendWithSeparators(Iterable, String)}. + *

+ * + * @param separator the separator to use + * @return this, to enable chaining + */ + public TextStringBuilder appendSeparator(final char separator) { + if (isNotEmpty()) { + append(separator); + } + return this; + } + + /** + * Append one of both separators to the builder If the builder is currently empty it will append the + * defaultIfEmpty-separator Otherwise it will append the standard-separator + * + * The separator is appended using {@link #append(char)}. + * + * @param standard the separator if builder is not empty + * @param defaultIfEmpty the separator if builder is empty + * @return this, to enable chaining + */ + public TextStringBuilder appendSeparator(final char standard, final char defaultIfEmpty) { + if (isEmpty()) { + append(defaultIfEmpty); + } else { + append(standard); + } + return this; + } + + /** + * Appends a separator to the builder if the loop index is greater than zero. The separator is appended using + * {@link #append(char)}. + *

+ * This method is useful for adding a separator each time around the loop except the first. + *

+ * + *
+     * for (int i = 0; i < list.size(); i++) {
+     *     appendSeparator(',', i);
+     *     append(list.get(i));
+     * }
+     * 
+ * + *

+ * Note that for this simple example, you should use {@link #appendWithSeparators(Iterable, String)}. + *

+ * + * @param separator the separator to use + * @param loopIndex the loop index + * @return this, to enable chaining + */ + public TextStringBuilder appendSeparator(final char separator, final int loopIndex) { + if (loopIndex > 0) { + append(separator); + } + return this; + } + + /** + * Appends a separator if the builder is currently non-empty. Appending a null separator will have no effect. The + * separator is appended using {@link #append(String)}. + *

+ * This method is useful for adding a separator each time around the loop except the first. + *

+ * + *
+     * for (Iterator it = list.iterator(); it.hasNext();) {
+     *     appendSeparator(",");
+     *     append(it.next());
+     * }
+     * 
+ * + *

+ * Note that for this simple example, you should use {@link #appendWithSeparators(Iterable, String)}. + *

+ * + * @param separator the separator to use, null means no separator + * @return this, to enable chaining + */ + public TextStringBuilder appendSeparator(final String separator) { + return appendSeparator(separator, null); + } + + /** + * Appends a separator to the builder if the loop index is greater than zero. Appending a null separator will have + * no effect. The separator is appended using {@link #append(String)}. + *

+ * This method is useful for adding a separator each time around the loop except the first. + *

+ * + *
+     * for (int i = 0; i < list.size(); i++) {
+     *     appendSeparator(",", i);
+     *     append(list.get(i));
+     * }
+     * 
+ * + *

+ * Note that for this simple example, you should use {@link #appendWithSeparators(Iterable, String)}. + *

+ * + * @param separator the separator to use, null means no separator + * @param loopIndex the loop index + * @return this, to enable chaining + */ + public TextStringBuilder appendSeparator(final String separator, final int loopIndex) { + if (separator != null && loopIndex > 0) { + append(separator); + } + return this; + } + + /** + * Appends one of both separators to the StrBuilder. If the builder is currently empty it will append the + * defaultIfEmpty-separator Otherwise it will append the standard-separator + * + * Appending a null separator will have no effect. The separator is appended using {@link #append(String)}. + *

+ * This method is for example useful for constructing queries + *

+ * + *
+     * TextStringBuilder whereClause = new TextStringBuilder();
+     * if (searchCommand.getPriority() != null) {
+     *     whereClause.appendSeparator(" and", " where");
+     *     whereClause.append(" priority = ?");
+     * }
+     * if (searchCommand.getComponent() != null) {
+     *     whereClause.appendSeparator(" and", " where");
+     *     whereClause.append(" component = ?");
+     * }
+     * selectClause.append(whereClause);
+     * 
+ * + * @param standard the separator if builder is not empty, null means no separator + * @param defaultIfEmpty the separator if builder is empty, null means no separator + * @return this, to enable chaining + */ + public TextStringBuilder appendSeparator(final String standard, final String defaultIfEmpty) { + final String str = isEmpty() ? defaultIfEmpty : standard; + if (str != null) { + append(str); + } + return this; + } + + /** + * Appends current contents of this {@code StrBuilder} to the provided {@link Appendable}. + *

+ * This method tries to avoid doing any extra copies of contents. + *

+ * + * @param appendable the appendable to append data to + * @throws IOException if an I/O error occurs + * + * @see #readFrom(Readable) + */ + public void appendTo(final Appendable appendable) throws IOException { + if (appendable instanceof Writer) { + ((Writer) appendable).write(buffer, 0, size); + } else if (appendable instanceof StringBuilder) { + ((StringBuilder) appendable).append(buffer, 0, size); + } else if (appendable instanceof StringBuffer) { + ((StringBuffer) appendable).append(buffer, 0, size); + } else if (appendable instanceof CharBuffer) { + ((CharBuffer) appendable).put(buffer, 0, size); + } else { + appendable.append(this); + } + } + + /** Appends {@code "true"}. */ + private void appendTrue(int index) { + buffer[index++] = 't'; + buffer[index++] = 'r'; + buffer[index++] = 'u'; + buffer[index] = 'e'; + size += TRUE_STRING_SIZE; + } + + /** + * Appends an iterable placing separators between each value, but not before the first or after the last. Appending + * a null iterable will have no effect. Each object is appended using {@link #append(Object)}. + * + * @param iterable the iterable to append + * @param separator the separator to use, null means no separator + * @return this, to enable chaining + */ + public TextStringBuilder appendWithSeparators(final Iterable iterable, final String separator) { + if (iterable != null) { + final String sep = Objects.toString(separator, StringUtils.EMPTY); + final Iterator it = iterable.iterator(); + while (it.hasNext()) { + append(it.next()); + if (it.hasNext()) { + append(sep); + } + } + } + return this; + } + + /** + * Appends an iterator placing separators between each value, but not before the first or after the last. Appending + * a null iterator will have no effect. Each object is appended using {@link #append(Object)}. 
+ * + * @param it the iterator to append + * @param separator the separator to use, null means no separator + * @return this, to enable chaining + */ + public TextStringBuilder appendWithSeparators(final Iterator it, final String separator) { + if (it != null) { + final String sep = Objects.toString(separator, StringUtils.EMPTY); + while (it.hasNext()) { + append(it.next()); + if (it.hasNext()) { + append(sep); + } + } + } + return this; + } + + /** + * Appends an array placing separators between each value, but not before the first or after the last. Appending a + * null array will have no effect. Each object is appended using {@link #append(Object)}. + * + * @param array the array to append + * @param separator the separator to use, null means no separator + * @return this, to enable chaining + */ + public TextStringBuilder appendWithSeparators(final Object[] array, final String separator) { + if (array != null && array.length > 0) { + final String sep = Objects.toString(separator, StringUtils.EMPTY); + append(array[0]); + for (int i = 1; i < array.length; i++) { + append(sep); + append(array[i]); + } + } + return this; + } + + // ----------------------------------------------------------------------- + /** + * Gets the contents of this builder as a Reader. + *

+ * This method allows the contents of the builder to be read using any standard method that expects a Reader. + *

+ *

+ * To use, simply create a {@code TextStringBuilder}, populate it with data, call {@code asReader}, and then read away. + *

+ *

+ * The internal character array is shared between the builder and the reader. This allows you to append to the + * builder after creating the reader, and the changes will be picked up. Note however, that no synchronization + * occurs, so you must perform all operations with the builder and the reader in one thread. + *

+ *

+ * The returned reader supports marking, and ignores the flush method. + *

+     *
+     * @return a reader that reads from this builder
+     */
+    public Reader asReader() {
+        return new TextStringBuilderReader();
+    }
+
+    /**
+     * Creates a tokenizer that can tokenize the contents of this builder.
+     * <p>
+     * This method allows the contents of this builder to be tokenized. The tokenizer will be setup by default to
+     * tokenize on space, tab, newline and form feed (as per StringTokenizer). These values can be changed on the
+     * tokenizer class, before retrieving the tokens.
+     * </p>
+     * <p>
+     * The returned tokenizer is linked to this builder. You may intermix calls to the builder and tokenizer within
+     * certain limits, however there is no synchronization. Once the tokenizer has been used once, it must be
+     * {@link StringTokenizer#reset() reset} to pickup the latest changes in the builder. For example:
+     * </p>
+     *
+     * <pre>
+     * TextStringBuilder b = new TextStringBuilder();
+     * b.append("a b ");
+     * StringTokenizer t = b.asTokenizer();
+     * String[] tokens1 = t.getTokenArray(); // returns a,b
+     * b.append("c d ");
+     * String[] tokens2 = t.getTokenArray(); // returns a,b (c and d ignored)
+     * t.reset(); // reset causes builder changes to be picked up
+     * String[] tokens3 = t.getTokenArray(); // returns a,b,c,d
+     * </pre>
+     *
+     * <p>
+     * In addition to simply intermixing appends and tokenization, you can also call the set methods on the tokenizer to
+     * alter how it tokenizes. Just remember to call reset when you want to pickup builder changes.
+     * </p>
+     * <p>
+     * Calling {@link StringTokenizer#reset(String)} or {@link StringTokenizer#reset(char[])} with a non-null value will
+     * break the link with the builder.
+     * </p>
+     *
+     * @return a tokenizer that is linked to this builder
+     */
+    public StringTokenizer asTokenizer() {
+        return new TextStringBuilderTokenizer();
+    }
+
+    /**
+     * Gets this builder as a Writer that can be written to.
+     * <p>
+     * This method allows you to populate the contents of the builder using any standard method that takes a Writer.
+     * </p>
+     * <p>
+     * To use, simply create a {@code TextStringBuilder}, call {@code asWriter}, and populate away. The data is
+     * available at any time using the methods of the {@code TextStringBuilder}.
+     * </p>
+     * <p>
+     * The internal character array is shared between the builder and the writer. This allows you to intermix calls that
+     * append to the builder and write using the writer and the changes will occur correctly. Note however, that no
+     * synchronization occurs, so you must perform all operations with the builder and the writer in one thread.
+     * </p>
+     * <p>
+     * The returned writer ignores the close and flush methods.
+     * </p>
+     *
+     * @return a writer that populates this builder
+     */
+    public Writer asWriter() {
+        return new TextStringBuilderWriter();
+    }
+
+    /**
+     * Implements the {@link Builder} interface by returning {@link #toString()}.
+     *
+     * @return The builder as a String
+     * @see #toString()
+     */
+    @Override
+    public String build() {
+        return toString();
+    }
+
+    /**
+     * Gets the current size of the internal character array buffer, that is {@code buffer.length}.
+     *
+     * @return The capacity
+     */
+    public int capacity() {
+        return buffer.length;
+    }
+
+    /**
+     * Gets the character at the specified index. The index is checked with {@code validateIndex} before the buffer
+     * is read.
+     *
+     * @see #setCharAt(int, char)
+     * @see #deleteCharAt(int)
+     * @param index the index to retrieve, must be valid
+     * @return The character at the index
+     * @throws IndexOutOfBoundsException if the index is invalid
+     */
+    @Override
+    public char charAt(final int index) {
+        validateIndex(index);
+        return buffer[index];
+    }
+
+    /**
+     * Clears the string builder (convenience Collections API style method).
+     *

+ * This method does not reduce the size of the internal character buffer. To do that, call {@code clear()} followed + * by {@link #minimizeCapacity()}. + *

+ *

+ * This method is the same as {@link #setLength(int)} called with zero and is provided to match the API of + * Collections. + *

+ * + * @return this, to enable chaining + */ + public TextStringBuilder clear() { + size = 0; + return this; + } + + /** + * Checks if the string builder contains the specified char. + * + * @param ch the character to find + * @return true if the builder contains the character + */ + public boolean contains(final char ch) { + final char[] thisBuf = buffer; + for (int i = 0; i < this.size; i++) { + if (thisBuf[i] == ch) { + return true; + } + } + return false; + } + + /** + * Checks if the string builder contains the specified string. + * + * @param str the string to find + * @return true if the builder contains the string + */ + public boolean contains(final String str) { + return indexOf(str, 0) >= 0; + } + + /** + * Checks if the string builder contains a string matched using the specified matcher. + *

+ * Matchers can be used to perform advanced searching behavior. For example you could write a matcher to search for + * the character 'a' followed by a number. + *

+ * + * @param matcher the matcher to use, null returns -1 + * @return true if the matcher finds a match in the builder + */ + public boolean contains(final StringMatcher matcher) { + return indexOf(matcher, 0) >= 0; + } + + /** + * Deletes the characters between the two specified indices. + * + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except that if too large it is treated as end of string + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder delete(final int startIndex, final int endIndex) { + final int actualEndIndex = validateRange(startIndex, endIndex); + final int len = actualEndIndex - startIndex; + if (len > 0) { + deleteImpl(startIndex, actualEndIndex, len); + } + return this; + } + + /** + * Deletes the character wherever it occurs in the builder. + * + * @param ch the character to delete + * @return this, to enable chaining + */ + public TextStringBuilder deleteAll(final char ch) { + for (int i = 0; i < size; i++) { + if (buffer[i] == ch) { + final int start = i; + while (++i < size) { + if (buffer[i] != ch) { + break; + } + } + final int len = i - start; + deleteImpl(start, i, len); + i -= len; + } + } + return this; + } + + /** + * Deletes the string wherever it occurs in the builder. + * + * @param str the string to delete, null causes no action + * @return this, to enable chaining + */ + public TextStringBuilder deleteAll(final String str) { + final int len = str == null ? 0 : str.length(); + if (len > 0) { + int index = indexOf(str, 0); + while (index >= 0) { + deleteImpl(index, index + len, len); + index = indexOf(str, index); + } + } + return this; + } + + /** + * Deletes all parts of the builder that the matcher matches. + *

+ * Matchers can be used to perform advanced deletion behavior. For example you could write a matcher to delete all + * occurrences where the character 'a' is followed by a number. + * + * @param matcher the matcher to use to find the deletion, null causes no action + * @return this, to enable chaining + */ + public TextStringBuilder deleteAll(final StringMatcher matcher) { + return replace(matcher, null, 0, size, -1); + } + + /** + * Deletes the character at the specified index. + * + * @see #charAt(int) + * @see #setCharAt(int, char) + * @param index the index to delete + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder deleteCharAt(final int index) { + validateIndex(index); + deleteImpl(index, index + 1, 1); + return this; + } + + /** + * Deletes the character wherever it occurs in the builder. + * + * @param ch the character to delete + * @return this, to enable chaining + */ + public TextStringBuilder deleteFirst(final char ch) { + for (int i = 0; i < size; i++) { + if (buffer[i] == ch) { + deleteImpl(i, i + 1, 1); + break; + } + } + return this; + } + + /** + * Deletes the string wherever it occurs in the builder. + * + * @param str the string to delete, null causes no action + * @return this, to enable chaining + */ + public TextStringBuilder deleteFirst(final String str) { + final int len = str == null ? 0 : str.length(); + if (len > 0) { + final int index = indexOf(str, 0); + if (index >= 0) { + deleteImpl(index, index + len, len); + } + } + return this; + } + + /** + * Deletes the first match within the builder using the specified matcher. + *

+ * Matchers can be used to perform advanced deletion behavior. For example you could write a matcher to delete where + * the character 'a' is followed by a number. + * + * @param matcher the matcher to use to find the deletion, null causes no action + * @return this, to enable chaining + */ + public TextStringBuilder deleteFirst(final StringMatcher matcher) { + return replace(matcher, null, 0, size, 1); + } + + /** + * Internal method to delete a range without validation. + * + * @param startIndex the start index, must be valid + * @param endIndex the end index (exclusive), must be valid + * @param len the length, must be valid + * @throws IndexOutOfBoundsException if any index is invalid + */ + private void deleteImpl(final int startIndex, final int endIndex, final int len) { + System.arraycopy(buffer, endIndex, buffer, startIndex, size - endIndex); + size -= len; + } + + /** + * Gets the character at the specified index before deleting it. + * + * @see #charAt(int) + * @see #deleteCharAt(int) + * @param index the index to retrieve, must be valid + * @return The character at the index + * @throws IndexOutOfBoundsException if the index is invalid + * @since 1.9 + */ + public char drainChar(final int index) { + validateIndex(index); + final char c = buffer[index]; + deleteCharAt(index); + return c; + } + + /** + * Drains (copies, then deletes) this character sequence into the specified array. This is equivalent to copying the + * characters from this sequence into the target and then deleting those character from this sequence. + * + * @param startIndex first index to copy, inclusive. + * @param endIndex last index to copy, exclusive. + * @param target the target array, must not be {@code null}. + * @param targetIndex the index to start copying in the target. + * @return How many characters where copied (then deleted). If this builder is empty, return {@code 0}. 
+ * @since 1.9 + */ + public int drainChars(final int startIndex, final int endIndex, final char[] target, final int targetIndex) { + final int length = endIndex - startIndex; + if (isEmpty() || length == 0 || target.length == 0) { + return 0; + } + final int actualLen = Math.min(Math.min(size, length), target.length - targetIndex); + getChars(startIndex, actualLen, target, targetIndex); + delete(startIndex, actualLen); + return actualLen; + } + + /** + * Checks whether this builder ends with the specified string. + *

+ * Note that this method handles null input quietly, unlike String. + *

+ * + * @param str the string to search for, null returns false + * @return true if the builder ends with the string + */ + public boolean endsWith(final String str) { + if (str == null) { + return false; + } + final int len = str.length(); + if (len == 0) { + return true; + } + if (len > size) { + return false; + } + int pos = size - len; + for (int i = 0; i < len; i++, pos++) { + if (buffer[pos] != str.charAt(i)) { + return false; + } + } + return true; + } + + /** + * Checks the capacity and ensures that it is at least the size specified. + * + * @param capacity the capacity to ensure + * @return this, to enable chaining + */ + public TextStringBuilder ensureCapacity(final int capacity) { + if (capacity > buffer.length) { + reallocate(capacity * 2); + } + return this; + } + + /** + * Checks the contents of this builder against another to see if they contain the same character content. + * + * @param obj the object to check, null returns false + * @return true if the builders contain the same characters in the same order + */ + @Override + public boolean equals(final Object obj) { + return obj instanceof TextStringBuilder && equals((TextStringBuilder) obj); + } + + /** + * Checks the contents of this builder against another to see if they contain the same character content. + * + * @param other the object to check, null returns false + * @return true if the builders contain the same characters in the same order + */ + public boolean equals(final TextStringBuilder other) { + return other != null && Arrays.equals(buffer, other.buffer); + } + + /** + * Checks the contents of this builder against another to see if they contain the same character content ignoring + * case. 
+ * + * @param other the object to check, null returns false + * @return true if the builders contain the same characters in the same order + */ + public boolean equalsIgnoreCase(final TextStringBuilder other) { + if (this == other) { + return true; + } + if (this.size != other.size) { + return false; + } + final char[] thisBuf = this.buffer; + final char[] otherBuf = other.buffer; + for (int i = size - 1; i >= 0; i--) { + final char c1 = thisBuf[i]; + final char c2 = otherBuf[i]; + if (c1 != c2 && Character.toUpperCase(c1) != Character.toUpperCase(c2)) { + return false; + } + } + return true; + } + + /** Gets a direct reference to internal storage, not for public consumption. */ + char[] getBuffer() { + return buffer; + } + + /** + * Copies this character array into the specified array. + * + * @param target the target array, null will cause an array to be created + * @return The input array, unless that was null or too small + */ + public char[] getChars(char[] target) { + final int len = length(); + if (target == null || target.length < len) { + target = new char[len]; + } + System.arraycopy(buffer, 0, target, 0, len); + return target; + } + + /** + * Copies this character array into the specified array. + * + * @param startIndex first index to copy, inclusive, must be valid. + * @param endIndex last index to copy, exclusive, must be valid. + * @param target the target array, must not be null or too small. + * @param targetIndex the index to start copying in target. + * @throws NullPointerException if the array is null. + * @throws IndexOutOfBoundsException if any index is invalid. 
+ */ + public void getChars(final int startIndex, final int endIndex, final char[] target, final int targetIndex) { + if (startIndex < 0) { + throw new StringIndexOutOfBoundsException(startIndex); + } + if (endIndex < 0 || endIndex > length()) { + throw new StringIndexOutOfBoundsException(endIndex); + } + if (startIndex > endIndex) { + throw new StringIndexOutOfBoundsException("end < start"); + } + System.arraycopy(buffer, startIndex, target, targetIndex, endIndex - startIndex); + } + + /** + * Gets the text to be appended when a new line is added. + * + * @return The new line text, null means use system default + */ + public String getNewLineText() { + return newLine; + } + + /** + * Gets the text to be appended when null is added. + * + * @return The null text, null means no append + */ + public String getNullText() { + return nullText; + } + + /** + * Gets a suitable hash code for this builder. + * + * @return a hash code + */ + @Override + public int hashCode() { + return Arrays.hashCode(buffer); + } + + /** + * Searches the string builder to find the first reference to the specified char. + * + * @param ch the character to find + * @return The first index of the character, or -1 if not found + */ + public int indexOf(final char ch) { + return indexOf(ch, 0); + } + + /** + * Searches the string builder to find the first reference to the specified char. + * + * @param ch the character to find + * @param startIndex the index to start at, invalid index rounded to edge + * @return The first index of the character, or -1 if not found + */ + public int indexOf(final char ch, int startIndex) { + startIndex = Math.max(0, startIndex); + if (startIndex >= size) { + return -1; + } + final char[] thisBuf = buffer; + for (int i = startIndex; i < size; i++) { + if (thisBuf[i] == ch) { + return i; + } + } + return -1; + } + + /** + * Searches the string builder to find the first reference to the specified string. + *

+ * Note that a null input string will return -1, whereas the JDK throws an exception. + * + * @param str the string to find, null returns -1 + * @return The first index of the string, or -1 if not found + */ + public int indexOf(final String str) { + return indexOf(str, 0); + } + + /** + * Searches the string builder to find the first reference to the specified string starting searching from the given + * index. + *

+ * Note that a null input string will return -1, whereas the JDK throws an exception. + *

+ * + * @param str the string to find, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The first index of the string, or -1 if not found + */ + public int indexOf(final String str, int startIndex) { + startIndex = Math.max(0, startIndex); + if (str == null || startIndex >= size) { + return -1; + } + final int strLen = str.length(); + if (strLen == 1) { + return indexOf(str.charAt(0), startIndex); + } + if (strLen == 0) { + return startIndex; + } + if (strLen > size) { + return -1; + } + final char[] thisBuf = buffer; + final int len = size - strLen + 1; + outer: for (int i = startIndex; i < len; i++) { + for (int j = 0; j < strLen; j++) { + if (str.charAt(j) != thisBuf[i + j]) { + continue outer; + } + } + return i; + } + return -1; + } + + /** + * Searches the string builder using the matcher to find the first match. + *

+ * Matchers can be used to perform advanced searching behavior. For example you could write a matcher to find the + * character 'a' followed by a number. + *

+ * + * @param matcher the matcher to use, null returns -1 + * @return The first index matched, or -1 if not found + */ + public int indexOf(final StringMatcher matcher) { + return indexOf(matcher, 0); + } + + /** + * Searches the string builder using the matcher to find the first match searching from the given index. + *

+ * Matchers can be used to perform advanced searching behavior. For example you could write a matcher to find the + * character 'a' followed by a number. + *

+ * + * @param matcher the matcher to use, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The first index matched, or -1 if not found + */ + public int indexOf(final StringMatcher matcher, int startIndex) { + startIndex = Math.max(0, startIndex); + if (matcher == null || startIndex >= size) { + return -1; + } + final int len = size; + final char[] buf = buffer; + for (int i = startIndex; i < len; i++) { + if (matcher.isMatch(buf, i, startIndex, len) > 0) { + return i; + } + } + return -1; + } + + /** + * Inserts the value into this builder. + * + * @param index the index to add at, must be valid + * @param value the value to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final boolean value) { + validateIndex(index); + if (value) { + ensureCapacity(size + TRUE_STRING_SIZE); + System.arraycopy(buffer, index, buffer, index + TRUE_STRING_SIZE, size - index); + appendTrue(index); + } else { + ensureCapacity(size + FALSE_STRING_SIZE); + System.arraycopy(buffer, index, buffer, index + FALSE_STRING_SIZE, size - index); + appendFalse(index); + } + return this; + } + + /** + * Inserts the value into this builder. + * + * @param index the index to add at, must be valid + * @param value the value to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final char value) { + validateIndex(index); + ensureCapacity(size + 1); + System.arraycopy(buffer, index, buffer, index + 1, size - index); + buffer[index] = value; + size++; + return this; + } + + /** + * Inserts the character array into this builder. Inserting null will use the stored null text value. 
+ * + * @param index the index to add at, must be valid + * @param chars the char array to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final char[] chars) { + validateIndex(index); + if (chars == null) { + return insert(index, nullText); + } + final int len = chars.length; + if (len > 0) { + ensureCapacity(size + len); + System.arraycopy(buffer, index, buffer, index + len, size - index); + System.arraycopy(chars, 0, buffer, index, len); + size += len; + } + return this; + } + + /** + * Inserts part of the character array into this builder. Inserting null will use the stored null text value. + * + * @param index the index to add at, must be valid + * @param chars the char array to insert + * @param offset the offset into the character array to start at, must be valid + * @param length the length of the character array part to copy, must be positive + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if any index is invalid + */ + public TextStringBuilder insert(final int index, final char[] chars, final int offset, final int length) { + validateIndex(index); + if (chars == null) { + return insert(index, nullText); + } + if (offset < 0 || offset > chars.length) { + throw new StringIndexOutOfBoundsException("Invalid offset: " + offset); + } + if (length < 0 || offset + length > chars.length) { + throw new StringIndexOutOfBoundsException("Invalid length: " + length); + } + if (length > 0) { + ensureCapacity(size + length); + System.arraycopy(buffer, index, buffer, index + length, size - index); + System.arraycopy(chars, offset, buffer, index, length); + size += length; + } + return this; + } + + /** + * Inserts the value into this builder. 
+ * + * @param index the index to add at, must be valid + * @param value the value to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final double value) { + return insert(index, String.valueOf(value)); + } + + /** + * Inserts the value into this builder. + * + * @param index the index to add at, must be valid + * @param value the value to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final float value) { + return insert(index, String.valueOf(value)); + } + + /** + * Inserts the value into this builder. + * + * @param index the index to add at, must be valid + * @param value the value to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final int value) { + return insert(index, String.valueOf(value)); + } + + /** + * Inserts the value into this builder. + * + * @param index the index to add at, must be valid + * @param value the value to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final long value) { + return insert(index, String.valueOf(value)); + } + + /** + * Inserts the string representation of an object into this builder. Inserting null will use the stored null text + * value. + * + * @param index the index to add at, must be valid + * @param obj the object to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, final Object obj) { + if (obj == null) { + return insert(index, nullText); + } + return insert(index, obj.toString()); + } + + /** + * Inserts the string into this builder. 
Inserting null will use the stored null text value. + * + * @param index the index to add at, must be valid + * @param str the string to insert + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder insert(final int index, String str) { + validateIndex(index); + if (str == null) { + str = nullText; + } + if (str != null) { + final int strLen = str.length(); + if (strLen > 0) { + final int newSize = size + strLen; + ensureCapacity(newSize); + System.arraycopy(buffer, index, buffer, index + strLen, size - index); + size = newSize; + str.getChars(0, strLen, buffer, index); + } + } + return this; + } + + /** + * Checks is the string builder is empty (convenience Collections API style method). + *

+ * This method is the same as checking {@link #length()} and is provided to match the API of Collections. + *

+ * + * @return {@code true} if the size is {@code 0}. + */ + public boolean isEmpty() { + return size == 0; + } + + /** + * Checks is the string builder is not empty. + *

+ * This method is the same as checking {@link #length()}. + *

+ * + * @return {@code true} if the size is not {@code 0}. + * @since 1.9 + */ + public boolean isNotEmpty() { + return size != 0; + } + + /** + * Gets whether the internal buffer has been reallocated. + * + * @return Whether the internal buffer has been reallocated. + * @since 1.9 + */ + public boolean isReallocated() { + return reallocations > 0; + } + + /** + * Searches the string builder to find the last reference to the specified char. + * + * @param ch the character to find + * @return The last index of the character, or -1 if not found + */ + public int lastIndexOf(final char ch) { + return lastIndexOf(ch, size - 1); + } + + /** + * Searches the string builder to find the last reference to the specified char. + * + * @param ch the character to find + * @param startIndex the index to start at, invalid index rounded to edge + * @return The last index of the character, or -1 if not found + */ + public int lastIndexOf(final char ch, int startIndex) { + startIndex = startIndex >= size ? size - 1 : startIndex; + if (startIndex < 0) { + return -1; + } + for (int i = startIndex; i >= 0; i--) { + if (buffer[i] == ch) { + return i; + } + } + return -1; + } + + /** + * Searches the string builder to find the last reference to the specified string. + *

+ * Note that a null input string will return -1, whereas the JDK throws an exception. + * + * @param str the string to find, null returns -1 + * @return The last index of the string, or -1 if not found + */ + public int lastIndexOf(final String str) { + return lastIndexOf(str, size - 1); + } + + /** + * Searches the string builder to find the last reference to the specified string starting searching from the given + * index. + *

+ * Note that a null input string will return -1, whereas the JDK throws an exception. + *

+ * + * @param str the string to find, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The last index of the string, or -1 if not found + */ + public int lastIndexOf(final String str, int startIndex) { + startIndex = startIndex >= size ? size - 1 : startIndex; + if (str == null || startIndex < 0) { + return -1; + } + final int strLen = str.length(); + if (strLen > 0 && strLen <= size) { + if (strLen == 1) { + return lastIndexOf(str.charAt(0), startIndex); + } + + outer: for (int i = startIndex - strLen + 1; i >= 0; i--) { + for (int j = 0; j < strLen; j++) { + if (str.charAt(j) != buffer[i + j]) { + continue outer; + } + } + return i; + } + + } else if (strLen == 0) { + return startIndex; + } + return -1; + } + + /** + * Searches the string builder using the matcher to find the last match. + *

+ * Matchers can be used to perform advanced searching behavior. For example you could write a matcher to find the + * character 'a' followed by a number. + *

+ * + * @param matcher the matcher to use, null returns -1 + * @return The last index matched, or -1 if not found + */ + public int lastIndexOf(final StringMatcher matcher) { + return lastIndexOf(matcher, size); + } + + /** + * Searches the string builder using the matcher to find the last match searching from the given index. + *

+ * Matchers can be used to perform advanced searching behavior. For example you could write a matcher to find the + * character 'a' followed by a number. + *

+ * + * @param matcher the matcher to use, null returns -1 + * @param startIndex the index to start at, invalid index rounded to edge + * @return The last index matched, or -1 if not found + */ + public int lastIndexOf(final StringMatcher matcher, int startIndex) { + startIndex = startIndex >= size ? size - 1 : startIndex; + if (matcher == null || startIndex < 0) { + return -1; + } + final char[] buf = buffer; + final int endIndex = startIndex + 1; + for (int i = startIndex; i >= 0; i--) { + if (matcher.isMatch(buf, i, 0, endIndex) > 0) { + return i; + } + } + return -1; + } + + /** + * Extracts the leftmost characters from the string builder without throwing an exception. + *

+ * This method extracts the left {@code length} characters from the builder. If this many characters are not + * available, the whole builder is returned. Thus the returned string may be shorter than the length requested. + *

+ * + * @param length the number of characters to extract, negative returns empty string + * @return The new string + */ + public String leftString(final int length) { + if (length <= 0) { + return StringUtils.EMPTY; + } else if (length >= size) { + return new String(buffer, 0, size); + } else { + return new String(buffer, 0, length); + } + } + + /** + * Gets the length of the string builder. + * + * @return The length + */ + @Override + public int length() { + return size; + } + + /** + * Extracts some characters from the middle of the string builder without throwing an exception. + *

+ * This method extracts {@code length} characters from the builder at the specified index. If the index is negative + * it is treated as zero. If the index is greater than the builder size, it is treated as the builder size. If the + * length is negative, the empty string is returned. If insufficient characters are available in the builder, as + * much as possible is returned. Thus the returned string may be shorter than the length requested. + *

+ * + * @param index the index to start at, negative means zero + * @param length the number of characters to extract, negative returns empty string + * @return The new string + */ + public String midString(int index, final int length) { + if (index < 0) { + index = 0; + } + if (length <= 0 || index >= size) { + return StringUtils.EMPTY; + } + if (size <= index + length) { + return new String(buffer, index, size - index); + } + return new String(buffer, index, length); + } + + /** + * Minimizes the capacity to the actual length of the string. + * + * @return this, to enable chaining + */ + public TextStringBuilder minimizeCapacity() { + if (buffer.length > size) { + reallocate(size); + } + return this; + } + + /** + * If possible, reads chars from the provided {@link CharBuffer} directly into underlying character buffer without + * making extra copies. + * + * @param charBuffer CharBuffer to read. + * @return The number of characters read. + * @throws IOException if an I/O error occurs. + * + * @see #appendTo(Appendable) + * @since 1.9 + */ + public int readFrom(final CharBuffer charBuffer) throws IOException { + final int oldSize = size; + final int remaining = charBuffer.remaining(); + ensureCapacity(size + remaining); + charBuffer.get(buffer, size, remaining); + size += remaining; + return size - oldSize; + } + + /** + * If possible, reads all chars from the provided {@link Readable} directly into underlying character buffer without + * making extra copies. 
+     *
+     * @param readable object to read from
+     * @return The number of characters read
+     * @throws IOException if an I/O error occurs
+     *
+     * @see #appendTo(Appendable)
+     */
+    public int readFrom(final Readable readable) throws IOException {
+        if (readable instanceof Reader) {
+            return readFrom((Reader) readable);
+        } else if (readable instanceof CharBuffer) {
+            return readFrom((CharBuffer) readable);
+        } else {
+            final int oldSize = size;
+            while (true) {
+                ensureCapacity(size + 1); // guarantees at least one free slot for the next read
+                final CharBuffer buf = CharBuffer.wrap(buffer, size, buffer.length - size); // view over the free tail
+                final int read = readable.read(buf);
+                if (read == EOS) {
+                    break;
+                }
+                size += read;
+            }
+            return size - oldSize;
+        }
+    }
+
+    /**
+     * If possible, reads all chars from the provided {@link Reader} directly into underlying character buffer without
+     * making extra copies.
+     *
+     * The reader is drained until it reports end of stream, growing the buffer as required. If the very first read
+     * already reports end of stream, the end-of-stream marker is returned instead of a count and the builder is not
+     * changed in length.
+     *
+     * @param reader Reader to read.
+     * @return The number of characters read or -1 if we reached the end of stream.
+     * @throws IOException if an I/O error occurs.
+     *
+     * @see #appendTo(Appendable)
+     * @since 1.9
+     */
+    public int readFrom(final Reader reader) throws IOException {
+        final int oldSize = size;
+        ensureCapacity(size + 1); // guarantees at least one free slot for the first read
+        int readCount = reader.read(buffer, size, buffer.length - size);
+        if (readCount == EOS) {
+            return EOS; // nothing was available at all
+        }
+        do {
+            size += readCount;
+            ensureCapacity(size + 1); // make room before asking for more
+            readCount = reader.read(buffer, size, buffer.length - size);
+        } while (readCount != EOS);
+        return size - oldSize;
+    }
+
+    /**
+     * If possible, reads {@code count} chars from the provided {@link Reader} directly into underlying character buffer
+     * without making extra copies.
+     *
+     * @param reader Reader to read.
+     * @param count The maximum characters to read, a value <= 0 returns 0.
+     * @return The number of characters read. If less than {@code count}, then we've reached the end-of-stream, or -1 if
+     * we reached the end of stream.
+     * @throws IOException if an I/O error occurs.
+     * @see #appendTo(Appendable)
+     * @since 1.9
+     */
+    public int readFrom(final Reader reader, final int count) throws IOException {
+        if (count <= 0) {
+            return 0;
+        }
+        final int oldSize = size;
+        ensureCapacity(size + count); // reserve room for the whole request up front
+        int target = count; // characters still wanted
+        int readCount = reader.read(buffer, size, target);
+        if (readCount == EOS) {
+            return EOS; // nothing was available at all
+        }
+        do {
+            target -= readCount;
+            size += readCount;
+            readCount = reader.read(buffer, size, target); // also issued once with target == 0 before the loop ends
+        } while (target > 0 && readCount != EOS);
+        return size - oldSize;
+    }
+
+    /**
+     * Reallocates the buffer to the new length and counts the reallocation.
+     *
+     * The existing content is copied into the new array; it is truncated if the new length is shorter than the
+     * current array.
+     *
+     * @param newLength the length of the copy to be returned
+     */
+    private void reallocate(final int newLength) {
+        this.buffer = Arrays.copyOf(buffer, newLength);
+        this.reallocations++; // consulted by isReallocated()
+    }
+
+    /**
+     * Replaces a portion of the string builder with another string. The length of the inserted string does not have to
+     * match the removed length.
+     *
+     * The range is validated first; the validated end index is then used to delegate to the internal replace
+     * implementation.
+     *
+     * @param startIndex the start index, inclusive, must be valid
+     * @param endIndex the end index, exclusive, must be valid except that if too large it is treated as end of string
+     * @param replaceStr the string to replace with, null means delete range
+     * @return this, to enable chaining
+     * @throws IndexOutOfBoundsException if the index is invalid
+     */
+    public TextStringBuilder replace(final int startIndex, int endIndex, final String replaceStr) {
+        endIndex = validateRange(startIndex, endIndex);
+        final int insertLen = replaceStr == null ? 0 : replaceStr.length(); // null behaves like an empty replacement
+        replaceImpl(startIndex, endIndex, endIndex - startIndex, replaceStr, insertLen);
+        return this;
+    }
+
+    /**
+     * Advanced search and replaces within the builder using a matcher.
+     *
+ * Matchers can be used to perform advanced behavior. For example you could write a matcher to delete all + * occurrences where the character 'a' is followed by a number. + *

+ * + * @param matcher the matcher to use to find the deletion, null causes no action + * @param replaceStr the string to replace the match with, null is a delete + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except that if too large it is treated as end of string + * @param replaceCount the number of times to replace, -1 for replace all + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if start index is invalid + */ + public TextStringBuilder replace(final StringMatcher matcher, final String replaceStr, final int startIndex, + int endIndex, final int replaceCount) { + endIndex = validateRange(startIndex, endIndex); + return replaceImpl(matcher, replaceStr, startIndex, endIndex, replaceCount); + } + + /** + * Replaces the search character with the replace character throughout the builder. + * + * @param search the search character + * @param replace the replace character + * @return this, to enable chaining + */ + public TextStringBuilder replaceAll(final char search, final char replace) { + if (search != replace) { + for (int i = 0; i < size; i++) { + if (buffer[i] == search) { + buffer[i] = replace; + } + } + } + return this; + } + + /** + * Replaces the search string with the replace string throughout the builder. + * + * @param searchStr the search string, null causes no action to occur + * @param replaceStr the replace string, null is equivalent to an empty string + * @return this, to enable chaining + */ + public TextStringBuilder replaceAll(final String searchStr, final String replaceStr) { + final int searchLen = searchStr == null ? 0 : searchStr.length(); + if (searchLen > 0) { + final int replaceLen = replaceStr == null ? 
0 : replaceStr.length(); + int index = indexOf(searchStr, 0); + while (index >= 0) { + replaceImpl(index, index + searchLen, searchLen, replaceStr, replaceLen); + index = indexOf(searchStr, index + replaceLen); + } + } + return this; + } + + /** + * Replaces all matches within the builder with the replace string. + *

+ * Matchers can be used to perform advanced replace behavior. For example you could write a matcher to replace all + * occurrences where the character 'a' is followed by a number. + *

+ * + * @param matcher the matcher to use to find the deletion, null causes no action + * @param replaceStr the replace string, null is equivalent to an empty string + * @return this, to enable chaining + */ + public TextStringBuilder replaceAll(final StringMatcher matcher, final String replaceStr) { + return replace(matcher, replaceStr, 0, size, -1); + } + + /** + * Replaces the first instance of the search character with the replace character in the builder. + * + * @param search the search character + * @param replace the replace character + * @return this, to enable chaining + */ + public TextStringBuilder replaceFirst(final char search, final char replace) { + if (search != replace) { + for (int i = 0; i < size; i++) { + if (buffer[i] == search) { + buffer[i] = replace; + break; + } + } + } + return this; + } + + /** + * Replaces the first instance of the search string with the replace string. + * + * @param searchStr the search string, null causes no action to occur + * @param replaceStr the replace string, null is equivalent to an empty string + * @return this, to enable chaining + */ + public TextStringBuilder replaceFirst(final String searchStr, final String replaceStr) { + final int searchLen = searchStr == null ? 0 : searchStr.length(); + if (searchLen > 0) { + final int index = indexOf(searchStr, 0); + if (index >= 0) { + final int replaceLen = replaceStr == null ? 0 : replaceStr.length(); + replaceImpl(index, index + searchLen, searchLen, replaceStr, replaceLen); + } + } + return this; + } + + /** + * Replaces the first match within the builder with the replace string. + *

+     * Matchers can be used to perform advanced replace behavior. For example you could write a matcher to replace where
+     * the character 'a' is followed by a number.
+     *
+     *
+     * @param matcher the matcher to use to find the deletion, null causes no action
+     * @param replaceStr the replace string, null is equivalent to an empty string
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder replaceFirst(final StringMatcher matcher, final String replaceStr) {
+        return replace(matcher, replaceStr, 0, size, 1);
+    }
+
+    /**
+     * Internal method to replace a range with a string, without validation.
+     *
+     * When the inserted and removed lengths differ, the characters following the range are moved so that they sit
+     * directly behind the inserted text and the size is updated; the insert string is then copied over the start
+     * of the range. With an insert length of zero this is a plain deletion.
+     *
+     * @param startIndex the start index, must be valid
+     * @param endIndex the end index (exclusive), must be valid
+     * @param removeLen the length to remove (endIndex - startIndex), must be valid
+     * @param insertStr the string to replace with, null means delete range
+     * @param insertLen the length of the insert string, must be valid
+     * @throws IndexOutOfBoundsException if any index is invalid
+     */
+    private void replaceImpl(final int startIndex, final int endIndex, final int removeLen, final String insertStr,
+        final int insertLen) {
+        final int newSize = size - removeLen + insertLen;
+        if (insertLen != removeLen) { // equal lengths need no shifting, only the overwrite below
+            ensureCapacity(newSize);
+            System.arraycopy(buffer, endIndex, buffer, startIndex + insertLen, size - endIndex); // move the tail
+            size = newSize;
+        }
+        if (insertLen > 0) { // insertStr may be null when insertLen is 0
+            insertStr.getChars(0, insertLen, buffer, startIndex);
+        }
+    }
+
+    /**
+     * Replaces within the builder using a matcher.
+     *
+     * Matchers can be used to perform advanced behavior. For example you could write a matcher to delete all
+     * occurrences where the character 'a' is followed by a number.
+     *
+     *
+     * @param matcher the matcher to use to find the deletion, null causes no action
+     * @param replaceStr the string to replace the match with, null is a delete
+     * @param from the start index, must be valid
+     * @param to the end index (exclusive), must be valid
+     * @param replaceCount the number of times to replace, -1 for replace all
+     * @return this, to enable chaining
+     * @throws IndexOutOfBoundsException if any index is invalid
+     */
+    private TextStringBuilder replaceImpl(final StringMatcher matcher, final String replaceStr, final int from, int to,
+        int replaceCount) {
+        if (matcher == null || size == 0) {
+            return this;
+        }
+        final int replaceLen = replaceStr == null ? 0 : replaceStr.length();
+        for (int i = from; i < to && replaceCount != 0; i++) { // a negative replaceCount never reaches 0: replace all
+            final char[] buf = buffer; // re-read each pass: a replacement may have reallocated the buffer
+            final int removeLen = matcher.isMatch(buf, i, from, to);
+            if (removeLen > 0) {
+                replaceImpl(i, i + removeLen, removeLen, replaceStr, replaceLen);
+                to = to - removeLen + replaceLen; // the range end moves with the change in length
+                i = i + replaceLen - 1; // resume just behind the inserted text (the loop adds 1)
+                if (replaceCount > 0) {
+                    replaceCount--;
+                }
+            }
+        }
+        return this;
+    }
+
+    /**
+     * Reverses the string builder placing each character in the opposite index.
+     *
+     * The reversal is done in place by swapping characters from both ends towards the middle; for an odd size the
+     * middle character stays where it is.
+     *
+     * @return this, to enable chaining
+     */
+    public TextStringBuilder reverse() {
+        if (size == 0) {
+            return this;
+        }
+
+        final int half = size / 2;
+        final char[] buf = buffer;
+        for (int leftIdx = 0, rightIdx = size - 1; leftIdx < half; leftIdx++, rightIdx--) {
+            final char swap = buf[leftIdx];
+            buf[leftIdx] = buf[rightIdx];
+            buf[rightIdx] = swap;
+        }
+        return this;
+    }
+
+    /**
+     * Extracts the rightmost characters from the string builder without throwing an exception.
+     *
+ * This method extracts the right {@code length} characters from the builder. If this many characters are not + * available, the whole builder is returned. Thus the returned string may be shorter than the length requested. + *

+ * + * @param length the number of characters to extract, negative returns empty string + * @return The new string + */ + public String rightString(final int length) { + if (length <= 0) { + return StringUtils.EMPTY; + } else if (length >= size) { + return new String(buffer, 0, size); + } else { + return new String(buffer, size - length, length); + } + } + + /** + * Clears and sets this builder to the given value. + * + * @see #charAt(int) + * @see #deleteCharAt(int) + * @param str the new value. + * @return this, to enable chaining + * @since 1.9 + */ + public TextStringBuilder set(final CharSequence str) { + clear(); + append(str); + return this; + } + + /** + * Sets the character at the specified index. + * + * @see #charAt(int) + * @see #deleteCharAt(int) + * @param index the index to set + * @param ch the new character + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the index is invalid + */ + public TextStringBuilder setCharAt(final int index, final char ch) { + validateIndex(index); + buffer[index] = ch; + return this; + } + + /** + * Updates the length of the builder by either dropping the last characters or adding filler of Unicode zero. + * + * @param length the length to set to, must be zero or positive + * @return this, to enable chaining + * @throws IndexOutOfBoundsException if the length is negative + */ + public TextStringBuilder setLength(final int length) { + if (length < 0) { + throw new StringIndexOutOfBoundsException(length); + } + if (length < size) { + size = length; + } else if (length > size) { + ensureCapacity(length); + final int oldEnd = size; + final int newEnd = length; + size = length; + Arrays.fill(buffer, oldEnd, newEnd, '\0'); + } + return this; + } + + /** + * Sets the text to be appended when a new line is added. 
+ * + * @param newLine the new line text, null means use system default + * @return this, to enable chaining + */ + public TextStringBuilder setNewLineText(final String newLine) { + this.newLine = newLine; + return this; + } + + /** + * Sets the text to be appended when null is added. + * + * @param nullText the null text, null means no append + * @return this, to enable chaining + */ + public TextStringBuilder setNullText(String nullText) { + if (nullText != null && nullText.isEmpty()) { + nullText = null; + } + this.nullText = nullText; + return this; + } + + /** + * Gets the length of the string builder. + *

+ * This method is the same as {@link #length()} and is provided to match the API of Collections. + *

+ * + * @return The length + */ + public int size() { + return size; + } + + /** + * Checks whether this builder starts with the specified string. + *

+ * Note that this method handles null input quietly, unlike String. + *

+ * + * @param str the string to search for, null returns false + * @return true if the builder starts with the string + */ + public boolean startsWith(final String str) { + if (str == null) { + return false; + } + final int len = str.length(); + if (len == 0) { + return true; + } + if (len > size) { + return false; + } + for (int i = 0; i < len; i++) { + if (buffer[i] != str.charAt(i)) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public CharSequence subSequence(final int startIndex, final int endIndex) { + if (startIndex < 0) { + throw new StringIndexOutOfBoundsException(startIndex); + } + if (endIndex > size) { + throw new StringIndexOutOfBoundsException(endIndex); + } + if (startIndex > endIndex) { + throw new StringIndexOutOfBoundsException(endIndex - startIndex); + } + return substring(startIndex, endIndex); + } + + /** + * Extracts a portion of this string builder as a string. + * + * @param start the start index, inclusive, must be valid + * @return The new string + * @throws IndexOutOfBoundsException if the index is invalid + */ + public String substring(final int start) { + return substring(start, size); + } + + /** + * Extracts a portion of this string builder as a string. + *

+ * Note: This method treats an endIndex greater than the length of the builder as equal to the length of the + * builder, and continues without error, unlike StringBuffer or String. + *

+ * + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except that if too large it is treated as end of string + * @return The new string + * @throws IndexOutOfBoundsException if the index is invalid + */ + public String substring(final int startIndex, int endIndex) { + endIndex = validateRange(startIndex, endIndex); + return new String(buffer, startIndex, endIndex - startIndex); + } + + /** + * Copies the builder's character array into a new character array. + * + * @return a new array that represents the contents of the builder, or {@code ArrayUtils.EMPTY_CHAR_ARRAY} if the builder is empty + */ + public char[] toCharArray() { + return size == 0 ? ArrayUtils.EMPTY_CHAR_ARRAY : Arrays.copyOf(buffer, size); + } + + /** + * Copies part of the builder's character array into a new character array. + * + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except that if too large it is treated as end of string + * @return a new array that holds part of the contents of the builder, or {@code ArrayUtils.EMPTY_CHAR_ARRAY} if the range is empty + * @throws IndexOutOfBoundsException if startIndex is invalid, or if endIndex is invalid (but endIndex greater than + * size is valid) + */ + public char[] toCharArray(final int startIndex, int endIndex) { + endIndex = validateRange(startIndex, endIndex); + final int len = endIndex - startIndex; + return len == 0 ? ArrayUtils.EMPTY_CHAR_ARRAY : Arrays.copyOfRange(buffer, startIndex, endIndex); + } + + /** + * Gets a String version of the string builder, creating a new instance each time the method is called. + *

+ * Note that unlike StringBuffer, the string version returned is independent of the string builder. + * + * @return The builder as a String + */ + @Override + public String toString() { + return new String(buffer, 0, size); + } + + /** + * Gets a StringBuffer version of the string builder, creating a new instance each time the method is called. + * + * @return The builder as a StringBuffer + */ + public StringBuffer toStringBuffer() { + return new StringBuffer(size).append(buffer, 0, size); + } + + /** + * Gets a StringBuilder version of the string builder, creating a new instance each time the method is called. + * + * @return The builder as a StringBuilder + */ + public StringBuilder toStringBuilder() { + return new StringBuilder(size).append(buffer, 0, size); + } + + /** + * Trims the builder by removing characters less than or equal to a space from the beginning and end. + * + * @return this, to enable chaining + */ + public TextStringBuilder trim() { + if (size == 0) { + return this; + } + int len = size; + final char[] buf = buffer; + int pos = 0; + while (pos < len && buf[pos] <= SPACE) { + pos++; + } + while (pos < len && buf[len - 1] <= SPACE) { + len--; + } + if (len < size) { + delete(len, size); + } + if (pos > 0) { + delete(0, pos); + } + return this; + } + + /** + * Validates that an index is in the range {@code 0 <= index < size}. + * + * @param index the index to test. + * @throws IndexOutOfBoundsException Thrown when the index is not in the range {@code 0 <= index < size}. + */ + protected void validateIndex(final int index) { + if (index < 0 || index >= size) { + throw new StringIndexOutOfBoundsException(index); + } + } + + /** + * Validates parameters defining a range of the builder. + * + * @param startIndex the start index, inclusive, must be valid + * @param endIndex the end index, exclusive, must be valid except that if too large it is treated as end of string + * @return The end index to use: {@code endIndex}, or {@code size} if {@code endIndex} is greater than {@code size}. 
+ * @throws StringIndexOutOfBoundsException if {@code startIndex} is negative or greater than the adjusted end index + */ + protected int validateRange(final int startIndex, int endIndex) { + if (startIndex < 0) { + throw new StringIndexOutOfBoundsException(startIndex); + } + if (endIndex > size) { + endIndex = size; + } + if (startIndex > endIndex) { + throw new StringIndexOutOfBoundsException("end < start"); + } + return endIndex; + } + +} diff --git a/src/main/java/org/apache/commons/text/WordUtils.java b/src/main/java/org/apache/commons/text/WordUtils.java index 1a6a407fd5..76306968b6 100644 --- a/src/main/java/org/apache/commons/text/WordUtils.java +++ b/src/main/java/org/apache/commons/text/WordUtils.java @@ -31,8 +31,8 @@ *

* *

- * This class tries to handle null input gracefully. An exception will not be thrown for a - * null input. Each method documents its behavior in more detail. + * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a + * {@code null} input. Each method documents its behavior in more detail. *

* * @since 1.1 @@ -40,21 +40,20 @@ public class WordUtils { /** - *

WordUtils instances should NOT be constructed in + *

{@code WordUtils} instances should NOT be constructed in * standard programming. Instead, the class should be used as - * WordUtils.wrap("foo bar", 20);.

+ * {@code WordUtils.wrap("foo bar", 20);}.

* *

This constructor is public to permit tools that require a JavaBean * instance to operate.

*/ public WordUtils() { - super(); } // Wrapping //-------------------------------------------------------------------------- /** - *

Wraps a single line of text, identifying words by ' '.

+ *

Wraps a single line of text, identifying words by {@code ' '}.

* *

New lines will be separated by the system property line separator. * Very long words, such as URLs will not be wrapped.

@@ -62,7 +61,8 @@ public WordUtils() { *

Leading spaces on a new line are stripped. * Trailing spaces are not stripped.

* - * + *
+ * * * * @@ -84,14 +84,14 @@ public WordUtils() { * * * - * + * * - * + * * * - * + * * - * + * * *
Examples
inputwrapLength"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."
"Click here to jump to the commons website - http://commons.apache.org""Click here to jump to the commons website - https://commons.apache.org"20"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org""Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"
"Click here, http://commons.apache.org, to jump to the commons website""Click here, https://commons.apache.org, to jump to the commons website"20"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website""Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"
* @@ -99,19 +99,20 @@ public WordUtils() { * * @param str the String to be word wrapped, may be null * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 - * @return a line with newlines inserted, null if null input + * @return a line with newlines inserted, {@code null} if null input */ public static String wrap(final String str, final int wrapLength) { return wrap(str, wrapLength, null, false); } /** - *

Wraps a single line of text, identifying words by ' '.

+ *

Wraps a single line of text, identifying words by {@code ' '}.

* *

Leading spaces on a new line are stripped. * Trailing spaces are not stripped.

* - * + *
+ * * * * @@ -157,14 +158,14 @@ public static String wrap(final String str, final int wrapLength) { * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns." * * - * + * * * * - * + * * * - * + * * * * @@ -175,9 +176,9 @@ public static String wrap(final String str, final int wrapLength) { * @param str the String to be word wrapped, may be null * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 * @param newLineStr the string to insert for a new line, - * null uses the system property line separator + * {@code null} uses the system property line separator * @param wrapLongWords true if long words (such as URLs) should be wrapped - * @return a line with newlines inserted, null if null input + * @return a line with newlines inserted, {@code null} if null input */ public static String wrap(final String str, final int wrapLength, @@ -187,12 +188,13 @@ public static String wrap(final String str, } /** - *

Wraps a single line of text, identifying words by wrapOn.

+ *

Wraps a single line of text, identifying words by {@code wrapOn}.

* *

Leading spaces on a new line are stripped. * Trailing spaces are not stripped.

* - *
Examples
inputwrapLength
"Click here to jump to the commons website - http://commons.apache.org""Click here to jump to the commons website - https://commons.apache.org"20"\n"false"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org""Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"
"Click here to jump to the commons website - http://commons.apache.org""Click here to jump to the commons website - https://commons.apache.org"20"\n"true
+ *
+ * * * * @@ -244,15 +246,15 @@ public static String wrap(final String str, * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns." * * - * + * * * * * - * + * * * - * + * * * * @@ -271,11 +273,11 @@ public static String wrap(final String str, * @param str the String to be word wrapped, may be null * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 * @param newLineStr the string to insert for a new line, - * null uses the system property line separator + * {@code null} uses the system property line separator * @param wrapLongWords true if long words (such as URLs) should be wrapped * @param wrapOn regex expression to be used as a breakable characters, * if blank string is provided a space character will be used - * @return a line with newlines inserted, null if null input + * @return a line with newlines inserted, {@code null} if null input */ public static String wrap(final String str, int wrapLength, @@ -298,18 +300,22 @@ public static String wrap(final String str, final int inputLineLength = str.length(); int offset = 0; final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); + int matcherSize = -1; while (offset < inputLineLength) { int spaceToWrapAt = -1; - Matcher matcher = patternToWrapOn.matcher(str.substring(offset, Math - .min(offset + wrapLength + 1, inputLineLength))); + Matcher matcher = patternToWrapOn.matcher(str.substring(offset, + Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength))); if (matcher.find()) { if (matcher.start() == 0) { - offset += matcher.end(); - continue; - } else { - spaceToWrapAt = matcher.start() + offset; + matcherSize = matcher.end() - matcher.start(); + if (matcherSize != 0) { + offset += matcher.end(); + continue; + } + offset += 1; } + spaceToWrapAt = matcher.start() + offset; } // only last line without leading spaces is left @@ -330,29 +336,45 @@ public static String wrap(final String str, } else { // really long word or 
URL if (wrapLongWords) { + if (matcherSize == 0) { + offset--; + } // wrap really long word one line at a time wrappedLine.append(str, offset, wrapLength + offset); wrappedLine.append(newLineStr); offset += wrapLength; + matcherSize = -1; } else { // do not wrap really long word, just extend beyond limit matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength)); if (matcher.find()) { + matcherSize = matcher.end() - matcher.start(); spaceToWrapAt = matcher.start() + offset + wrapLength; } if (spaceToWrapAt >= 0) { + if (matcherSize == 0 && offset != 0) { + offset--; + } wrappedLine.append(str, offset, spaceToWrapAt); wrappedLine.append(newLineStr); offset = spaceToWrapAt + 1; } else { + if (matcherSize == 0 && offset != 0) { + offset--; + } wrappedLine.append(str, offset, str.length()); offset = inputLineLength; + matcherSize = -1; } } } } + if (matcherSize == 0 && offset < inputLineLength) { + offset--; + } + // Whatever is left in line is short enough to just pass through wrappedLine.append(str, offset, str.length()); @@ -368,7 +390,7 @@ public static String wrap(final String str, * use {@link #capitalizeFully(String)}.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. - * A null input String returns null. + * A {@code null} input String returns {@code null}. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* @@ -379,7 +401,7 @@ public static String wrap(final String str, * * * @param str the String to capitalize, may be null - * @return capitalized String, null if null String input + * @return capitalized String, {@code null} if null String input * @see #uncapitalize(String) * @see #capitalizeFully(String) */ @@ -395,9 +417,9 @@ public static String capitalize(final String str) { * *

The delimiters represent a set of characters understood to separate words. * The first string character and the first non-delimiter character after a - * delimiter will be capitalized.

+ * delimiter will be capitalized.

* - *

A null input String returns null. + *

A {@code null} input String returns {@code null}. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* @@ -412,7 +434,7 @@ public static String capitalize(final String str) { * * @param str the String to capitalize, may be null * @param delimiters set of characters to determine capitalization, null means whitespace - * @return capitalized String, null if null String input + * @return capitalized String, {@code null} if null String input * @see #uncapitalize(String) * @see #capitalizeFully(String) */ @@ -450,10 +472,10 @@ public static String capitalize(final String str, final char... delimiters) { /** *

Converts all the whitespace separated words in a String into capitalized words, * that is each word is made up of a titlecase character and then a series of - * lowercase characters.

+ * lowercase characters.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. - * A null input String returns null. + * A {@code null} input String returns {@code null}. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* @@ -464,7 +486,7 @@ public static String capitalize(final String str, final char... delimiters) { * * * @param str the String to capitalize, may be null - * @return capitalized String, null if null String input + * @return capitalized String, {@code null} if null String input */ public static String capitalizeFully(final String str) { return capitalizeFully(str, null); @@ -473,13 +495,13 @@ public static String capitalizeFully(final String str) { /** *

Converts all the delimiter separated words in a String into capitalized words, * that is each word is made up of a titlecase character and then a series of - * lowercase characters.

+ * lowercase characters.

* *

The delimiters represent a set of characters understood to separate words. * The first string character and the first non-delimiter character after a - * delimiter will be capitalized.

+ * delimiter will be capitalized.

* - *

A null input String returns null. + *

A {@code null} input String returns {@code null}. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* @@ -493,7 +515,7 @@ public static String capitalizeFully(final String str) { * * @param str the String to capitalize, may be null * @param delimiters set of characters to determine capitalization, null means whitespace - * @return capitalized String, null if null String input + * @return capitalized String, {@code null} if null String input */ public static String capitalizeFully(String str, final char... delimiters) { if (StringUtils.isEmpty(str)) { @@ -509,7 +531,7 @@ public static String capitalizeFully(String str, final char... delimiters) { * Only the first character of each word is changed.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. - * A null input String returns null.

+ * A {@code null} input String returns {@code null}.

* *
      * WordUtils.uncapitalize(null)        = null
@@ -518,7 +540,7 @@ public static String capitalizeFully(String str, final char... delimiters) {
      * 
* * @param str the String to uncapitalize, may be null - * @return uncapitalized String, null if null String input + * @return uncapitalized String, {@code null} if null String input * @see #capitalize(String) */ public static String uncapitalize(final String str) { @@ -531,10 +553,10 @@ public static String uncapitalize(final String str) { * *

The delimiters represent a set of characters understood to separate words. * The first string character and the first non-delimiter character after a - * delimiter will be uncapitalized.

+ * delimiter will be uncapitalized.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. - * A null input String returns null.

+ * A {@code null} input String returns {@code null}.

* *
      * WordUtils.uncapitalize(null, *)            = null
@@ -547,7 +569,7 @@ public static String uncapitalize(final String str) {
      *
      * @param str  the String to uncapitalize, may be null
      * @param delimiters  set of characters to determine uncapitalization, null means whitespace
-     * @return uncapitalized String, null if null String input
+     * @return uncapitalized String, {@code null} if null String input
      * @see #capitalize(String)
      */
     public static String uncapitalize(final String str, final char... delimiters) {
@@ -592,7 +614,7 @@ public static String uncapitalize(final String str, final char... delimiters) {
      * 
      *
      * 

Whitespace is defined by {@link Character#isWhitespace(char)}. - * A null input String returns null.

+ * A {@code null} input String returns {@code null}.

* *
      * StringUtils.swapCase(null)                 = null
@@ -601,7 +623,7 @@ public static String uncapitalize(final String str, final char... delimiters) {
      * 
* * @param str the String to swap case, may be null - * @return the changed String, null if null String input + * @return The changed String, {@code null} if null String input */ public static String swapCase(final String str) { if (StringUtils.isEmpty(str)) { @@ -642,7 +664,7 @@ public static String swapCase(final String str) { * Their case is not changed.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. - * A null input String returns null.

+ * A {@code null} input String returns {@code null}.

* *
      * WordUtils.initials(null)             = null
@@ -652,7 +674,7 @@ public static String swapCase(final String str) {
      * 
* * @param str the String to get initials from, may be null - * @return String of initial letters, null if null String input + * @return String of initial letters, {@code null} if null String input * @see #initials(String,char[]) */ public static String initials(final String str) { @@ -667,7 +689,7 @@ public static String initials(final String str) { * *

If the delimiters array is null, then Whitespace is used. * Whitespace is defined by {@link Character#isWhitespace(char)}. - * A null input String returns null. + * A {@code null} input String returns {@code null}. * An empty delimiter array returns an empty String.

* *
@@ -681,7 +703,7 @@ public static String initials(final String str) {
      *
      * @param str  the String to get initials from, may be null
      * @param delimiters  set of characters to determine words, null means whitespace
-     * @return String of initial characters, null if null String input
+     * @return String of initial characters, {@code null} if null String input
      * @see #initials(String)
      */
     public static String initials(final String str, final char... delimiters) {
@@ -689,7 +711,7 @@ public static String initials(final String str, final char... delimiters) {
             return str;
         }
         if (delimiters != null && delimiters.length == 0) {
-            return "";
+            return StringUtils.EMPTY;
         }
         final Set delimiterSet = generateDelimiterSet(delimiters);
         final int strLen = str.length();
@@ -805,14 +827,14 @@ public static boolean isDelimiter(final int codePoint, final char[] delimiters)
      *
      * @param str         the string to be abbreviated. If null is passed, null is returned.
      *                    If the empty String is passed, the empty string is returned.
-     * @param lower       the lower limit.
+     * @param lower       the lower limit; negative value is treated as zero.
      * @param upper       the upper limit; specify -1 if no limit is desired.
      *                    If the upper limit is lower than the lower limit, it will be
      *                    adjusted to be the same as the lower limit.
      * @param appendToEnd String to be appended to the end of the abbreviated string.
      *                    This is appended ONLY if the string was indeed abbreviated.
      *                    The append does not count towards the lower or upper limits.
-     * @return the abbreviated String.
+     * @return The abbreviated String.
      *
      * 
      * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
@@ -836,7 +858,6 @@ public static boolean isDelimiter(final int codePoint, final char[] delimiters)
     public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
         Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
         Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
-
         if (StringUtils.isEmpty(str)) {
             return str;
         }
@@ -857,7 +878,7 @@ public static String abbreviate(final String str, int lower, int upper, final St
         final int index = StringUtils.indexOf(str, " ", lower);
         if (index == -1) {
             result.append(str, 0, upper);
-            // only if abbreviation has occured do we append the appendToEnd value
+            // only if abbreviation has occurred do we append the appendToEnd value
             if (upper != str.length()) {
                 result.append(StringUtils.defaultString(appendToEnd));
             }
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
index 8173718b6c..69ed48493d 100644
--- a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -44,7 +44,7 @@ public DeleteCommand(final T object) {
     }
 
     /**
-     * Accept a visitor. When a DeleteCommand accepts a visitor, it calls
+     * Accept a visitor. When a {@code DeleteCommand} accepts a visitor, it calls
      * its {@link CommandVisitor#visitDeleteCommand visitDeleteCommand} method.
      *
      * @param visitor  the visitor to be accepted
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
index 7920206aef..d26215490a 100644
--- a/src/main/java/org/apache/commons/text/diff/EditCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -33,12 +33,12 @@
  * inserted into the first sequence, {@link DeleteCommand DeleteCommand} which
  * correspond to an object of the first sequence being removed and
  * {@link KeepCommand KeepCommand} which correspond to an object of the first
- * sequence which equals an object in the second sequence. It is
+ * sequence which {@code equals} an object in the second sequence. It is
  * guaranteed that comparison is always performed this way (i.e. the
- * equals method of the object from the first sequence is used and
+ * {@code equals} method of the object from the first sequence is used and
  * the object passed as an argument comes from the second sequence) ; this can
  * be important if subclassing is used for some elements in the first sequence
- * and the equals method is specialized.
+ * and the {@code equals} method is specialized.
  * 

* *

@@ -69,7 +69,7 @@ protected EditCommand(final T object) { /** * Returns the object associated with this command. * - * @return the object on which the command is applied + * @return The object on which the command is applied */ protected T getObject() { return object; diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java index bf4b1853d2..e179818449 100644 --- a/src/main/java/org/apache/commons/text/diff/EditScript.java +++ b/src/main/java/org/apache/commons/text/diff/EditScript.java @@ -32,7 +32,7 @@ * commands} come from the second sequence and that the objects embedded in * either the {@link DeleteCommand delete commands} or {@link KeepCommand keep * commands} come from the first sequence. This can be important if subclassing - * is used for some elements in the first sequence and the equals + * is used for some elements in the first sequence and the {@code equals} * method is specialized. * * @see StringsComparator diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java index f0337dc458..f8eb0e07c1 100644 --- a/src/main/java/org/apache/commons/text/diff/InsertCommand.java +++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java @@ -44,7 +44,7 @@ public InsertCommand(final T object) { } /** - * Accept a visitor. When an InsertCommand accepts a visitor, + * Accept a visitor. When an {@code InsertCommand} accepts a visitor, * it calls its {@link CommandVisitor#visitInsertCommand visitInsertCommand} * method. 
* diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java index 34c6fe7f09..efd48249f3 100644 --- a/src/main/java/org/apache/commons/text/diff/KeepCommand.java +++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java @@ -19,7 +19,7 @@ /** * Command representing the keeping of one object present in both sequences. *

- * When one object of the first sequence equals another objects in + * When one object of the first sequence {@code equals} another objects in * the second sequence at the right place, the {@link EditScript edit script} * transforming the first sequence into the second sequence uses an instance of * this class to represent the keeping of this object. The objects embedded in @@ -46,7 +46,7 @@ public KeepCommand(final T object) { } /** - * Accept a visitor. When a KeepCommand accepts a visitor, it + * Accept a visitor. When a {@code KeepCommand} accepts a visitor, it * calls its {@link CommandVisitor#visitKeepCommand visitKeepCommand} method. * * @param visitor the visitor to be accepted diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java index 46f1b888c0..6beff3b100 100644 --- a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java +++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java @@ -56,10 +56,12 @@ public class ReplacementsFinder implements CommandVisitor { * List of pending insertions. */ private final List pendingInsertions; + /** * List of pending deletions. */ private final List pendingDeletions; + /** * Count of elements skipped. */ diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java index 3beb716203..a00978c0e1 100644 --- a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java +++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java @@ -37,8 +37,8 @@ public interface ReplacementsHandler { * equals (if they exist). This property also holds for the objects after * the two sub-sequences. *

- * The replacement is defined as replacing the from - * sub-sequence into the to sub-sequence. + * The replacement is defined as replacing the {@code from} + * sub-sequence into the {@code to} sub-sequence. * * @param skipped number of tokens skipped since the last call (i.e. number of * tokens that were in both sequences), this number should be strictly positive diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java index 66ce4b0cc7..b3963e5d5b 100644 --- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java +++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java @@ -19,16 +19,16 @@ /** *

* It is guaranteed that the comparisons will always be done as - * o1.equals(o2) where o1 belongs to the first - * sequence and o2 belongs to the second sequence. This can + * {@code o1.equals(o2)} where {@code o1} belongs to the first + * sequence and {@code o2} belongs to the second sequence. This can * be important if subclassing is used for some elements in the first - * sequence and the equals method is specialized. + * sequence and the {@code equals} method is specialized. *

*

* Comparison can be seen from two points of view: either as giving the smallest * modification allowing to transform the first sequence into the second one, or * as giving the longest sequence which is a subsequence of both initial - * sequences. The equals method is used to compare objects, so any + * sequences. The {@code equals} method is used to compare objects, so any * object can be put into sequences. Modifications include deleting, inserting * or keeping one object, starting from the beginning of the first sequence. *

@@ -76,10 +76,10 @@ public class StringsComparator { *

*

* It is guaranteed that the comparisons will always be done as - * o1.equals(o2) where o1 belongs to the first - * sequence and o2 belongs to the second sequence. This can be + * {@code o1.equals(o2)} where {@code o1} belongs to the first + * sequence and {@code o2} belongs to the second sequence. This can be * important if subclassing is used for some elements in the first sequence - * and the equals method is specialized. + * and the {@code equals} method is specialized. *

* * @param left first character sequence to be compared @@ -102,10 +102,10 @@ public StringsComparator(final String left, final String right) { * embedded in either the {@link DeleteCommand delete commands} or * {@link KeepCommand keep commands} come from the first sequence. This can * be important if subclassing is used for some elements in the first - * sequence and the equals method is specialized. + * sequence and the {@code equals} method is specialized. *

* - * @return the edit script resulting from the comparison of the two + * @return The edit script resulting from the comparison of the two * sequences */ public EditScript getScript() { @@ -178,7 +178,7 @@ private void buildScript(final int start1, final int end1, final int start2, fin * @param end1 the end of the first sequence to be compared * @param start2 the begin of the second sequence to be compared * @param end2 the end of the second sequence to be compared - * @return the middle snake + * @return The middle snake */ private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) { // Myers Algorithm @@ -260,7 +260,7 @@ private Snake getMiddleSnake(final int start1, final int end1, final int start2, * @param diag the value of the diagonal of the snake * @param end1 the value of the end of the first sequence to be compared * @param end2 the value of the end of the second sequence to be compared - * @return the snake built + * @return The snake built */ private Snake buildSnake(final int start, final int diag, final int end1, final int end2) { int end = start; diff --git a/src/main/java/org/apache/commons/text/io/StringSubstitutorReader.java b/src/main/java/org/apache/commons/text/io/StringSubstitutorReader.java new file mode 100644 index 0000000000..fbe151b0b7 --- /dev/null +++ b/src/main/java/org/apache/commons/text/io/StringSubstitutorReader.java @@ -0,0 +1,316 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.text.io; + +import java.io.FilterReader; +import java.io.IOException; +import java.io.Reader; +import java.util.Objects; + +import org.apache.commons.text.StringSubstitutor; +import org.apache.commons.text.TextStringBuilder; +import org.apache.commons.text.matcher.StringMatcher; +import org.apache.commons.text.matcher.StringMatcherFactory; + +/** + * A {@link Reader} that performs string substitution on a source {@code Reader} using a {@link StringSubstitutor}. + * + *

+ * Using this Reader avoids reading a whole file into memory as a {@code String} to perform string substitution, for + * example, when a Servlet filters a file to a client. + *

+ *

+ * This class is not thread-safe. + *

+ * + * @since 1.9 + */ +public class StringSubstitutorReader extends FilterReader { + + /** The end-of-stream character marker. */ + private static final int EOS = -1; + + /** Our internal buffer. */ + private final TextStringBuilder buffer = new TextStringBuilder(); + + /** End-of-Stream flag. */ + private boolean eos; + + /** Matches escaped variable starts. */ + private final StringMatcher prefixEscapeMatcher; + + /** Internal buffer for {@link #read()} method. */ + private final char[] read1CharBuffer = {0}; + + /** The underlying StringSubstitutor. */ + private final StringSubstitutor stringSubstitutor; + + /** We don't always want to drain the whole buffer. */ + private int toDrain; + + /** + * Constructs a new instance. + * + * @param reader the underlying reader containing the template text known to the given {@code StringSubstitutor}. + * @param stringSubstitutor How to replace as we read. + * @throws NullPointerException if {@code reader} is {@code null}. + * @throws NullPointerException if {@code stringSubstitutor} is {@code null}. + */ + public StringSubstitutorReader(final Reader reader, final StringSubstitutor stringSubstitutor) { + super(reader); + this.stringSubstitutor = Objects.requireNonNull(stringSubstitutor); + this.prefixEscapeMatcher = StringMatcherFactory.INSTANCE.charMatcher(stringSubstitutor.getEscapeChar()) + .andThen(stringSubstitutor.getVariablePrefixMatcher()); + } + + /** + * Buffers the requested number of characters if available. + */ + private int buffer(final int requestReadCount) throws IOException { + final int actualReadCount = buffer.readFrom(super.in, requestReadCount); + eos = actualReadCount == EOS; + return actualReadCount; + } + + /** + * Reads a requested number of chars from the underlying reader into the buffer. On EOS, set the state is DRAINING, + * drain, and return a drain count, otherwise, returns the actual read count. 
+ */ + private int bufferOrDrainOnEos(final int requestReadCount, final char[] target, final int targetIndex, + final int targetLength) throws IOException { + final int actualReadCount = buffer(requestReadCount); + return drainOnEos(actualReadCount, target, targetIndex, targetLength); + } + + /** + * Drains characters from our buffer to the given {@code target}. + */ + private int drain(final char[] target, final int targetIndex, final int targetLength) { + final int actualLen = Math.min(buffer.length(), targetLength); + final int drainCount = buffer.drainChars(0, actualLen, target, targetIndex); + toDrain -= drainCount; + if (buffer.isEmpty() || toDrain == 0) { + // nothing or everything drained. + toDrain = 0; + } + return drainCount; + } + + /** + * Drains from the buffer to the target only if we are at EOS per the input count. If input count is EOS, drain and + * returns the drain count, otherwise return the input count. If draining, the state is set to DRAINING. + */ + private int drainOnEos(final int readCountOrEos, final char[] target, final int targetIndex, + final int targetLength) { + if (readCountOrEos == EOS) { + // At EOS, drain. + if (buffer.isNotEmpty()) { + toDrain = buffer.size(); + return drain(target, targetIndex, targetLength); + } + return EOS; + } + return readCountOrEos; + } + + /** + * Tests if our buffer matches the given string matcher at the given position in the buffer. + */ + private boolean isBufferMatchAt(final StringMatcher stringMatcher, final int pos) { + return stringMatcher.isMatch(buffer, pos) == stringMatcher.size(); + } + + /** + * Tests if we are draining. + */ + private boolean isDraining() { + return toDrain > 0; + } + + /** + * Reads a single character. + * + * @return a character as an {@code int} or {@code -1} for end-of-stream. 
+ * @throws IOException If an I/O error occurs + */ + @Override + public int read() throws IOException { + int count = 0; + // ask until we get a char or EOS + do { + count = read(read1CharBuffer, 0, 1); + if (count == EOS) { + return EOS; + } + // keep on buffering + } while (count < 1); + return read1CharBuffer[0]; + } + + /** + * Reads characters into a portion of an array. + * + * @param target Target buffer. + * @param targetIndexIn Index in the target at which to start storing characters. + * @param targetLengthIn Maximum number of characters to read. + * + * @return The number of characters read, or -1 on end of stream. + * @throws IOException If an I/O error occurs + */ + @Override + public int read(final char[] target, final int targetIndexIn, final int targetLengthIn) throws IOException { + // The whole thing is inefficient because we must look for a balanced suffix to match the starting prefix + // Trying to substitute an incomplete expression can perform replacements when it should not. + // At a high level: + // - if draining, drain until empty or target length hit + // - copy to target until we find a variable start + // - buffer until a balanced suffix is read, then substitute. + if (eos && buffer.isEmpty()) { + return EOS; + } + if (targetLengthIn <= 0) { + // short-circuit: ask nothing, give nothing + return 0; + } + // drain check + int targetIndex = targetIndexIn; + int targetLength = targetLengthIn; + if (isDraining()) { + // drain as much as possible + final int drainCount = drain(target, targetIndex, Math.min(toDrain, targetLength)); + if (drainCount == targetLength) { + // drained length requested, target is full, can only do more in the next invocation + return targetLength; + } + // drained less than requested, target not full. 
+ targetIndex += drainCount; + targetLength -= drainCount; + } + // BUFFER from the underlying reader + final int minReadLenPrefix = prefixEscapeMatcher.size(); + // READ enough to test for an [optionally escaped] variable start + int readCount = buffer(readCount(minReadLenPrefix, 0)); + if (buffer.length() < minReadLenPrefix && targetLength < minReadLenPrefix) { + // read less than minReadLenPrefix, no variable possible + final int drainCount = drain(target, targetIndex, targetLength); + targetIndex += drainCount; + final int targetSize = targetIndex - targetIndexIn; + return eos && targetSize <= 0 ? EOS : targetSize; + } + if (eos) { + // EOS + stringSubstitutor.replaceIn(buffer); + toDrain = buffer.size(); + final int drainCount = drain(target, targetIndex, targetLength); + targetIndex += drainCount; + final int targetSize = targetIndex - targetIndexIn; + return eos && targetSize <= 0 ? EOS : targetSize; + } + // PREFIX + // buffer and drain until we find a variable start, escaped or plain. + int balance = 0; + final StringMatcher prefixMatcher = stringSubstitutor.getVariablePrefixMatcher(); + int pos = 0; + while (targetLength > 0) { + if (isBufferMatchAt(prefixMatcher, 0)) { + balance = 1; + pos = prefixMatcher.size(); + break; + } else if (isBufferMatchAt(prefixEscapeMatcher, 0)) { + balance = 1; + pos = prefixEscapeMatcher.size(); + break; + } + // drain first char + final int drainCount = drain(target, targetIndex, 1); + targetIndex += drainCount; + targetLength -= drainCount; + if (buffer.size() < minReadLenPrefix) { + readCount = bufferOrDrainOnEos(minReadLenPrefix, target, targetIndex, targetLength); + if (eos || isDraining()) { + // if draining, readCount is a drain count + if (readCount != EOS) { + targetIndex += readCount; + targetLength -= readCount; + } + final int actual = targetIndex - targetIndexIn; + return actual > 0 ? 
actual : EOS; + } + } + } + // we found a variable start + if (targetLength <= 0) { + // no more room in target + return targetLengthIn; + } + // SUFFIX + // buffer more to find a balanced suffix + final StringMatcher suffixMatcher = stringSubstitutor.getVariableSuffixMatcher(); + final int minReadLenSuffix = Math.max(minReadLenPrefix, suffixMatcher.size()); + readCount = buffer(readCount(minReadLenSuffix, pos)); + if (eos) { + // EOS + stringSubstitutor.replaceIn(buffer); + toDrain = buffer.size(); + final int drainCount = drain(target, targetIndex, targetLength); + return targetIndex + drainCount - targetIndexIn; + } + // buffer and break out when we find the end or a balanced suffix + while (true) { + if (isBufferMatchAt(suffixMatcher, pos)) { + balance--; + pos++; + if (balance == 0) { + break; + } + } else if (isBufferMatchAt(prefixMatcher, pos)) { + balance++; + pos += prefixMatcher.size(); + } else if (isBufferMatchAt(prefixEscapeMatcher, pos)) { + balance++; + pos += prefixEscapeMatcher.size(); + } else { + pos++; + } + readCount = buffer(readCount(minReadLenSuffix, pos)); + if (readCount == EOS && pos >= buffer.size()) { + break; + } + } + // substitute + final int endPos = pos + 1; + final int leftover = Math.max(0, buffer.size() - pos); + stringSubstitutor.replaceIn(buffer, 0, Math.min(buffer.size(), endPos)); + pos = buffer.size() - leftover; + final int drainLen = Math.min(targetLength, pos); + // only drain up to what we've substituted + toDrain = pos; + drain(target, targetIndex, drainLen); + return targetIndex - targetIndexIn + drainLen; + } + + /** + * Returns how many chars to attempt reading to have room in the buffer for {@code count} chars starting at position + * {@code pos}. + */ + private int readCount(final int count, final int pos) { + final int avail = buffer.size() - pos; + return avail >= count ? 
0 : count - avail; + } + +} diff --git a/src/main/java/org/apache/commons/text/io/package-info.java b/src/main/java/org/apache/commons/text/io/package-info.java new file mode 100644 index 0000000000..c3944a403e --- /dev/null +++ b/src/main/java/org/apache/commons/text/io/package-info.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + *

+ * {@link org.apache.commons.text.io.StringSubstitutorReader} is a {@link java.io.Reader} that performs string + * substitution on a source {@code Reader} using a {@link org.apache.commons.text.StringSubstitutor}. + *

+ * + *

+ * Using this Reader avoids reading a whole file into memory as a {@code String} to perform string substitution, for + * example, when a Servlet filters a file to a client. + *

+ * + * @since 1.9 + */ +package org.apache.commons.text.io; diff --git a/src/main/java/org/apache/commons/text/lookup/AbstractStringLookup.java b/src/main/java/org/apache/commons/text/lookup/AbstractStringLookup.java new file mode 100644 index 0000000000..6858ce3ba7 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/AbstractStringLookup.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import org.apache.commons.lang3.StringUtils; + +/** + * A default lookup for others to extend in this package. + * + * @since 1.3 + */ +abstract class AbstractStringLookup implements StringLookup { + + /** + * The default split char. + */ + protected static final char SPLIT_CH = ':'; + + /** + * The default split string. + */ + protected static final String SPLIT_STR = String.valueOf(SPLIT_CH); + + /** + * Creates a lookup key for a given file and key. + */ + static String toLookupKey(final String left, final String right) { + return toLookupKey(left, SPLIT_STR, right); + } + + /** + * Creates a lookup key for a given file and key. 
+ */ + static String toLookupKey(final String left, final String separator, final String right) { + return left + separator + right; + } + + /** + * Returns the substring after the first occurrence of {@code ch} in {@code value}. + * + * @param value The source string. + * @param ch The character to search. + * @return a new string. + * @deprecated Use {@link StringUtils#substringAfter(String, int)}. + */ + @Deprecated + protected String substringAfter(final String value, final char ch) { + return StringUtils.substringAfter(value, ch); + } + + /** + * Returns the substring after the first occurrence of {@code str} in {@code value}. + * + * @param value The source string. + * @param str The string to search. + * @return a new string. + * @deprecated Use {@link StringUtils#substringAfter(String, String)}. + */ + @Deprecated + protected String substringAfter(final String value, final String str) { + return StringUtils.substringAfter(value, str); + } + + /** + * Returns the substring after the first occurrence of {@code ch} in {@code value}. + * + * @param value The source string. + * @param ch The character to search. + * @return a new string. + * @deprecated Use {@link StringUtils#substringAfterLast(String, int)}. + */ + @Deprecated + protected String substringAfterLast(final String value, final char ch) { + return StringUtils.substringAfterLast(value, ch); + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/BiFunctionStringLookup.java b/src/main/java/org/apache/commons/text/lookup/BiFunctionStringLookup.java new file mode 100644 index 0000000000..4eba940424 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/BiFunctionStringLookup.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ +package org.apache.commons.text.lookup; + +import java.util.Map; +import java.util.Objects; +import java.util.function.BiFunction; + +/** + * A function-based lookup where the request for a lookup is answered by applying that function with a key. + * + * @param A function's return type + * @param

A function's second input type + * + * @since 1.9 + */ +final class BiFunctionStringLookup implements BiStringLookup

{ + + /** + * Creates a new instance backed by a Function. + * + * @param the function's input type + * @param biFunction the function, may be null. + * @return a new instance backed by the given function. + */ + static BiFunctionStringLookup on(final BiFunction biFunction) { + return new BiFunctionStringLookup<>(biFunction); + } + + /** + * Creates a new instance backed by a Map. Used by the default lookup. + * + * @param the map's value type. + * @param map the map of keys to values, may be null. + * @return a new instance backed by the given map. + */ + static BiFunctionStringLookup on(final Map map) { + return on((key, u) -> map.get(key)); + } + + /** + * Function. + */ + private final BiFunction biFunction; + + /** + * Creates a new instance backed by a Function. + * + * @param biFunction the function, may be null. + */ + private BiFunctionStringLookup(final BiFunction biFunction) { + this.biFunction = biFunction; + } + + @Override + public String lookup(final String key) { + return lookup(key, null); + } + + /** + * Looks up a String key by applying the function. + *

+ * If the function is null, then null is returned. The function result object is converted to a string using + * toString(). + *

+ * + * @param key the key to be looked up, may be null. + * @return The function result as a string, may be null. + */ + @Override + public String lookup(final String key, final P object) { + if (biFunction == null) { + return null; + } + final R obj; + try { + obj = biFunction.apply(key, object); + } catch (final SecurityException | NullPointerException | IllegalArgumentException e) { + // Squelched. All lookup(String) will return null. + // Could be a ConcurrentHashMap and a null key request + return null; + } + return Objects.toString(obj, null); + } + + @Override + public String toString() { + return super.toString() + " [function=" + biFunction + "]"; + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/BiStringLookup.java b/src/main/java/org/apache/commons/text/lookup/BiStringLookup.java new file mode 100644 index 0000000000..ac5c879fe5 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/BiStringLookup.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.util.function.BiFunction; +import java.util.function.Function; + +/** + * Lookups a String key for a String value. + *

+ * This class represents the simplest form of a string to string map. It has a benefit over a map in that it can create + * the result on demand based on the key. + *

+ *

+ * For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the value + * on demand from the database. + *

+ *

+ * Like {@link BiFunction} is a variant of {@link Function}, this {@code BiStringLookup} is a variant of + * {@link StringLookup}. + *

+ * + * @param The second argument type. + * + * @since 1.9 + */ +@FunctionalInterface +public interface BiStringLookup extends StringLookup { + + /** + * Looks up a String key to provide a String value. + *

+ * The internal implementation may use any mechanism to return the value. The simplest implementation is to use a + * Map. However, virtually any implementation is possible. + *

+ *

+ * For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the + * value on demand from the database. Or, a numeric based implementation could be created that treats the key as an + * integer, increments the value and returns the result as a string - converting 1 to 2, 15 to 16 etc. + *

+ *

+ * This method always returns a String, regardless of the underlying data, by converting it as necessary. For + * example: + *

+ * + *
+     * Map<String, Object> map = new HashMap<String, Object>();
+     * map.put("number", new Integer(2));
+     * assertEquals("2", StringLookupFactory.biFunctionStringLookup(map).lookup("number", "A context object"));
+     * 
+ * + * @param key the key to look up, may be null. + * @param object ignored by default. + * @return The matching value, null if no match. + */ + default String lookup(final String key, final U object) { + return lookup(key); + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/ConstantStringLookup.java b/src/main/java/org/apache/commons/text/lookup/ConstantStringLookup.java new file mode 100644 index 0000000000..a7a5d6130e --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/ConstantStringLookup.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.commons.lang3.ClassUtils; +import org.apache.commons.text.StringSubstitutor; + +/** + *

+ * Looks up the value of a fully-qualified static final value. + *

+ *

+ * Sometimes it is necessary in a configuration file to refer to a constant defined in a class. This can be done with + * this lookup implementation. Variable names must be in the format {@code apackage.AClass.AFIELD}. The + * {@code lookup(String)} method will split the passed in string at the last dot, separating the fully qualified class + * name and the name of the constant (i.e. static final) member field. Then the class is loaded and the field's + * value is obtained using reflection. + *

+ *

+ * Once retrieved values are cached for fast access. This class is thread-safe. It can be used as a standard (i.e. + * global) lookup object and serve multiple clients concurrently. + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+ * StringLookupFactory.INSTANCE.constantStringLookup().lookup("java.awt.event.KeyEvent.VK_ESCAPE");
+ * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+ * StringSubstitutor.createInterpolator().replace("... ${const:java.awt.event.KeyEvent.VK_ESCAPE} ...");
+ * 
+ *

+ * The above examples convert {@code java.awt.event.KeyEvent.VK_ESCAPE} to {@code "27"}. + *

+ *

+ * This class was adapted from Apache Commons Configuration. + *

+ * + * @since 1.5 + */ +class ConstantStringLookup extends AbstractStringLookup { + + /** An internally used cache for already retrieved values. */ + private static final ConcurrentHashMap CONSTANT_CACHE = new ConcurrentHashMap<>(); + + /** Constant for the field separator. */ + private static final char FIELD_SEPARATOR = '.'; + + /** + * Defines the singleton for this class. + */ + static final ConstantStringLookup INSTANCE = new ConstantStringLookup(); + + /** + * Clears the shared cache with the so far resolved constants. + */ + static void clear() { + CONSTANT_CACHE.clear(); + } + + /** + * Loads the class with the specified name. If an application has special needs regarding the class loaders to be + * used, it can hook in here. This implementation delegates to the {@code getClass()} method of Commons Lang's + * + * ClassUtils. + * + * @param className the name of the class to be loaded + * @return The corresponding class object + * @throws ClassNotFoundException if the class cannot be loaded + */ + protected Class fetchClass(final String className) throws ClassNotFoundException { + return ClassUtils.getClass(className); + } + + /** + * Tries to resolve the specified variable. The passed in variable name is interpreted as the name of a static + * final member field of a class. If the value has already been obtained, it can be retrieved from an internal + * cache. Otherwise this method will invoke the {@code resolveField()} method and pass in the name of the class and + * the field. 
+ * + * @param key the name of the variable to be resolved + * @return The value of this variable or null if it cannot be resolved + */ + @Override + public synchronized String lookup(final String key) { + if (key == null) { + return null; + } + String result; + result = CONSTANT_CACHE.get(key); + if (result != null) { + return result; + } + final int fieldPos = key.lastIndexOf(FIELD_SEPARATOR); + if (fieldPos < 0) { + return null; + } + try { + final Object value = resolveField(key.substring(0, fieldPos), key.substring(fieldPos + 1)); + if (value != null) { + final String string = Objects.toString(value, null); + CONSTANT_CACHE.put(key, string); + result = string; + } + } catch (final Exception ex) { + // TODO it would be nice to log + return null; + } + return result; + } + + /** + * Determines the value of the specified constant member field of a class. This implementation will call + * {@code fetchClass()} to obtain the {@code java.lang.Class} object for the target class. Then it will use + * reflection to obtain the field's value. For this to work the field must be accessable. + * + * @param className the name of the class + * @param fieldName the name of the member field of that class to read + * @return The field's value + * @throws Exception if an error occurs + */ + protected Object resolveField(final String className, final String fieldName) throws Exception { + final Class clazz = fetchClass(className); + if (clazz == null) { + return null; + } + return clazz.getField(fieldName).get(null); + } +} diff --git a/src/main/java/org/apache/commons/text/lookup/DateStringLookup.java b/src/main/java/org/apache/commons/text/lookup/DateStringLookup.java new file mode 100644 index 0000000000..b3bcb9ccdf --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/DateStringLookup.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ +package org.apache.commons.text.lookup; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; + +import org.apache.commons.lang3.time.FastDateFormat; +import org.apache.commons.text.StringSubstitutor; + +/** + * Formats the current date with the format given in the key in a format compatible with + * {@link java.text.SimpleDateFormat}. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+ * StringLookupFactory.INSTANCE.dateStringLookup().lookup("yyyy-MM-dd");
+ * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+ * StringSubstitutor.createInterpolator().replace("... ${date:yyyy-MM-dd} ..."));
+ * 
+ *

+ * The above examples convert {@code "yyyy-MM-dd"} to today's date, for example, {@code "2019-08-04"}. + *

+ */ +final class DateStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + */ + static final DateStringLookup INSTANCE = new DateStringLookup(); + + /** + * No need to build instances for now. + */ + private DateStringLookup() { + // empty + } + + /** + * Formats the given {@code date} long with the given {@code format}. + * + * @param date the date to format + * @param format the format string for {@link SimpleDateFormat}. + * @return The formatted date + */ + private String formatDate(final long date, final String format) { + FastDateFormat dateFormat = null; + if (format != null) { + try { + dateFormat = FastDateFormat.getInstance(format); + } catch (final Exception ex) { + throw IllegalArgumentExceptions.format(ex, "Invalid date format: [%s]", format); + } + } + if (dateFormat == null) { + dateFormat = FastDateFormat.getInstance(); + } + return dateFormat.format(new Date(date)); + } + + /** + * Formats the current date with the format given in the key in a format compatible with + * {@link java.text.SimpleDateFormat}. + * + * @param key the format to use. If null, the default {@link DateFormat} will be used. + * @return The current date formatted with the given format. + */ + @Override + public String lookup(final String key) { + return formatDate(System.currentTimeMillis(), key); + } +} diff --git a/src/main/java/org/apache/commons/text/lookup/DefaultStringLookup.java b/src/main/java/org/apache/commons/text/lookup/DefaultStringLookup.java new file mode 100644 index 0000000000..628ec49387 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/DefaultStringLookup.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.lookup; + +/** + * An enumeration defining {@link StringLookup} objects available through {@link StringLookupFactory}. + *

+ * This enum was adapted and expanded from Apache Commons Configuration 2.4. + *

+ * + * @see StringLookupFactory + * @see StringLookup + * @since 1.7 + */ +public enum DefaultStringLookup { + + /** + * The lookup for Base64 decoding using the key {@value StringLookupFactory#KEY_BASE64_DECODER}. + */ + BASE64_DECODER(StringLookupFactory.KEY_BASE64_DECODER, StringLookupFactory.INSTANCE.base64DecoderStringLookup()), + + /** + * The lookup for Base64 encoding using the key {@value StringLookupFactory#KEY_BASE64_ENCODER}. + */ + BASE64_ENCODER(StringLookupFactory.KEY_BASE64_ENCODER, StringLookupFactory.INSTANCE.base64EncoderStringLookup()), + + /** + * The lookup for constants using the key {@value StringLookupFactory#KEY_CONST}. + */ + CONST(StringLookupFactory.KEY_CONST, StringLookupFactory.INSTANCE.constantStringLookup()), + + /** + * The lookup for dates using the key {@value StringLookupFactory#KEY_DATE}. + */ + DATE(StringLookupFactory.KEY_DATE, StringLookupFactory.INSTANCE.dateStringLookup()), + + /** + * The lookup for DNS using the key {@value StringLookupFactory#KEY_DNS}. + * + * @since 1.8 + */ + DNS(StringLookupFactory.KEY_DNS, StringLookupFactory.INSTANCE.dnsStringLookup()), + + /** + * The lookup for environment properties using the key {@value StringLookupFactory#KEY_ENV}. + */ + ENVIRONMENT(StringLookupFactory.KEY_ENV, StringLookupFactory.INSTANCE.environmentVariableStringLookup()), + + /** + * The lookup for files using the key {@value StringLookupFactory#KEY_FILE}. + */ + FILE(StringLookupFactory.KEY_FILE, StringLookupFactory.INSTANCE.fileStringLookup()), + + /** + * The lookup for Java platform information using the key {@value StringLookupFactory#KEY_JAVA}. + */ + JAVA(StringLookupFactory.KEY_JAVA, StringLookupFactory.INSTANCE.javaPlatformStringLookup()), + + /** + * The lookup for localhost information using the key {@value StringLookupFactory#KEY_LOCALHOST}. 
+ */ + LOCAL_HOST(StringLookupFactory.KEY_LOCALHOST, StringLookupFactory.INSTANCE.localHostStringLookup()), + + /** + * The lookup for properties using the key {@value StringLookupFactory#KEY_PROPERTIES}. + */ + PROPERTIES(StringLookupFactory.KEY_PROPERTIES, StringLookupFactory.INSTANCE.propertiesStringLookup()), + + /** + * The lookup for resource bundles using the key {@value StringLookupFactory#KEY_RESOURCE_BUNDLE}. + */ + RESOURCE_BUNDLE(StringLookupFactory.KEY_RESOURCE_BUNDLE, StringLookupFactory.INSTANCE.resourceBundleStringLookup()), + + /** + * The lookup for scripts using the key {@value StringLookupFactory#KEY_SCRIPT}. + */ + SCRIPT(StringLookupFactory.KEY_SCRIPT, StringLookupFactory.INSTANCE.scriptStringLookup()), + + /** + * The lookup for system properties using the key {@value StringLookupFactory#KEY_SYS}. + */ + SYSTEM_PROPERTIES(StringLookupFactory.KEY_SYS, StringLookupFactory.INSTANCE.systemPropertyStringLookup()), + + /** + * The lookup for URLs using the key {@value StringLookupFactory#KEY_URL}. + */ + URL(StringLookupFactory.KEY_URL, StringLookupFactory.INSTANCE.urlStringLookup()), + + /** + * The lookup for URL decoding using the key {@value StringLookupFactory#KEY_URL_DECODER}. + */ + URL_DECODER(StringLookupFactory.KEY_URL_DECODER, StringLookupFactory.INSTANCE.urlDecoderStringLookup()), + + /** + * The lookup for URL encoding using the key {@value StringLookupFactory#KEY_URL_ENCODER}. + */ + URL_ENCODER(StringLookupFactory.KEY_URL_ENCODER, StringLookupFactory.INSTANCE.urlEncoderStringLookup()), + + /** + * The lookup for XML using the key {@value StringLookupFactory#KEY_XML}. + */ + XML(StringLookupFactory.KEY_XML, StringLookupFactory.INSTANCE.xmlStringLookup()); + + /** The prefix under which the associated lookup object is registered. */ + private final String key; + + /** The associated lookup instance. 
*/ + private final StringLookup lookup; + + /** + * Creates a new instance of {@link DefaultStringLookup} and sets the key and the associated lookup instance. + * + * @param prefix the prefix + * @param lookup the {@link StringLookup} instance + */ + DefaultStringLookup(final String prefix, final StringLookup lookup) { + this.key = prefix; + this.lookup = lookup; + } + + /** + * Returns the standard prefix for the lookup object of this kind. + * + * @return the prefix + */ + public String getKey() { + return key; + } + + /** + * Returns the standard {@link StringLookup} instance of this kind. + * + * @return the associated {@link StringLookup} object + */ + public StringLookup getStringLookup() { + return lookup; + } +} diff --git a/src/main/java/org/apache/commons/text/lookup/DnsStringLookup.java b/src/main/java/org/apache/commons/text/lookup/DnsStringLookup.java new file mode 100644 index 0000000000..d9e6f9dfcc --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/DnsStringLookup.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. 
+ */ +package org.apache.commons.text.lookup; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +import org.apache.commons.text.StringSubstitutor; + +/** + * Looks up keys related to DNS entries like host name, canonical host name, host address. + *

+ * The lookup keys are: + *

+ *
    + *
  • name|address: for the host name, for example {@code "name|93.184.216.34"} -> + * {@code "example.com"}.
  • + *
  • canonical-name|address: for the canonical host name, for example {@code "canonical-name|93.184.216.34"} -> + * {@code "example.com"}.
  • + *
  • address|hostname: for the host address, for example {@code "address|example.com"} -> + * {@code "93.184.216.34"}.
  • + *
  • address: same as {@code address|hostname}.
  • + *
+ * + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+ * StringLookupFactory.INSTANCE.dnsStringLookup().lookup("address|apache.org");
+ * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+ * StringSubstitutor.createInterpolator().replace("... ${dns:address|apache.org} ...");
+ * 
+ *

+ * The above examples convert {@code "address|apache.org"} to {@code "95.216.24.32"} (or {@code "40.79.78.1"}). + *

+ * + * @since 1.8 + */ +final class DnsStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + */ + static final DnsStringLookup INSTANCE = new DnsStringLookup(); + + /** + * No need to build instances for now. + */ + private DnsStringLookup() { + // empty + } + + /** + * Looks up the DNS value of the key. + * + * @param key the key to be looked up, may be null + * @return The DNS value. + */ + @Override + public String lookup(final String key) { + if (key == null) { + return null; + } + final String[] keys = key.trim().split("\\|"); + final int keyLen = keys.length; + final String subKey = keys[0].trim(); + final String subValue = keyLen < 2 ? key : keys[1].trim(); + try { + final InetAddress inetAddress = InetAddress.getByName(subValue); + switch (subKey) { + case InetAddressKeys.KEY_NAME: + return inetAddress.getHostName(); + case InetAddressKeys.KEY_CANONICAL_NAME: + return inetAddress.getCanonicalHostName(); + case InetAddressKeys.KEY_ADDRESS: + return inetAddress.getHostAddress(); + default: + return inetAddress.getHostAddress(); + } + } catch (final UnknownHostException e) { + return null; + } + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/FileStringLookup.java b/src/main/java/org/apache/commons/text/lookup/FileStringLookup.java new file mode 100644 index 0000000000..de150279cb --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/FileStringLookup.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.nio.file.Files; +import java.nio.file.Paths; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringSubstitutor; + +/** + * Looks up the contents of a file. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+ * StringLookupFactory.INSTANCE.fileStringLookup().lookup("UTF-8:com/domain/document.properties");
+ * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+ * StringSubstitutor.createInterpolator().replace("... ${file:UTF-8:com/domain/document.properties} ...");
+ * 
+ *

+ * The above examples convert {@code "UTF-8:SomePath"} to the contents of the file. + *

+ * + * @since 1.5 + */ +final class FileStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + */ + static final AbstractStringLookup INSTANCE = new FileStringLookup(); + + /** + * No need to build instances for now. + */ + private FileStringLookup() { + // empty + } + + /** + * Looks up the contents of the file for the key in the format "CharsetName:DocumentPath". + *

+ * For example: "UTF-8:com/domain/document.properties". + *

+ * + * @param key the key to be looked up, may be null + * @return The contents of the file decoded with the given charset, or null if the key is null. + */ + @Override + public String lookup(final String key) { + if (key == null) { + return null; + } + final String[] keys = key.split(String.valueOf(SPLIT_CH)); + final int keyLen = keys.length; + if (keyLen < 2) { + throw IllegalArgumentExceptions + .format("Bad file key format [%s], expected format is CharsetName:DocumentPath.", key); + } + final String charsetName = keys[0]; + final String fileName = StringUtils.substringAfter(key, SPLIT_CH); + try { + return new String(Files.readAllBytes(Paths.get(fileName)), charsetName); + } catch (final Exception e) { + throw IllegalArgumentExceptions.format(e, "Error looking up file [%s] with charset [%s].", fileName, + charsetName); + } + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/FunctionStringLookup.java b/src/main/java/org/apache/commons/text/lookup/FunctionStringLookup.java new file mode 100644 index 0000000000..50dd18d292 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/FunctionStringLookup.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. 
+ */ +package org.apache.commons.text.lookup; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; + +/** + * A function-based lookup where the request for a lookup is answered by applying that function with a key. + * + * @param A function's input type + * + * @since 1.9 + */ +final class FunctionStringLookup extends AbstractStringLookup { + + /** + * Creates a new instance backed by a Function. + * + * @param the function's input type + * @param function the function, may be null. + * @return a new instance backed by the given function. + */ + static FunctionStringLookup on(final Function function) { + return new FunctionStringLookup<>(function); + } + + /** + * Creates a new instance backed by a Map. Used by the default lookup. + * + * @param the map's value type. + * @param map the map of keys to values, may be null. + * @return a new instance backed by the given map. + */ + static FunctionStringLookup on(final Map map) { + return on((map == null ? Collections.emptyMap() : map)::get); + } + + /** + * Function. + */ + private final Function function; + + /** + * Creates a new instance backed by a Function. + * + * @param function the function, may be null. + */ + private FunctionStringLookup(final Function function) { + this.function = function; + } + + /** + * Looks up a String key by applying the function. + *

+ * If the function is null, then null is returned. The function result object is converted to a string using + * toString(). + *

+ * + * @param key the key to be looked up, may be null. + * @return The function result as a string, may be null. + */ + @Override + public String lookup(final String key) { + if (function == null) { + return null; + } + final V obj; + try { + obj = function.apply(key); + } catch (final SecurityException | NullPointerException | IllegalArgumentException e) { + // Squelched. All lookup(String) will return null. + // Could be a ConcurrentHashMap and a null key request + return null; + } + return Objects.toString(obj, null); + } + + @Override + public String toString() { + return super.toString() + " [function=" + function + "]"; + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/IllegalArgumentExceptions.java b/src/main/java/org/apache/commons/text/lookup/IllegalArgumentExceptions.java new file mode 100644 index 0000000000..50b3926ca4 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/IllegalArgumentExceptions.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +/** + * Shorthands creating {@link IllegalArgumentException} instances using formatted strings. 
+ * + * @since 1.3 + */ +final class IllegalArgumentExceptions { + + /** + * Creates an {@link IllegalArgumentException} with a message + * formatted with {@link String#format(String,Object...)}. + * + * @param format See {@link String#format(String,Object...)} + * @param args See {@link String#format(String,Object...)} + * @return an {@link IllegalArgumentException} with a message formatted with {@link String#format(String,Object...)} + */ + static IllegalArgumentException format(final String format, final Object... args) { + return new IllegalArgumentException(String.format(format, args)); + } + + /** + * Creates an {@link IllegalArgumentException} with a message + * formatted with {@link String#format(String,Object...)}. + * + * @param t the throwable cause + * @param format See {@link String#format(String,Object...)} + * @param args See {@link String#format(String,Object...)} + * @return an {@link IllegalArgumentException} with a message formatted with {@link String#format(String,Object...)} + */ + static IllegalArgumentException format(final Throwable t, final String format, final Object... args) { + return new IllegalArgumentException(String.format(format, args), t); + } + + /** + * No need to build instances. + */ + private IllegalArgumentExceptions() { + // empty + } +} diff --git a/src/main/java/org/apache/commons/text/lookup/InetAddressKeys.java b/src/main/java/org/apache/commons/text/lookup/InetAddressKeys.java new file mode 100644 index 0000000000..1b74de6099 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/InetAddressKeys.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.net.InetAddress; + +/** + * Constants for referring to {@link InetAddress} APIs. + * + * @since 1.8 + */ +final class InetAddressKeys { + + /** + * Constants for referring to {@link InetAddress#getHostAddress()}. + */ + static final String KEY_ADDRESS = "address"; + + /** + * Constants for referring to {@link InetAddress#getCanonicalHostName()}. + */ + static final String KEY_CANONICAL_NAME = "canonical-name"; + + /** + * Constants for referring to {@link InetAddress#getHostName()}. + */ + static final String KEY_NAME = "name"; + + private InetAddressKeys() { + // noop + } +} diff --git a/src/main/java/org/apache/commons/text/lookup/InterpolatorStringLookup.java b/src/main/java/org/apache/commons/text/lookup/InterpolatorStringLookup.java new file mode 100644 index 0000000000..3e02f6048e --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/InterpolatorStringLookup.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ +package org.apache.commons.text.lookup; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; + +/** + * Proxies other {@link StringLookup}s using keys within ${} markers using the format "${StringLookup:Key}". + *

+ * Uses the {@link StringLookupFactory default lookups}. + *

+ */ +class InterpolatorStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + * + * @since 1.6 + */ + static final AbstractStringLookup INSTANCE = new InterpolatorStringLookup(); + + /** Constant for the prefix separator. */ + private static final char PREFIX_SEPARATOR = ':'; + + static String toKey(final String key) { + return key.toLowerCase(Locale.ROOT); + } + + /** The default string lookup. */ + private final StringLookup defaultStringLookup; + + /** The map of String lookups keyed by prefix. */ + private final Map stringLookupMap; + + /** + * Creates an instance using only lookups that work without initial properties and are stateless. + *

+ * Uses the {@link StringLookupFactory default lookups}. + *

+ */ + InterpolatorStringLookup() { + this((Map) null); + } + + /** + * Creates a fully customized instance. + * + * @param stringLookupMap the map of string lookups. + * @param defaultStringLookup the default string lookup. + * @param addDefaultLookups whether the default lookups should be used. + */ + InterpolatorStringLookup(final Map stringLookupMap, final StringLookup defaultStringLookup, + final boolean addDefaultLookups) { + this.defaultStringLookup = defaultStringLookup; + this.stringLookupMap = new HashMap<>(stringLookupMap.size()); + for (final Entry entry : stringLookupMap.entrySet()) { + this.stringLookupMap.put(toKey(entry.getKey()), entry.getValue()); + } + if (addDefaultLookups) { + StringLookupFactory.INSTANCE.addDefaultStringLookups(this.stringLookupMap); + } + } + + /** + * Creates an instance using only lookups that work without initial properties and are stateless. + *

+ * Uses the {@link StringLookupFactory default lookups}. + *

+ * + * @param the map's value type. + * @param defaultMap the default map for string lookups. + */ + InterpolatorStringLookup(final Map defaultMap) { + this(StringLookupFactory.INSTANCE.mapStringLookup(defaultMap == null ? new HashMap() : defaultMap)); + } + + /** + * Creates an instance with the given lookup. + * + * @param defaultStringLookup the default lookup. + */ + InterpolatorStringLookup(final StringLookup defaultStringLookup) { + this(new HashMap<>(), defaultStringLookup, true); + } + + /** + * Gets the lookup map. + * + * @return The lookup map. + */ + public Map getStringLookupMap() { + return stringLookupMap; + } + + /** + * Resolves the specified variable. This implementation will try to extract a variable prefix from the given + * variable name (the first colon (':') is used as prefix separator). It then passes the name of the variable with + * the prefix stripped to the lookup object registered for this prefix. If no prefix can be found or if the + * associated lookup object cannot resolve this variable, the default lookup object will be used. 
+ * + * @param var the name of the variable whose value is to be looked up + * @return The value of this variable or null if it cannot be resolved + */ + @Override + public String lookup(String var) { + if (var == null) { + return null; + } + + final int prefixPos = var.indexOf(PREFIX_SEPARATOR); + if (prefixPos >= 0) { + final String prefix = toKey(var.substring(0, prefixPos)); + final String name = var.substring(prefixPos + 1); + final StringLookup lookup = stringLookupMap.get(prefix); + String value = null; + if (lookup != null) { + value = lookup.lookup(name); + } + + if (value != null) { + return value; + } + var = var.substring(prefixPos + 1); + } + if (defaultStringLookup != null) { + return defaultStringLookup.lookup(var); + } + return null; + } + + @Override + public String toString() { + return super.toString() + " [stringLookupMap=" + stringLookupMap + ", defaultStringLookup=" + + defaultStringLookup + "]"; + } +} diff --git a/src/main/java/org/apache/commons/text/lookup/JavaPlatformStringLookup.java b/src/main/java/org/apache/commons/text/lookup/JavaPlatformStringLookup.java new file mode 100644 index 0000000000..5339c83f16 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/JavaPlatformStringLookup.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the license for the specific language governing permissions and + * limitations under the license. + */ +package org.apache.commons.text.lookup; + +import java.util.Locale; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringSubstitutor; + +/** + * Looks up keys related to Java: Java version, JRE version, VM version, and so on. + *

+ * The lookup keys with examples are: + *

+ *
    + *
  • version: "Java version 1.8.0_181"
  • + *
  • runtime: "Java(TM) SE Runtime Environment (build 1.8.0_181-b13) from Oracle Corporation"
  • + *
  • vm: "Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)"
  • + *
  • os: "Windows 10 10.0, architecture: amd64-64"
  • + *
  • hardware: "processors: 4, architecture: amd64-64, instruction sets: amd64"
  • + *
  • locale: "default locale: en_US, platform encoding: iso-8859-1"
  • + *
+ * + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+ * StringLookupFactory.INSTANCE.javaPlatformStringLookup().lookup("version");
+ * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+ * StringSubstitutor.createInterpolator().replace("... ${java:version} ...");
+ * 
+ *

+ * The above examples convert {@code "version"} to the current VM version, for example, + * {@code "Java version 1.8.0_181"}. + *

+ *
+ * @since 1.3
+ */
+final class JavaPlatformStringLookup extends AbstractStringLookup {
+
+    /**
+     * The only instance of this lookup.
+     */
+    static final JavaPlatformStringLookup INSTANCE = new JavaPlatformStringLookup();
+
+    /** Lookup key {@code hardware}, answered by {@link #getHardware()}. */
+    private static final String KEY_HARDWARE = "hardware";
+
+    /** Lookup key {@code locale}, answered by {@link #getLocale()}. */
+    private static final String KEY_LOCALE = "locale";
+
+    /** Lookup key {@code os}, answered by {@link #getOperatingSystem()}. */
+    private static final String KEY_OS = "os";
+
+    /** Lookup key {@code runtime}, answered by {@link #getRuntime()}. */
+    private static final String KEY_RUNTIME = "runtime";
+
+    /** Lookup key {@code version}, answered with the {@code java.version} system property. */
+    private static final String KEY_VERSION = "version";
+
+    /** Lookup key {@code vm}, answered by {@link #getVirtualMachine()}. */
+    private static final String KEY_VM = "vm";
+
+    /**
+     * Prints this class followed by one {@code key = value} line per supported key to standard output.
+     *
+     * @param args command line arguments, not used.
+     */
+    public static void main(final String[] args) {
+        // Same print order as the individual printf calls this loop replaces.
+        final String[] keysInPrintOrder = {
+            KEY_VERSION, KEY_RUNTIME, KEY_VM, KEY_OS, KEY_HARDWARE, KEY_LOCALE
+        };
+        System.out.println(JavaPlatformStringLookup.class);
+        for (final String platformKey : keysInPrintOrder) {
+            System.out.printf("%s = %s%n", platformKey, JavaPlatformStringLookup.INSTANCE.lookup(platformKey));
+        }
+    }
+
+    /**
+     * Private: use {@link #INSTANCE}.
+     */
+    private JavaPlatformStringLookup() {
+        // nothing to initialize
+    }
+
+    /**
+     * Describes the hardware; reachable through the lookup key {@code hardware}.
+     *
+     * @return the processor count, the architecture and, when the property is set, the instruction sets.
+     */
+    String getHardware() {
+        final int processorCount = Runtime.getRuntime().availableProcessors();
+        final String architecture = getSystemProperty("os.arch") + getSystemProperty("-", "sun.arch.data.model");
+        final String instructionSets = getSystemProperty(", instruction sets: ", "sun.cpu.isalist");
+        return "processors: " + processorCount + ", architecture: " + architecture + instructionSets;
+    }
+
+    /**
+     * Describes the locale; reachable through the lookup key {@code locale}.
+     *
+     * @return the default locale and the {@code file.encoding} system property.
+     */
+    String getLocale() {
+        final Locale defaultLocale = Locale.getDefault();
+        final String encoding = getSystemProperty("file.encoding");
+        return "default locale: " + defaultLocale + ", platform encoding: " + encoding;
+    }
+
+    /**
+     * Describes the operating system; reachable through the lookup key {@code os}.
+     *
+     * @return the OS name, version, optional patch level and architecture.
+     */
+    String getOperatingSystem() {
+        final String nameAndVersion = getSystemProperty("os.name") + " " + getSystemProperty("os.version");
+        final String patchLevel = getSystemProperty(" ", "sun.os.patch.level");
+        final String architecture = getSystemProperty("os.arch") + getSystemProperty("-", "sun.arch.data.model");
+        return nameAndVersion + patchLevel + ", architecture: " + architecture;
+    }
+
+    /**
+     * Describes the Java runtime; reachable through the lookup key {@code runtime}.
+     *
+     * @return the runtime name, build version and vendor.
+     */
+    String getRuntime() {
+        final String runtimeName = getSystemProperty("java.runtime.name");
+        final String runtimeVersion = getSystemProperty("java.runtime.version");
+        final String vendor = getSystemProperty("java.vendor");
+        return runtimeName + " (build " + runtimeVersion + ") from " + vendor;
+    }
+
+    /**
+     * Reads a system property through {@code StringLookupFactory.INSTANCE_SYSTEM_PROPERTIES}.
+     *
+     * @param name the system property name.
+     * @return the value returned by that lookup.
+     */
+    private String getSystemProperty(final String name) {
+        return StringLookupFactory.INSTANCE_SYSTEM_PROPERTIES.lookup(name);
+    }
+
+    /**
+     * Reads a system property and prepends a prefix, unless the property is empty.
+     *
+     * @param prefix the text to put in front of the property value.
+     * @param name the system property name.
+     * @return {@code prefix + value}, or the empty string when {@code StringUtils.isEmpty(value)} holds.
+     */
+    private String getSystemProperty(final String prefix, final String name) {
+        final String propertyValue = getSystemProperty(name);
+        return StringUtils.isEmpty(propertyValue) ? StringUtils.EMPTY : prefix + propertyValue;
+    }
+
+    /**
+     * Describes the Java virtual machine; reachable through the lookup key {@code vm}.
+     *
+     * @return the VM name, build version and info.
+     */
+    String getVirtualMachine() {
+        final String vmName = getSystemProperty("java.vm.name");
+        final String vmVersion = getSystemProperty("java.vm.version");
+        final String vmInfo = getSystemProperty("java.vm.info");
+        return vmName + " (build " + vmVersion + ", " + vmInfo + ")";
+    }
+
+    /**
+     * Resolves a Java platform key to a descriptive string.
+     * <p>
+     * The lookup keys with examples are:
+     * </p>
+     * <ul>
+     * <li><b>version</b>: "Java version 1.8.0_181"</li>
+     * <li><b>runtime</b>: "Java(TM) SE Runtime Environment (build 1.8.0_181-b13) from Oracle Corporation"</li>
+     * <li><b>vm</b>: "Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)"</li>
+     * <li><b>os</b>: "Windows 10 10.0, architecture: amd64-64"</li>
+     * <li><b>hardware</b>: "processors: 4, architecture: amd64-64, instruction sets: amd64"</li>
+     * <li><b>locale</b>: "default locale: en_US, platform encoding: iso-8859-1"</li>
+     * </ul>
+     *
+     * @param key the key to be looked up, may be null
+     * @return the description selected by {@code key}, or {@code null} when {@code key} is null.
+     * @throws IllegalArgumentException if {@code key} is non-null and not one of the keys listed above.
+     */
+    @Override
+    public String lookup(final String key) {
+        if (key == null) {
+            return null;
+        }
+        if (KEY_VERSION.equals(key)) {
+            return "Java version " + getSystemProperty("java.version");
+        }
+        if (KEY_RUNTIME.equals(key)) {
+            return getRuntime();
+        }
+        if (KEY_VM.equals(key)) {
+            return getVirtualMachine();
+        }
+        if (KEY_OS.equals(key)) {
+            return getOperatingSystem();
+        }
+        if (KEY_HARDWARE.equals(key)) {
+            return getHardware();
+        }
+        if (KEY_LOCALE.equals(key)) {
+            return getLocale();
+        }
+        throw new IllegalArgumentException(key);
+    }
+}
diff --git a/src/main/java/org/apache/commons/text/lookup/LocalHostStringLookup.java b/src/main/java/org/apache/commons/text/lookup/LocalHostStringLookup.java new file mode 100644 index 0000000000..20271adbec --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/LocalHostStringLookup.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ +package org.apache.commons.text.lookup; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +/** + * Looks up keys related to the local host: host name, canonical host name, host address. + *

+ * The lookup keys are: + *

+ *
    + *
  • name: for the local host name, for example {@code EXAMPLE}.
  • + *
  • canonical-name: for the local canonical host name, for example {@code EXAMPLE.apache.org}.
  • + *
  • address: for the local host address, for example {@code 192.168.56.1}.
  • + *
+ *
+ * @since 1.3
+ */
+final class LocalHostStringLookup extends AbstractStringLookup {
+
+    /**
+     * The only instance of this lookup.
+     */
+    static final LocalHostStringLookup INSTANCE = new LocalHostStringLookup();
+
+    /**
+     * Private: use {@link #INSTANCE}.
+     */
+    private LocalHostStringLookup() {
+        // nothing to initialize
+    }
+
+    /**
+     * Resolves a local host key to the host name, canonical host name or host address of
+     * {@link InetAddress#getLocalHost()}.
+     *
+     * @param key the key to be looked up, may be null.
+     * @return the requested local host attribute; {@code null} when {@code key} is null or when the local host
+     *         cannot be resolved.
+     * @throws IllegalArgumentException if {@code key} is non-null and not one of the keys handled here.
+     */
+    @Override
+    public String lookup(final String key) {
+        if (key == null) {
+            return null;
+        }
+        try {
+            // The local host is resolved only after a key has matched, so an unknown key
+            // always fails with IllegalArgumentException, independent of host resolution.
+            if (InetAddressKeys.KEY_NAME.equals(key)) {
+                return InetAddress.getLocalHost().getHostName();
+            }
+            if (InetAddressKeys.KEY_CANONICAL_NAME.equals(key)) {
+                return InetAddress.getLocalHost().getCanonicalHostName();
+            }
+            if (InetAddressKeys.KEY_ADDRESS.equals(key)) {
+                return InetAddress.getLocalHost().getHostAddress();
+            }
+            throw new IllegalArgumentException(key);
+        } catch (final UnknownHostException unresolved) {
+            // An unresolvable local host is reported as "no value".
+            return null;
+        }
+    }
+}
diff --git a/src/main/java/org/apache/commons/text/lookup/PropertiesStringLookup.java b/src/main/java/org/apache/commons/text/lookup/PropertiesStringLookup.java new file mode 100644 index 0000000000..2f5d9a27b6 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/PropertiesStringLookup.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Properties; + +import org.apache.commons.lang3.StringUtils; + +/** + * Looks up keys from a properties file. + *

+ * Looks up the value for a given key in the format "Document::Key". + *

+ *

+ * Note the use of "::" instead of ":" to allow for "C:" drive letters in paths. + *

+ *

+ * For example: "com/domain/document.properties::key". + *

+ *
+ * @since 1.5
+ */
+final class PropertiesStringLookup extends AbstractStringLookup {
+
+    /**
+     * The only instance of this lookup.
+     */
+    static final PropertiesStringLookup INSTANCE = new PropertiesStringLookup();
+
+    /** Text that separates the document path from the property key. */
+    static final String SEPARATOR = "::";
+
+    /**
+     * Builds a lookup key by delegating to {@code AbstractStringLookup.toLookupKey} with {@link #SEPARATOR}.
+     *
+     * @param file the properties document path.
+     * @param key the property key.
+     * @return the combined lookup key.
+     */
+    static String toPropertyKey(final String file, final String key) {
+        return AbstractStringLookup.toLookupKey(file, SEPARATOR, key);
+    }
+
+    /**
+     * Private: use {@link #INSTANCE}.
+     */
+    private PropertiesStringLookup() {
+        // nothing to initialize
+    }
+
+    /**
+     * Looks up the value for the key in the format "DocumentPath::Key".
+     * <p>
+     * For example: "com/domain/document.properties::key".
+     * </p>
+     * <p>
+     * Note the use of "::" instead of ":" to allow for "C:" drive letters in paths. Everything after the first
+     * "::" is used as the property key.
+     * </p>
+     *
+     * @param key the key to be looked up, may be null
+     * @return the property value, or {@code null} when {@code key} is null or the document has no such property.
+     * @throws IllegalArgumentException if {@code key} lacks the "::" separated parts, or if the document cannot
+     *         be read.
+     */
+    @Override
+    public String lookup(final String key) {
+        if (key == null) {
+            return null;
+        }
+        final String[] parts = key.split(SEPARATOR);
+        if (parts.length < 2) {
+            throw IllegalArgumentExceptions.format("Bad properties key format [%s]; expected format is %s.", key,
+                toPropertyKey("DocumentPath", "Key"));
+        }
+        final String documentPath = parts[0];
+        // Taken from the raw key so that later "::" occurrences stay part of the property key.
+        final String propertyKey = StringUtils.substringAfter(key, SEPARATOR);
+        final Properties loaded = new Properties();
+        try (InputStream documentStream = Files.newInputStream(Paths.get(documentPath))) {
+            loaded.load(documentStream);
+            return loaded.getProperty(propertyKey);
+        } catch (final Exception e) {
+            throw IllegalArgumentExceptions.format(e, "Error looking up properties [%s] and key [%s].", documentPath,
+                propertyKey);
+        }
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/text/lookup/ResourceBundleStringLookup.java b/src/main/java/org/apache/commons/text/lookup/ResourceBundleStringLookup.java new file mode 100644 index 0000000000..cd27168cbc --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/ResourceBundleStringLookup.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.util.MissingResourceException; +import java.util.ResourceBundle; + +/** + * Looks up keys from resource bundles. + *

+ * Looks up the value for a given key in the format "BundleName:BundleKey". + *

+ *

+ * For example: "com.domain.messages:MyKey". + *

+ *
+ * @see ResourceBundle
+ * @since 1.3
+ */
+final class ResourceBundleStringLookup extends AbstractStringLookup {
+
+    /**
+     * The shared instance, not tied to any bundle.
+     */
+    static final ResourceBundleStringLookup INSTANCE = new ResourceBundleStringLookup();
+
+    /**
+     * The bundle this instance is restricted to, or {@code null} when the bundle name comes from the key.
+     */
+    private final String bundleName;
+
+    /**
+     * Constructs an instance that takes the bundle name from each key.
+     *
+     * This ctor is not private to allow Mockito spying.
+     */
+    ResourceBundleStringLookup() {
+        this(null);
+    }
+
+    /**
+     * Constructs an instance that only works for the given bundle.
+     *
+     * @param bundleName the name of the resource bundle from which we will look keys up.
+     * @since 1.5
+     */
+    ResourceBundleStringLookup(final String bundleName) {
+        this.bundleName = bundleName;
+    }
+
+    /**
+     * Loads a bundle with {@link ResourceBundle#getBundle(String)}.
+     *
+     * @param keyBundleName the bundle name.
+     * @return the bundle.
+     */
+    ResourceBundle getBundle(final String keyBundleName) {
+        // The ResourceBundle class caches bundles, no need to cache here.
+        return ResourceBundle.getBundle(keyBundleName);
+    }
+
+    /**
+     * Reads one string from one bundle.
+     *
+     * @param keyBundleName the bundle name.
+     * @param bundleKey the key inside the bundle.
+     * @return the bundle string for {@code bundleKey}.
+     */
+    String getString(final String keyBundleName, final String bundleKey) {
+        final ResourceBundle bundle = getBundle(keyBundleName);
+        return bundle.getString(bundleKey);
+    }
+
+    /**
+     * Looks up the value for the key in the format "BundleName:BundleKey", or just "BundleKey" when this instance
+     * was constructed for a fixed bundle.
+     *
+     * For example: "com.domain.messages:MyKey".
+     *
+     * @param key the key to be looked up, may be null
+     * @return the bundle string, or {@code null} when {@code key} is null or the bundle or key is missing.
+     * @throws IllegalArgumentException if {@code key} does not have the expected number of parts, or if the lookup
+     *         fails with anything other than a {@link MissingResourceException}.
+     * @see ResourceBundle
+     * @see ResourceBundle#getBundle(String)
+     * @see ResourceBundle#getString(String)
+     */
+    @Override
+    public String lookup(final String key) {
+        if (key == null) {
+            return null;
+        }
+        final String[] parts = key.split(SPLIT_STR);
+        final String keyBundleName;
+        final String bundleKey;
+        if (bundleName == null) {
+            // No fixed bundle: the key must name both the bundle and the entry.
+            if (parts.length != 2) {
+                throw IllegalArgumentExceptions
+                    .format("Bad resource bundle key format [%s]; expected format is BundleName:KeyName.", key);
+            }
+            keyBundleName = parts[0];
+            bundleKey = parts[1];
+        } else {
+            // Fixed bundle: the key is the entry name only.
+            if (parts.length != 1) {
+                throw IllegalArgumentExceptions.format("Bad resource bundle key format [%s]; expected format is KeyName.",
+                    key);
+            }
+            keyBundleName = bundleName;
+            bundleKey = parts[0];
+        }
+        try {
+            return getString(keyBundleName, bundleKey);
+        } catch (final MissingResourceException e) {
+            // The key is missing, return null such that an interpolator can supply a default value.
+            return null;
+        } catch (final Exception e) {
+            // Should only be a ClassCastException
+            throw IllegalArgumentExceptions.format(e, "Error looking up resource bundle [%s] and key [%s].",
+                keyBundleName, bundleKey);
+        }
+    }
+
+    @Override
+    public String toString() {
+        return super.toString() + " [bundleName=" + bundleName + "]";
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/text/lookup/ScriptStringLookup.java b/src/main/java/org/apache/commons/text/lookup/ScriptStringLookup.java new file mode 100644 index 0000000000..3397c9b3c6 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/ScriptStringLookup.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.util.Objects; + +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; + +import org.apache.commons.text.StringSubstitutor; + +/** + * Executes the script with the given engine name. + *

+ * Execute the script with the engine name in the format "EngineName:Script". + *

+ *

+ * For example: {@code "javascript:3 + 4"}. + *

+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+ * StringSubstitutor.createInterpolator().replace("${script:javascript:3 + 4}");
+ * 
+ *
+ * @since 1.5
+ */
+final class ScriptStringLookup extends AbstractStringLookup {
+
+    /**
+     * The only instance of this lookup.
+     */
+    static final ScriptStringLookup INSTANCE = new ScriptStringLookup();
+
+    /**
+     * Private: use {@link #INSTANCE}.
+     */
+    private ScriptStringLookup() {
+        // nothing to initialize
+    }
+
+    /**
+     * Execute the script with the engine name in the format "EngineName:Script". The key is split in two at
+     * most, so any further separators stay part of the script.
+     * <p>
+     * For example: {@code "javascript:3 + 4"}.
+     * </p>
+     *
+     * @param key the engine:script to execute, may be null
+     * @return the evaluation result as a string; {@code null} when {@code key} is null or the script evaluates
+     *         to {@code null}.
+     * @throws IllegalArgumentException if {@code key} has no separator, if no engine has the given name, or if
+     *         evaluation fails.
+     */
+    @Override
+    public String lookup(final String key) {
+        if (key == null) {
+            return null;
+        }
+        final String[] parts = key.split(SPLIT_STR, 2);
+        if (parts.length != 2) {
+            throw IllegalArgumentExceptions.format("Bad script key format [%s]; expected format is EngineName:Script.",
+                key);
+        }
+        final String engineName = parts[0];
+        final String script = parts[1];
+        try {
+            final ScriptEngine engine = new ScriptEngineManager().getEngineByName(engineName);
+            if (engine == null) {
+                // Thrown inside the try on purpose: it is wrapped by the catch below like any other failure.
+                throw new IllegalArgumentException("No script engine named " + engineName);
+            }
+            final Object result = engine.eval(script);
+            return Objects.toString(result, null);
+        } catch (final Exception e) {
+            throw IllegalArgumentExceptions.format(e, "Error in script engine [%s] evaluating script [%s].", engineName,
+                script);
+        }
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/text/lookup/StringLookup.java b/src/main/java/org/apache/commons/text/lookup/StringLookup.java new file mode 100644 index 0000000000..df0f2b95a0 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/StringLookup.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +/** + * Lookups a String key for a String value. + *

+ * This class represents the simplest form of a string to string map. It has a benefit over a map in that it can create + * the result on demand based on the key. + *

+ *

+ * For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the value + * on demand from the database. + *

+ *
+ * @since 1.3
+ */
+@FunctionalInterface
+public interface StringLookup {
+
+    /**
+     * Looks up a String key to provide a String value.
+     * <p>
+     * The internal implementation may use any mechanism to return the value. The simplest implementation is to use a
+     * Map. However, virtually any implementation is possible.
+     * </p>
+     * <p>
+     * For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the
+     * value on demand from the database. Or, a numeric based implementation could be created that treats the key as
+     * an integer, increments the value and returns the result as a string - converting 1 to 2, 15 to 16 etc.
+     * </p>
+     * <p>
+     * This method always returns a String, regardless of the underlying data, by converting it as necessary. For
+     * example:
+     * </p>
+     *
+     * <pre>
+     * Map&lt;String, Object&gt; map = new HashMap&lt;String, Object&gt;();
+     * map.put("number", Integer.valueOf(2));
+     * assertEquals("2", StringLookupFactory.mapStringLookup(map).lookup("number"));
+     * </pre>
+     *
+     * @param key the key to look up, may be null.
+     * @return The matching value, null if no match.
+     */
+    String lookup(String key);
+}
diff --git a/src/main/java/org/apache/commons/text/lookup/StringLookupFactory.java b/src/main/java/org/apache/commons/text/lookup/StringLookupFactory.java new file mode 100644 index 0000000000..b38dd414e3 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/StringLookupFactory.java @@ -0,0 +1,1138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.Map; +import java.util.function.BiFunction; +import java.util.function.Function; + +import org.apache.commons.text.StringSubstitutor; + +/** + * Create instances of string lookups or access singleton string lookups implemented in this package. + *

+ * The "classic" look up is {@link #mapStringLookup(Map)}. + *

+ *

+ * The methods for variable interpolation (A.K.A. variable substitution) are: + *

+ *
    + *
  • {@link #interpolatorStringLookup()}.
  • + *
  • {@link #interpolatorStringLookup(Map)}.
  • + *
  • {@link #interpolatorStringLookup(StringLookup)}.
  • + *
  • {@link #interpolatorStringLookup(Map, StringLookup, boolean)}.
  • + *
+ *

+ * The default lookups are: + *

+ *
Examples
inputwrapLength
"Click here to jump to the commons website - http://commons.apache.org""Click here to jump to the commons website - https://commons.apache.org"20"\n"false" ""Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org""Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"
"Click here to jump to the commons website - http://commons.apache.org""Click here to jump to the commons website - https://commons.apache.org"20"\n"true
+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Default String Lookups
KeyInterfaceFactory MethodSince
{@value #KEY_BASE64_DECODER}{@link StringLookup}{@link #base64DecoderStringLookup()}1.6
{@value #KEY_BASE64_ENCODER}{@link StringLookup}{@link #base64EncoderStringLookup()}1.6
{@value #KEY_CONST}{@link StringLookup}{@link #constantStringLookup()}1.5
{@value #KEY_DATE}{@link StringLookup}{@link #dateStringLookup()}1.5
{@value #KEY_DNS}{@link StringLookup}{@link #dnsStringLookup()}1.8
{@value #KEY_ENV}{@link StringLookup}{@link #environmentVariableStringLookup()}1.3
{@value #KEY_FILE}{@link StringLookup}{@link #fileStringLookup()}1.5
{@value #KEY_JAVA}{@link StringLookup}{@link #javaPlatformStringLookup()}1.5
{@value #KEY_LOCALHOST}{@link StringLookup}{@link #localHostStringLookup()}1.3
{@value #KEY_PROPERTIES}{@link StringLookup}{@link #propertiesStringLookup()}1.5
{@value #KEY_RESOURCE_BUNDLE}{@link StringLookup}{@link #resourceBundleStringLookup()}1.6
{@value #KEY_SCRIPT}{@link StringLookup}{@link #scriptStringLookup()}1.5
{@value #KEY_SYS}{@link StringLookup}{@link #systemPropertyStringLookup()}1.3
{@value #KEY_URL}{@link StringLookup}{@link #urlStringLookup()}1.5
{@value #KEY_URL_DECODER}{@link StringLookup}{@link #urlDecoderStringLookup()}1.5
{@value #KEY_URL_ENCODER}{@link StringLookup}{@link #urlEncoderStringLookup()}1.5
{@value #KEY_XML}{@link StringLookup}{@link #xmlStringLookup()}1.5
+ *

+ * We also provide functional lookups used as building blocks for other lookups. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Functional String Lookups
InterfaceFactory MethodSince
{@link BiStringLookup}{@link #biFunctionStringLookup(BiFunction)}1.9
{@link StringLookup}{@link #functionStringLookup(Function)}1.9
+ * + * @since 1.3 + */ +public final class StringLookupFactory { + + /** + * Defines the singleton for this class. + */ + public static final StringLookupFactory INSTANCE = new StringLookupFactory(); + + /** + * Decodes Base64 Strings. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.base64DecoderStringLookup().lookup("SGVsbG9Xb3JsZCE=");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${base64Decoder:SGVsbG9Xb3JsZCE=} ...");
+     * 
+ *

+ * The above examples convert {@code "SGVsbG9Xb3JsZCE="} to {@code "HelloWorld!"}. + *

+ */ + static final FunctionStringLookup INSTANCE_BASE64_DECODER = FunctionStringLookup + .on(key -> new String(Base64.getDecoder().decode(key), StandardCharsets.ISO_8859_1)); + + /** + * Encodes Base64 Strings. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.base64EncoderStringLookup().lookup("HelloWorld!");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${base64Encoder:HelloWorld!} ...");
+     * 
+ *

+ * The above examples convert {@code "HelloWorld!"} to {@code "SGVsbG9Xb3JsZCE="}. + *

+ * Defines the singleton for this class. + */ + static final FunctionStringLookup INSTANCE_BASE64_ENCODER = FunctionStringLookup + .on(key -> Base64.getEncoder().encodeToString(key.getBytes(StandardCharsets.ISO_8859_1))); + + /** + * Looks up keys from environment variables. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.environmentVariableStringLookup().lookup("USER");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${env:USER} ...");
+     * 
+ *

+ * The above examples convert (on Linux) {@code "USER"} to the current user name. On Windows 10, you would use + * {@code "USERNAME"} to the same effect. + *

+ */ + static final FunctionStringLookup INSTANCE_ENVIRONMENT_VARIABLES = FunctionStringLookup.on(System::getenv); + + /** + * Defines the FunctionStringLookup singleton that always returns null. + */ + static final FunctionStringLookup INSTANCE_NULL = FunctionStringLookup.on(key -> null); + + /** + * Defines the FunctionStringLookup singleton for looking up system properties. + */ + static final FunctionStringLookup INSTANCE_SYSTEM_PROPERTIES = FunctionStringLookup.on(System::getProperty); + + /** + * Default lookup key for interpolation {@value #KEY_BASE64_DECODER}. + * + * @since 1.6 + */ + public static final String KEY_BASE64_DECODER = "base64Decoder"; + + /** + * Default lookup key for interpolation {@value #KEY_BASE64_ENCODER}. + * + * @since 1.6 + */ + public static final String KEY_BASE64_ENCODER = "base64Encoder"; + + /** + * Default lookup key for interpolation {@value #KEY_CONST}. + * + * @since 1.6 + */ + public static final String KEY_CONST = "const"; + + /** + * Default lookup key for interpolation {@value #KEY_DATE}. + * + * @since 1.6 + */ + public static final String KEY_DATE = "date"; + + /** + * Default lookup key for interpolation {@value #KEY_DNS}. + * + * @since 1.8 + */ + public static final String KEY_DNS = "dns"; + + /** + * Default lookup key for interpolation {@value #KEY_ENV}. + * + * @since 1.6 + */ + public static final String KEY_ENV = "env"; + + /** + * Default lookup key for interpolation {@value #KEY_FILE}. + * + * @since 1.6 + */ + public static final String KEY_FILE = "file"; + + /** + * Default lookup key for interpolation {@value #KEY_JAVA}. + * + * @since 1.6 + */ + public static final String KEY_JAVA = "java"; + + /** + * Default lookup key for interpolation {@value #KEY_LOCALHOST}. + * + * @since 1.6 + */ + public static final String KEY_LOCALHOST = "localhost"; + + /** + * Default lookup key for interpolation {@value #KEY_PROPERTIES}. 
+ * + * @since 1.6 + */ + public static final String KEY_PROPERTIES = "properties"; + + /** + * Default lookup key for interpolation {@value #KEY_RESOURCE_BUNDLE}. + * + * @since 1.6 + */ + public static final String KEY_RESOURCE_BUNDLE = "resourceBundle"; + + /** + * Default lookup key for interpolation {@value #KEY_SCRIPT}. + * + * @since 1.6 + */ + public static final String KEY_SCRIPT = "script"; + + /** + * Default lookup key for interpolation {@value #KEY_SYS}. + * + * @since 1.6 + */ + public static final String KEY_SYS = "sys"; + + /** + * Default lookup key for interpolation {@value #KEY_URL}. + * + * @since 1.6 + */ + public static final String KEY_URL = "url"; + + /** + * Default lookup key for interpolation {@value #KEY_URL_DECODER}. + * + * @since 1.6 + */ + public static final String KEY_URL_DECODER = "urlDecoder"; + + /** + * Default lookup key for interpolation {@value #KEY_URL_ENCODER}. + * + * @since 1.6 + */ + public static final String KEY_URL_ENCODER = "urlEncoder"; + + /** + * Default lookup key for interpolation {@value #KEY_XML}. + * + * @since 1.6 + */ + public static final String KEY_XML = "xml"; + + /** + * Clears any static resources. + * + * @since 1.5 + */ + public static void clear() { + ConstantStringLookup.clear(); + } + + /** + * No need to build instances for now. + */ + private StringLookupFactory() { + // empty + } + + /** + * Adds the {@link StringLookupFactory default lookups}. + * + * @param stringLookupMap the map of string lookups. + * @since 1.5 + */ + public void addDefaultStringLookups(final Map stringLookupMap) { + if (stringLookupMap != null) { + // "base64" is deprecated in favor of KEY_BASE64_DECODER. 
+ stringLookupMap.put("base64", StringLookupFactory.INSTANCE_BASE64_DECODER); + for (final DefaultStringLookup stringLookup : DefaultStringLookup.values()) { + stringLookupMap.put(InterpolatorStringLookup.toKey(stringLookup.getKey()), + stringLookup.getStringLookup()); + } + } + } + + /** + * Returns the Base64DecoderStringLookup singleton instance to decode Base64 strings. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.base64DecoderStringLookup().lookup("SGVsbG9Xb3JsZCE=");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${base64Decoder:SGVsbG9Xb3JsZCE=} ...");
+     * 
+ *

+ * The above examples convert {@code "SGVsbG9Xb3JsZCE="} to {@code "HelloWorld!"}. + *

+     *
+     *
+     * @return The Base64DecoderStringLookup singleton instance.
+     * @since 1.5
+     */
+    public StringLookup base64DecoderStringLookup() {
+        return StringLookupFactory.INSTANCE_BASE64_DECODER;
+    }
+
+    /**
+     * Returns the Base64EncoderStringLookup singleton instance to encode strings to Base64.
+     *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.base64EncoderStringLookup().lookup("HelloWorld!");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${base64Encoder:HelloWorld!} ...");
+     * 
+ *

+ * The above examples convert {@code "HelloWorld!"} to {@code "SGVsbG9Xb3JsZCE="}. + *

+     *
+     *
+     * @return The Base64EncoderStringLookup singleton instance.
+     * @since 1.6
+     */
+    public StringLookup base64EncoderStringLookup() {
+        return StringLookupFactory.INSTANCE_BASE64_ENCODER;
+    }
+
+    /**
+     * Returns the Base64DecoderStringLookup singleton instance to decode Base64 strings.
+     *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.base64DecoderStringLookup().lookup("SGVsbG9Xb3JsZCE=");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${base64Decoder:SGVsbG9Xb3JsZCE=} ...");
+     * 
+ *

+ * The above examples convert {@code "SGVsbG9Xb3JsZCE="} to {@code "HelloWorld!"}. + *

+     *
+     *
+     * @return The Base64DecoderStringLookup singleton instance.
+     * @since 1.5
+     * @deprecated Use {@link #base64DecoderStringLookup()}.
+     */
+    @Deprecated
+    public StringLookup base64StringLookup() {
+        return StringLookupFactory.INSTANCE_BASE64_DECODER;
+    }
+
+    /**
+     * Returns a new function-based lookup where the request for a lookup is answered by applying the function with a
+     * lookup key.
+     * <p>
+     * NOTE(review): the type-parameter names below (presumably {@code R} and {@code U}) and the generic signature
+     * look like they were stripped from this copy together with other angle-bracket markup; confirm against the
+     * original sources.
+     * </p>
+     *
+     * @param <R> the function return type.
+     * @param <U> the function's second parameter type.
+     * @param biFunction the function.
+     * @return a new lookup created by {@code BiFunctionStringLookup.on(biFunction)}.
+     * @since 1.9
+     */
+    public BiStringLookup biFunctionStringLookup(final BiFunction biFunction) {
+        return BiFunctionStringLookup.on(biFunction);
+    }
+
+    /**
+     * Returns the ConstantStringLookup singleton instance to look up the value of a fully-qualified static final value.
+     *

+ * Sometimes it is necessary in a configuration file to refer to a constant defined in a class. This can be done + * with this lookup implementation. Variable names must be in the format {@code apackage.AClass.AFIELD}. The + * {@code lookup(String)} method will split the passed in string at the last dot, separating the fully qualified + * class name and the name of the constant (i.e. static final) member field. Then the class is loaded and the + * field's value is obtained using reflection. + *

+ *

+ * Once retrieved values are cached for fast access. This class is thread-safe. It can be used as a standard (i.e. + * global) lookup object and serve multiple clients concurrently. + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.constantStringLookup().lookup("java.awt.event.KeyEvent.VK_ESCAPE");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${const:java.awt.event.KeyEvent.VK_ESCAPE} ..."));
+     * 
+ *

+ * The above examples convert {@code java.awt.event.KeyEvent.VK_ESCAPE} to {@code "27"}. + *

+ * + * @return The ConstantStringLookup singleton instance. + * @since 1.5 + */ + public StringLookup constantStringLookup() { + return ConstantStringLookup.INSTANCE; + } + + /** + * Returns the DateStringLookup singleton instance to format the current date with the format given in the key in a + * format compatible with {@link java.text.SimpleDateFormat}. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.dateStringLookup().lookup("yyyy-MM-dd");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${date:yyyy-MM-dd} ..."));
+     * 
+ *

+ * The above examples convert {@code "yyyy-MM-dd"} to today's date, for example, {@code "2019-08-04"}. + *

+ * + * @return The DateStringLookup singleton instance. + */ + public StringLookup dateStringLookup() { + return DateStringLookup.INSTANCE; + } + + /** + * Returns the DnsStringLookup singleton instance where the lookup key is one of: + *
    + *
  • name: for the local host name, for example {@code EXAMPLE} but also {@code EXAMPLE.apache.org}.
  • + *
  • canonical-name: for the local canonical host name, for example {@code EXAMPLE.apache.org}.
  • + *
  • address: for the local host address, for example {@code 192.168.56.1}.
  • + *
+ * + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.dnsStringLookup().lookup("address|apache.org");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${dns:address|apache.org} ..."));
+     * 
+ *

+ * The above examples convert {@code "address|apache.org"} to {@code "95.216.24.32"} (or {@code "40.79.78.1"}). + *

+ * + * @return the DnsStringLookup singleton instance. + * @since 1.8 + */ + public StringLookup dnsStringLookup() { + return DnsStringLookup.INSTANCE; + } + + /** + * Returns the EnvironmentVariableStringLookup singleton instance where the lookup key is an environment variable + * name. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.environmentVariableStringLookup().lookup("USER");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${env:USER} ..."));
+     * 
+ *

+ * The above examples convert (on Linux) {@code "USER"} to the current user name. On Windows 10, you would use + * {@code "USERNAME"} to the same effect. + *

+ * + * @return The EnvironmentVariableStringLookup singleton instance. + */ + public StringLookup environmentVariableStringLookup() { + return StringLookupFactory.INSTANCE_ENVIRONMENT_VARIABLES; + } + + /** + * Returns the FileStringLookup singleton instance. + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.fileStringLookup().lookup("UTF-8:com/domain/document.properties");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${file:UTF-8:com/domain/document.properties} ..."));
+     * 
+ *

+ * The above examples convert {@code "UTF-8:com/domain/document.properties"} to the contents of the file. + *

+ * + * @return The FileStringLookup singleton instance. + * @since 1.5 + */ + public StringLookup fileStringLookup() { + return FileStringLookup.INSTANCE; + } + + /** + * Returns a new function-based lookup where the request for a lookup is answered by applying the function with a + * lookup key. + * + * @param the function return type. + * @param function the function. + * @return a new MapStringLookup. + * @since 1.9 + */ + public StringLookup functionStringLookup(final Function function) { + return FunctionStringLookup.on(function); + } + + /** + * Returns a new InterpolatorStringLookup using the {@link StringLookupFactory default lookups}. + *

+ * The lookups available to an interpolator are the default lookups defined in {@link StringLookupFactory}. + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.interpolatorStringLookup().lookup("${sys:os.name}, ${env:USER}");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${sys:os.name}, ${env:USER} ..."));
+     * 
+ *

+ * The above examples convert {@code "${sys:os.name}, ${env:USER}"} to the OS name and Linux user name. + *

+ * + * @return a new InterpolatorStringLookup. + */ + public StringLookup interpolatorStringLookup() { + return InterpolatorStringLookup.INSTANCE; + } + + /** + * Returns a new InterpolatorStringLookup using the {@link StringLookupFactory default lookups}. + *

+ * If {@code addDefaultLookups} is true, the following lookups are used in addition to the ones provided in + * {@code stringLookupMap}: + *

+ * + * @param stringLookupMap the map of string lookups. + * @param defaultStringLookup the default string lookup. + * @param addDefaultLookups whether to use lookups as described above. + * @return a new InterpolatorStringLookup. + * @since 1.4 + */ + public StringLookup interpolatorStringLookup(final Map stringLookupMap, + final StringLookup defaultStringLookup, final boolean addDefaultLookups) { + return new InterpolatorStringLookup(stringLookupMap, defaultStringLookup, addDefaultLookups); + } + + /** + * Returns a new InterpolatorStringLookup using the {@link StringLookupFactory default lookups}. + * + * @param the value type the default string lookup's map. + * @param map the default map for string lookups. + * @return a new InterpolatorStringLookup. + */ + public StringLookup interpolatorStringLookup(final Map map) { + return new InterpolatorStringLookup(map); + } + + /** + * Returns a new InterpolatorStringLookup using the {@link StringLookupFactory default lookups}. + * + * @param defaultStringLookup the default string lookup. + * @return a new InterpolatorStringLookup. + */ + public StringLookup interpolatorStringLookup(final StringLookup defaultStringLookup) { + return new InterpolatorStringLookup(defaultStringLookup); + } + + /** + * Returns the JavaPlatformStringLookup singleton instance. Looks up keys related to Java: Java version, JRE + * version, VM version, and so on. + *

+ * The lookup keys with examples are: + *

+ *
    + *
  • version: "Java version 1.8.0_181"
  • + *
  • runtime: "Java(TM) SE Runtime Environment (build 1.8.0_181-b13) from Oracle Corporation"
  • + *
  • vm: "Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)"
  • + *
  • os: "Windows 10 10.0, architecture: amd64-64"
  • + *
  • hardware: "processors: 4, architecture: amd64-64, instruction sets: amd64"
  • + *
  • locale: "default locale: en_US, platform encoding: iso-8859-1"
  • + *
+ * + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.javaPlatformStringLookup().lookup("version");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${java:version} ..."));
+     * 
+ *

+ * The above examples convert {@code "version"} to the current VM version, for example, + * {@code "Java version 1.8.0_181"}. + *

+ * + * @return The JavaPlatformStringLookup singleton instance. + */ + public StringLookup javaPlatformStringLookup() { + return JavaPlatformStringLookup.INSTANCE; + } + + /** + * Returns the LocalHostStringLookup singleton instance where the lookup key is one of: + *
    + *
  • name: for the local host name, for example {@code EXAMPLE}.
  • + *
  • canonical-name: for the local canonical host name, for example {@code EXAMPLE.apache.org}.
  • + *
  • address: for the local host address, for example {@code 192.168.56.1}.
  • + *
+ * + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.localHostStringLookup().lookup("canonical-name");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${localhost:canonical-name} ..."));
+     * 
+ *

+ * The above examples convert {@code "canonical-name"} to the current host name, for example, + * {@code "EXAMPLE.apache.org"}. + *

+ * + * @return The LocalHostStringLookup singleton instance. + */ + public StringLookup localHostStringLookup() { + return LocalHostStringLookup.INSTANCE; + } + + /** + * Returns a new map-based lookup where the request for a lookup is answered with the value for that key. + * + * @param the map value type. + * @param map the map. + * @return a new MapStringLookup. + */ + public StringLookup mapStringLookup(final Map map) { + return FunctionStringLookup.on(map); + } + + /** + * Returns the NullStringLookup singleton instance which always returns null. + * + * @return The NullStringLookup singleton instance. + */ + public StringLookup nullStringLookup() { + return StringLookupFactory.INSTANCE_NULL; + } + + /** + * Returns the PropertiesStringLookup singleton instance. + *

+ * Looks up the value for the key in the format "DocumentPath::MyKey". + *

+ *

+ * Note the use of "::" instead of ":" to allow for "C:" drive letters in paths. + *

+ *

+ * For example: "com/domain/document.properties::MyKey". + *

+ * + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.propertiesStringLookup().lookup("com/domain/document.properties::MyKey");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${properties:com/domain/document.properties::MyKey} ..."));
+     * 
+ *

+ * The above examples convert {@code "com/domain/document.properties::MyKey"} to the key value in the properties + * file at the path "com/domain/document.properties". + *

+ * + * @return The PropertiesStringLookup singleton instance. + * @since 1.5 + */ + public StringLookup propertiesStringLookup() { + return PropertiesStringLookup.INSTANCE; + } + + /** + * Returns the ResourceBundleStringLookup singleton instance. + *

+ * Looks up the value for a given key in the format "BundleName:BundleKey". + *

+ *

+ * For example: "com.domain.messages:MyKey". + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.resourceBundleStringLookup().lookup("com.domain.messages:MyKey");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${resourceBundle:com.domain.messages:MyKey} ..."));
+     * 
+ *

+ * The above examples convert {@code "com.domain.messages:MyKey"} to the key value in the resource bundle at + * {@code "com.domain.messages"}. + *

+ * + * @return The ResourceBundleStringLookup singleton instance. + */ + public StringLookup resourceBundleStringLookup() { + return ResourceBundleStringLookup.INSTANCE; + } + + /** + * Returns a ResourceBundleStringLookup instance for the given bundle name. + *

+ * Looks up the value for a given key in the format "MyKey". + *

+ *

+ * For example: "MyKey". + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.resourceBundleStringLookup("com.domain.messages").lookup("MyKey");
+     * 
+ *

+ * The above example converts {@code "MyKey"} to the key value in the resource bundle at + * {@code "com.domain.messages"}. + *

+ * + * @param bundleName Only lookup in this bundle. + * @return a ResourceBundleStringLookup instance for the given bundle name. + * @since 1.5 + */ + public StringLookup resourceBundleStringLookup(final String bundleName) { + return new ResourceBundleStringLookup(bundleName); + } + + /** + * Returns the ScriptStringLookup singleton instance. + *

+ * Looks up the value for the key in the format "ScriptEngineName:Script". + *

+ *

+ * For example: "javascript:3 + 4". + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.scriptStringLookup().lookup("javascript:3 + 4");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${script:javascript:3 + 4} ..."));
+     * 
+ *

+ * The above examples convert {@code "javascript:3 + 4"} to {@code "7"}. + *

+ * + * @return The ScriptStringLookup singleton instance. + * @since 1.5 + */ + public StringLookup scriptStringLookup() { + return ScriptStringLookup.INSTANCE; + } + + /** + * Returns the SystemPropertyStringLookup singleton instance where the lookup key is a system property name. + * + *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.systemPropertyStringLookup().lookup("os.name");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${sys:os.name} ..."));
+     * 
+ *

+ * The above examples convert {@code "os.name"} to the operating system name. + *

+ * + * @return The SystemPropertyStringLookup singleton instance. + */ + public StringLookup systemPropertyStringLookup() { + return StringLookupFactory.INSTANCE_SYSTEM_PROPERTIES; + } + + /** + * Returns the UrlDecoderStringLookup singleton instance. + *

+ * Decodes URL Strings using the UTF-8 encoding. + *

+ *

+ * For example: "Hello%20World%21" becomes "Hello World!". + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.urlDecoderStringLookup().lookup("Hello%20World%21");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${urlDecoder:Hello%20World%21} ..."));
+     * 
+ *

+ * The above examples convert {@code "Hello%20World%21"} to {@code "Hello World!"}. + *

+ * + * @return The UrlDecoderStringLookup singleton instance. + * @since 1.6 + */ + public StringLookup urlDecoderStringLookup() { + return UrlDecoderStringLookup.INSTANCE; + } + + /** + * Returns the UrlEncoderStringLookup singleton instance. + *

+ * Encodes URL Strings using the UTF-8 encoding. + *

+ *

+ * For example: "Hello World!" becomes "Hello+World%21". + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.urlEncoderStringLookup().lookup("Hello World!");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${urlEncoder:Hello World!} ..."));
+     * 
+ *

+ * The above examples convert {@code "Hello World!"} to {@code "Hello+World%21"}. + *

+ * + * @return The UrlEncoderStringLookup singleton instance. + * @since 1.6 + */ + public StringLookup urlEncoderStringLookup() { + return UrlEncoderStringLookup.INSTANCE; + } + + /** + * Returns the UrlStringLookup singleton instance. + *

+ * Looks up the value for the key in the format "CharsetName:URL". + *

+ *

+ * For example, using the HTTP scheme: "UTF-8:http://www.google.com" + *

+ *

+ * For example, using the file scheme: + * "UTF-8:file:///C:/somehome/commons/commons-text/src/test/resources/document.properties" + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.urlStringLookup().lookup("UTF-8:https://www.apache.org");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${url:UTF-8:https://www.apache.org} ..."));
+     * 
+ *

+ * The above examples convert {@code "UTF-8:https://www.apache.org"} to the contents of that page. + *

+ * + * @return The UrlStringLookup singleton instance. + * @since 1.5 + */ + public StringLookup urlStringLookup() { + return UrlStringLookup.INSTANCE; + } + + /** + * Returns the XmlStringLookup singleton instance. + *

+ * Looks up the value for the key in the format "DocumentPath:XPath". + *

+ *

+ * For example: "com/domain/document.xml:/path/to/node". + *

+ *

+ * Using a {@link StringLookup} from the {@link StringLookupFactory}: + *

+ * + *
+     * StringLookupFactory.INSTANCE.xmlStringLookup().lookup("com/domain/document.xml:/path/to/node");
+     * 
+ *

+ * Using a {@link StringSubstitutor}: + *

+ * + *
+     * StringSubstitutor.createInterpolator().replace("... ${xml:com/domain/document.xml:/path/to/node} ..."));
+     * 
+ *

+ * The above examples convert {@code "com/domain/document.xml:/path/to/node"} to the value of the XPath in the XML + * document. + *

+ * + * @return The XmlStringLookup singleton instance. + * @since 1.5 + */ + public StringLookup xmlStringLookup() { + return XmlStringLookup.INSTANCE; + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/UrlDecoderStringLookup.java b/src/main/java/org/apache/commons/text/lookup/UrlDecoderStringLookup.java new file mode 100644 index 0000000000..d2b99d4a5c --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/UrlDecoderStringLookup.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; + +/** + * Decodes URL Strings using the UTF-8 encoding. + * + * @see URLEncoder + * @since 1.5 + */ +final class UrlDecoderStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + */ + static final UrlDecoderStringLookup INSTANCE = new UrlDecoderStringLookup(); + + /** + * This ctor is not private to allow Mockito spying. 
+ */ + UrlDecoderStringLookup() { + // empty + } + + String decode(final String key, final String enc) throws UnsupportedEncodingException { + return URLDecoder.decode(key, enc); + } + + @Override + public String lookup(final String key) { + if (key == null) { + return null; + } + final String enc = StandardCharsets.UTF_8.name(); + try { + return decode(key, enc); + } catch (final UnsupportedEncodingException e) { + // Can't happen since UTF-8 is required by the Java specification. + throw IllegalArgumentExceptions.format(e, "%s: source=%s, encoding=%s", e, key, enc); + } + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/UrlEncoderStringLookup.java b/src/main/java/org/apache/commons/text/lookup/UrlEncoderStringLookup.java new file mode 100644 index 0000000000..62513aa772 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/UrlEncoderStringLookup.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; + +/** + * Encodes URL Strings using the UTF-8 encoding. 
+ * + * @see URLEncoder + * @since 1.5 + */ +final class UrlEncoderStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + */ + static final UrlEncoderStringLookup INSTANCE = new UrlEncoderStringLookup(); + + /** + * This ctor is not private to allow Mockito spying. + */ + UrlEncoderStringLookup() { + // empty + } + + String encode(final String key, final String enc) throws UnsupportedEncodingException { + return URLEncoder.encode(key, enc); + } + + @Override + public String lookup(final String key) { + if (key == null) { + return null; + } + final String enc = StandardCharsets.UTF_8.name(); + try { + return encode(key, enc); + } catch (final UnsupportedEncodingException e) { + // Can't happen since UTF-8 is required by the Java specification. + throw IllegalArgumentExceptions.format(e, "%s: source=%s, encoding=%s", e, key, enc); + } + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/UrlStringLookup.java b/src/main/java/org/apache/commons/text/lookup/UrlStringLookup.java new file mode 100644 index 0000000000..1f0ef38eb3 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/UrlStringLookup.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.io.BufferedInputStream; +import java.io.InputStreamReader; +import java.io.StringWriter; +import java.net.URL; + +import org.apache.commons.lang3.StringUtils; + +/** + * Looks up keys from a URL. + *

+ * Looks up the value for a given key in the format "Charset:URL". + *

+ *

+ * For example: "UTF-8:https://www.apache.org". + *

+ * + * @since 1.5 + */ +final class UrlStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + */ + static final UrlStringLookup INSTANCE = new UrlStringLookup(); + + /** + * No need to build instances for now. + */ + private UrlStringLookup() { + // empty + } + + /** + * Looks up the value for the key in the format "CharsetName:URL". + *

+ * For example: "UTF-8:https://www.apache.org". + *

+ * + * @param key the key to be looked up, may be null + * @return The value associated with the key. + */ + @Override + public String lookup(final String key) { + if (key == null) { + return null; + } + final String[] keys = key.split(SPLIT_STR); + final int keyLen = keys.length; + if (keyLen < 2) { + throw IllegalArgumentExceptions.format("Bad URL key format [%s]; expected format is DocumentPath:Key.", + key); + } + final String charsetName = keys[0]; + final String urlStr = StringUtils.substringAfter(key, SPLIT_CH); + try { + final URL url = new URL(urlStr); + final int size = 8192; + final StringWriter writer = new StringWriter(size); + final char[] buffer = new char[size]; + try (BufferedInputStream bis = new BufferedInputStream(url.openStream()); + InputStreamReader reader = new InputStreamReader(bis, charsetName)) { + int n; + while (-1 != (n = reader.read(buffer))) { + writer.write(buffer, 0, n); + } + } + return writer.toString(); + } catch (final Exception e) { + throw IllegalArgumentExceptions.format(e, "Error looking up URL [%s] with Charset [%s].", urlStr, + charsetName); + } + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/XmlStringLookup.java b/src/main/java/org/apache/commons/text/lookup/XmlStringLookup.java new file mode 100644 index 0000000000..12aa5e56a4 --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/XmlStringLookup.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ + +package org.apache.commons.text.lookup; + +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Paths; + +import javax.xml.xpath.XPathFactory; + +import org.apache.commons.lang3.StringUtils; +import org.xml.sax.InputSource; + +/** + * Looks up keys from an XML document. + *

+ * Looks up the value for a given key in the format "Document:XPath". + *

+ *

+ * For example: "com/domain/document.xml:/path/to/node". + *

+ * + * @since 1.5 + */ +final class XmlStringLookup extends AbstractStringLookup { + + /** + * Defines the singleton for this class. + */ + static final XmlStringLookup INSTANCE = new XmlStringLookup(); + + /** + * No need to build instances for now. + */ + private XmlStringLookup() { + // empty + } + + /** + * Looks up the value for the key in the format "DocumentPath:XPath". + *

+ * For example: "com/domain/document.xml:/path/to/node". + *

+ * + * @param key the key to be looked up, may be null + * @return The value associated with the key. + */ + @Override + public String lookup(final String key) { + if (key == null) { + return null; + } + final String[] keys = key.split(SPLIT_STR); + final int keyLen = keys.length; + if (keyLen != 2) { + throw IllegalArgumentExceptions.format("Bad XML key format [%s]; expected format is DocumentPath:XPath.", + key); + } + final String documentPath = keys[0]; + final String xpath = StringUtils.substringAfter(key, SPLIT_CH); + try (InputStream inputStream = Files.newInputStream(Paths.get(documentPath))) { + return XPathFactory.newInstance().newXPath().evaluate(xpath, new InputSource(inputStream)); + } catch (final Exception e) { + throw IllegalArgumentExceptions.format(e, "Error looking up XML document [%s] and XPath [%s].", + documentPath, xpath); + } + } + +} diff --git a/src/main/java/org/apache/commons/text/lookup/package-info.java b/src/main/java/org/apache/commons/text/lookup/package-info.java new file mode 100644 index 0000000000..65ea003a2c --- /dev/null +++ b/src/main/java/org/apache/commons/text/lookup/package-info.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + *

+ * Provides algorithms for looking up strings for use with a {@link org.apache.commons.text.StringSubstitutor + * StringSubstitutor}. The main class in this package is {@link org.apache.commons.text.lookup.StringLookupFactory + * StringLookupFactory}. + *

+ *

+ * Use {@link org.apache.commons.text.lookup.StringLookupFactory StringLookupFactory} to create instances of string + * lookups or access singleton string lookups. The main interface is {@link org.apache.commons.text.lookup.StringLookup + * StringLookup} which is implemented here in package private classes. + *

+ *

+ * Like {@link java.util.function.BiFunction BiFunction} is a variant of {@link java.util.function.Function Function}, + * this {@link org.apache.commons.text.lookup.BiStringLookup BiStringLookup} is a variant of + * {@link org.apache.commons.text.lookup.StringLookup StringLookup}. + *

+ *

+ * The initial implementation was adapted from Apache Commons Log4j 2.11.0. + *

+ * + * @since 1.3 + */ +package org.apache.commons.text.lookup; diff --git a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java new file mode 100644 index 0000000000..1646b0e735 --- /dev/null +++ b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.text.matcher; + +import java.util.Arrays; + +/** + * A matcher that determines if a character array portion matches. + *

+ * Thread=safe. + *

+ * + * @since 1.3 + */ +abstract class AbstractStringMatcher implements StringMatcher { + + /** + * Matches all of the given matchers in order. + * + * @since 1.9 + */ + static final class AndStringMatcher extends AbstractStringMatcher { + + /** + * Matchers in order. + */ + private final StringMatcher[] stringMatchers; + + /** + * Constructs a new initialized instance. + * + * @param stringMatchers Matchers in order. Never null since the {@link StringMatcherFactory} uses the + * {@link NoneMatcher} instead. + */ + AndStringMatcher(final StringMatcher... stringMatchers) { + this.stringMatchers = stringMatchers.clone(); + } + + @Override + public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) { + int total = 0; + int curStart = start; + for (final StringMatcher stringMatcher : stringMatchers) { + if (stringMatcher != null) { + final int len = stringMatcher.isMatch(buffer, curStart, bufferStart, bufferEnd); + if (len == 0) { + return 0; + } + total += len; + curStart += len; + } + } + return total; + } + + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + int total = 0; + int curStart = start; + for (final StringMatcher stringMatcher : stringMatchers) { + if (stringMatcher != null) { + final int len = stringMatcher.isMatch(buffer, curStart, bufferStart, bufferEnd); + if (len == 0) { + return 0; + } + total += len; + curStart += len; + } + } + return total; + } + + @Override + public int size() { + int total = 0; + for (final StringMatcher stringMatcher : stringMatchers) { + if (stringMatcher != null) { + total += stringMatcher.size(); + } + } + return total; + } + } + + /** + * Matches out of a set of characters. + *

+ * Thread-safe. + *

+ */ + static final class CharArrayMatcher extends AbstractStringMatcher { + + /** The string to match, as a character array, implementation treats as immutable. */ + private final char[] chars; + + /** The string to match. */ + private final String string; + + /** + * Constructs a matcher from a String. + * + * @param chars the string to match, must not be null + */ + CharArrayMatcher(final char... chars) { + this.string = String.valueOf(chars); + this.chars = chars.clone(); + } + + /** + * Returns the number of matching characters, {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd the end index of the active buffer, valid for buffer + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) { + final int len = size(); + if (start + len > bufferEnd) { + return 0; + } + int j = start; + for (int i = 0; i < len; i++, j++) { + if (chars[i] != buffer[j]) { + return 0; + } + } + return len; + } + + /** + * Returns the number of matching characters, {@code 0} if there is no match. 
+ * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd the end index of the active buffer, valid for buffer + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + final int len = size(); + if (start + len > bufferEnd) { + return 0; + } + int j = start; + for (int i = 0; i < len; i++, j++) { + if (chars[i] != buffer.charAt(j)) { + return 0; + } + } + return len; + } + + /** + * Returns the size of the string to match given in the constructor. + * + * @since 1.9 + */ + @Override + public int size() { + return chars.length; + } + + @Override + public String toString() { + return super.toString() + "[\"" + string + "\"]"; + } + + } + + /** + * Matches a character. + *

+ * Thread-safe. + *

+ */ + static final class CharMatcher extends AbstractStringMatcher { + + /** The character to match. */ + private final char ch; + + /** + * Constructs a matcher for a single character. + * + * @param ch the character to match + */ + CharMatcher(final char ch) { + this.ch = ch; + } + + /** + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) { + return ch == buffer[start] ? 1 : 0; + } + + /** + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + return ch == buffer.charAt(start) ? 1 : 0; + } + + /** + * Returns 1. + * + * @since 1.9 + */ + @Override + public int size() { + return 1; + } + + @Override + public String toString() { + return super.toString() + "['" + ch + "']"; + } + } + + /** + * Matches a set of characters. + *

+ * Thread-safe. + *

+ */ + static final class CharSetMatcher extends AbstractStringMatcher { + + /** The set of characters to match. */ + private final char[] chars; + + /** + * Constructs a matcher from a character array. + * + * @param chars the characters to match, must not be null + */ + CharSetMatcher(final char[] chars) { + this.chars = chars.clone(); + Arrays.sort(this.chars); + } + + /** + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) { + return Arrays.binarySearch(chars, buffer[start]) >= 0 ? 1 : 0; + } + + /** + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + return Arrays.binarySearch(chars, buffer.charAt(start)) >= 0 ? 1 : 0; + } + + /** + * Returns 1. + * + * @since 1.9 + */ + @Override + public int size() { + return 1; + } + + @Override + public String toString() { + return super.toString() + Arrays.toString(chars); + } + + } + + /** + * Matches nothing. + *

+ * Thread-safe. + *

+ */ + static final class NoneMatcher extends AbstractStringMatcher { + + /** + * Constructs a new instance of {@code NoMatcher}. + */ + NoneMatcher() { + } + + /** + * Always returns {@code 0}. + * + * @param buffer unused + * @param start unused + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) { + return 0; + } + + /** + * Always returns {@code 0}. + * + * @param buffer unused + * @param start unused + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + return 0; + } + + /** + * Returns 0. + * + * @since 1.9 + */ + @Override + public int size() { + return 0; + } + + } + + /** + * Matches whitespace as per trim(). + *

+ * Thread-safe. + *

+ */ + static final class TrimMatcher extends AbstractStringMatcher { + + /** + * The space character. + */ + private static final int SPACE_INT = 32; + + /** + * Constructs a new instance of {@code TrimMatcher}. + */ + TrimMatcher() { + } + + /** + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) { + return buffer[start] <= SPACE_INT ? 1 : 0; + } + + /** + * Returns {@code 1} if there is a match, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + return buffer.charAt(start) <= SPACE_INT ? 1 : 0; + } + + /** + * Returns 1. + * + * @since 1.9 + */ + @Override + public int size() { + return 1; + } + } + + /** + * Constructor. + */ + protected AbstractStringMatcher() { + } + +// /** +// * Validates indices for {@code bufferStart <= start < bufferEnd}. +// * +// * @param start the starting position for the match, valid in {@code buffer}. +// * @param bufferStart the first active index in the buffer, valid in {@code buffer}. +// * @param bufferEnd the end index (exclusive) of the active buffer, valid in {@code buffer}. 
+// */ +// void validate(final int start, final int bufferStart, final int bufferEnd) { +// if (((bufferStart > start) || (start >= bufferEnd))) { +// throw new IndexOutOfBoundsException( +// String.format("bufferStart(%,d) <= start(%,d) < bufferEnd(%,d)", bufferStart, start, bufferEnd)); +// } +// } + +} diff --git a/src/main/java/org/apache/commons/text/matcher/StringMatcher.java b/src/main/java/org/apache/commons/text/matcher/StringMatcher.java new file mode 100644 index 0000000000..7b82d606c6 --- /dev/null +++ b/src/main/java/org/apache/commons/text/matcher/StringMatcher.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.text.matcher; + +import org.apache.commons.lang3.CharSequenceUtils; + +/** + * Determines if a character array portion matches. + * + * @since 1.3 + */ +public interface StringMatcher { + + /** + * Returns a matcher that matches this matcher followed by the given matcher. + * + * @param stringMatcher the next matcher. + * @return a matcher that matches this matcher followed by the given matcher. 
+ * @since 1.9 + */ + default StringMatcher andThen(final StringMatcher stringMatcher) { + return StringMatcherFactory.INSTANCE.andMatcher(this, stringMatcher); + } + + /** + * Returns the number of matching characters, zero for no match. + *

+ * This method is called to check for a match. The parameter {@code pos} represents the current position to be + * checked in the string {@code buffer} (a character array which must not be changed). The API guarantees that + * {@code pos} is a valid index for {@code buffer}. + *

+ *

+ * The matching code may check one character or many. It may check characters preceding {@code pos} as well as those + * after. + *

+ *

+ * It must return zero for no match, or a positive number if a match was found. The number indicates the number of + * characters that matched. + *

+ * + * @param buffer the text content to match against, do not change + * @param pos the starting position for the match, valid for buffer + * @return The number of matching characters, zero for no match + * @since 1.9 + */ + default int isMatch(final char[] buffer, final int pos) { + return isMatch(buffer, pos, 0, buffer.length); + } + + /** + * Returns the number of matching characters, {@code 0} if there is no match. + *

+ * This method is called to check for a match against a source {@code buffer}. The parameter {@code start} + * represents the start position to be checked in the {@code buffer} (a character array which MUST NOT be changed). + * The implementation SHOULD guarantee that {@code start} is a valid index in {@code buffer}. + *

+ *

+ * The character array may be larger than the active area to be matched. Only values in the buffer between the + * specified indices may be accessed, in other words: {@code bufferStart <= start < bufferEnd}. + *

+ *

+ * The matching code may check one character or many. It may check characters preceding {@code start} as well as + * those after, so long as no checks exceed the bounds specified. + *

+ *

+ * It must return zero for no match, or a positive number if a match was found. The number indicates the number of + * characters that matched. + *

+ * + * @param buffer the source text to search, do not change. + * @param start the starting position for the match, valid in {@code buffer}. + * @param bufferStart the first active index in the buffer, valid in {@code buffer}. + * @param bufferEnd the end index (exclusive) of the active buffer, valid in {@code buffer}. + * @return The number of matching characters, zero if there is no match. + */ + int isMatch(char[] buffer, int start, int bufferStart, int bufferEnd); + + /** + * Returns the number of matching characters, zero for no match. + *

+ * This method is called to check for a match. The parameter {@code pos} represents the current position to be + * checked in the string {@code buffer} (a character array which must not be changed). The API guarantees that + * {@code pos} is a valid index for {@code buffer}. + *

+ *

+ * The matching code may check one character or many. It may check characters preceding {@code pos} as well as those + * after. + *

+ *

+ * It must return zero for no match, or a positive number if a match was found. The number indicates the number of + * characters that matched. + *

+ * + * @param buffer the text content to match against, do not change + * @param pos the starting position for the match, valid for buffer + * @return The number of matching characters, zero for no match + * @since 1.9 + */ + default int isMatch(final CharSequence buffer, final int pos) { + return isMatch(buffer, pos, 0, buffer.length()); + } + + /** + * Returns the number of matching characters, {@code 0} if there is no match. + *

+ * This method is called to check for a match against a source {@code buffer}. The parameter {@code start} + * represents the start position to be checked in the {@code buffer} (a character array which MUST NOT be changed). + * The implementation SHOULD guarantee that {@code start} is a valid index in {@code buffer}. + *

+ *

+ * The character array may be larger than the active area to be matched. Only values in the buffer between the + * specified indices may be accessed, in other words: {@code bufferStart <= start < bufferEnd}. + *

+ *

+ * The matching code may check one character or many. It may check characters preceding {@code start} as well as + * those after, so long as no checks exceed the bounds specified. + *

+ *

+ * It must return zero for no match, or a positive number if a match was found. The number indicates the number of + * characters that matched. + *

+ * + * @param buffer the source text to search, do not change. + * @param start the starting position for the match, valid in {@code buffer}. + * @param bufferStart the first active index in the buffer, valid in {@code buffer}. + * @param bufferEnd the end index (exclusive) of the active buffer, valid in {@code buffer}. + * @return The number of matching characters, zero if there is no match. + * @since 1.9 + */ + default int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + return isMatch(CharSequenceUtils.toCharArray(buffer), start, bufferStart, bufferEnd); + } + + /** + * Returns the size of the matching string. Defaults to 0. + * + * @return the size of the matching string. + * @since 1.9 + */ + default int size() { + return 0; + } + +} diff --git a/src/main/java/org/apache/commons/text/matcher/StringMatcherFactory.java b/src/main/java/org/apache/commons/text/matcher/StringMatcherFactory.java new file mode 100644 index 0000000000..c08b79553b --- /dev/null +++ b/src/main/java/org/apache/commons/text/matcher/StringMatcherFactory.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.text.matcher; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; + +/** + * Provides access to matchers defined in this package. + * + * @since 1.3 + */ +public final class StringMatcherFactory { + + /** + * Matches the comma character. + */ + private static final AbstractStringMatcher.CharMatcher COMMA_MATCHER = new AbstractStringMatcher.CharMatcher(','); + + /** + * Matches the double quote character. + */ + private static final AbstractStringMatcher.CharMatcher DOUBLE_QUOTE_MATCHER = new AbstractStringMatcher.CharMatcher( + '"'); + + /** + * Defines the singleton for this class. + */ + public static final StringMatcherFactory INSTANCE = new StringMatcherFactory(); + + /** + * Matches no characters. + */ + private static final AbstractStringMatcher.NoneMatcher NONE_MATCHER = new AbstractStringMatcher.NoneMatcher(); + + /** + * Matches the single or double quote character. + */ + private static final AbstractStringMatcher.CharSetMatcher QUOTE_MATCHER = new AbstractStringMatcher.CharSetMatcher( + "'\"".toCharArray()); + + /** + * Matches the single quote character. + */ + private static final AbstractStringMatcher.CharMatcher SINGLE_QUOTE_MATCHER = new AbstractStringMatcher.CharMatcher( + '\''); + + /** + * Matches the space character. + */ + private static final AbstractStringMatcher.CharMatcher SPACE_MATCHER = new AbstractStringMatcher.CharMatcher(' '); + + /** + * Matches the same characters as StringTokenizer, namely space, tab, newline, carriage return, form feed. + */ + private static final AbstractStringMatcher.CharSetMatcher SPLIT_MATCHER = new AbstractStringMatcher.CharSetMatcher( + " \t\n\r\f".toCharArray()); + + /** + * Matches the tab character. + */ + private static final AbstractStringMatcher.CharMatcher TAB_MATCHER = new AbstractStringMatcher.CharMatcher('\t'); + + /** + * Matches the String trim() whitespace characters. 
+ */ + private static final AbstractStringMatcher.TrimMatcher TRIM_MATCHER = new AbstractStringMatcher.TrimMatcher(); + + /** + * No need to build instances for now. + */ + private StringMatcherFactory() { + // empty + } + + /** + * Creates a matcher that matches all of the given matchers in order. + * + * @param stringMatchers the matcher + * @return a matcher that matches all of the given matchers in order. + * @since 1.9 + */ + public StringMatcher andMatcher(final StringMatcher... stringMatchers) { + final int len = ArrayUtils.getLength(stringMatchers); + if (len == 0) { + return NONE_MATCHER; + } + if (len == 1) { + return stringMatchers[0]; + } + return new AbstractStringMatcher.AndStringMatcher(stringMatchers); + } + + /** + * Constructor that creates a matcher from a character. + * + * @param ch the character to match, must not be null + * @return a new Matcher for the given char + */ + public StringMatcher charMatcher(final char ch) { + return new AbstractStringMatcher.CharMatcher(ch); + } + + /** + * Constructor that creates a matcher from a set of characters. + * + * @param chars the characters to match, null or empty matches nothing + * @return a new matcher for the given char[] + */ + public StringMatcher charSetMatcher(final char... chars) { + final int len = ArrayUtils.getLength(chars); + if (len == 0) { + return NONE_MATCHER; + } + if (len == 1) { + return new AbstractStringMatcher.CharMatcher(chars[0]); + } + return new AbstractStringMatcher.CharSetMatcher(chars); + } + + /** + * Creates a matcher from a string representing a set of characters. 
+ * + * @param chars the characters to match, null or empty matches nothing + * @return a new Matcher for the given characters + */ + public StringMatcher charSetMatcher(final String chars) { + final int len = StringUtils.length(chars); + if (len == 0) { + return NONE_MATCHER; + } + if (len == 1) { + return new AbstractStringMatcher.CharMatcher(chars.charAt(0)); + } + return new AbstractStringMatcher.CharSetMatcher(chars.toCharArray()); + } + + /** + * Returns a matcher which matches the comma character. + * + * @return a matcher for a comma + */ + public StringMatcher commaMatcher() { + return COMMA_MATCHER; + } + + /** + * Returns a matcher which matches the double quote character. + * + * @return a matcher for a double quote + */ + public StringMatcher doubleQuoteMatcher() { + return DOUBLE_QUOTE_MATCHER; + } + + /** + * Matches no characters. + * + * @return a matcher that matches nothing + */ + public StringMatcher noneMatcher() { + return NONE_MATCHER; + } + + /** + * Returns a matcher which matches the single or double quote character. + * + * @return a matcher for a single or double quote + */ + public StringMatcher quoteMatcher() { + return QUOTE_MATCHER; + } + + /** + * Returns a matcher which matches the single quote character. + * + * @return a matcher for a single quote + */ + public StringMatcher singleQuoteMatcher() { + return SINGLE_QUOTE_MATCHER; + } + + /** + * Returns a matcher which matches the space character. + * + * @return a matcher for a space + */ + public StringMatcher spaceMatcher() { + return SPACE_MATCHER; + } + + /** + * Matches the same characters as StringTokenizer, namely space, tab, newline, carriage return and form feed. + * + * @return The split matcher + */ + public StringMatcher splitMatcher() { + return SPLIT_MATCHER; + } + + /** + * Creates a matcher from a string. 
+ * + * @param chars the string to match, null or empty matches nothing + * @return a new Matcher for the given String + * @since 1.9 + */ + public StringMatcher stringMatcher(final char... chars) { + final int length = ArrayUtils.getLength(chars); + return length == 0 ? NONE_MATCHER + : length == 1 ? new AbstractStringMatcher.CharMatcher(chars[0]) + : new AbstractStringMatcher.CharArrayMatcher(chars); + } + + /** + * Creates a matcher from a string. + * + * @param str the string to match, null or empty matches nothing + * @return a new Matcher for the given String + */ + public StringMatcher stringMatcher(final String str) { + return StringUtils.isEmpty(str) ? NONE_MATCHER : stringMatcher(str.toCharArray()); + } + + /** + * Returns a matcher which matches the tab character. + * + * @return a matcher for a tab + */ + public StringMatcher tabMatcher() { + return TAB_MATCHER; + } + + /** + * Matches the String trim() whitespace characters. + * + * @return The trim matcher + */ + public StringMatcher trimMatcher() { + return TRIM_MATCHER; + } + +} diff --git a/src/main/java/org/apache/commons/text/matcher/package-info.java b/src/main/java/org/apache/commons/text/matcher/package-info.java new file mode 100644 index 0000000000..a6b1d7daea --- /dev/null +++ b/src/main/java/org/apache/commons/text/matcher/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *

+ * Provides algorithms for matching up strings for use with a {@link org.apache.commons.text.StringSubstitutor + * StringSubstitutor}. The main class here is {@link org.apache.commons.text.matcher.StringMatcherFactory + * StringMatcherFactory}. + *

+ * + * @since 1.3 + */ +package org.apache.commons.text.matcher; diff --git a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java index 39488aef3f..57eb25f5db 100644 --- a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java +++ b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java @@ -57,7 +57,7 @@ public Double cosineSimilarity(final Map leftVector, for (final Integer value : rightVector.values()) { d2 += Math.pow(value, 2); } - double cosineSimilarity; + final double cosineSimilarity; if (d1 <= 0.0 || d2 <= 0.0) { cosineSimilarity = 0.0; } else { @@ -88,13 +88,13 @@ private Set getIntersection(final Map leftV * @param leftVector left vector * @param rightVector right vector * @param intersection common elements - * @return the dot product + * @return The dot product */ private double dot(final Map leftVector, final Map rightVector, final Set intersection) { long dotProduct = 0; for (final CharSequence key : intersection) { - dotProduct += leftVector.get(key) * rightVector.get(key); + dotProduct += leftVector.get(key) * (long) rightVector.get(key); } return dotProduct; } diff --git a/src/main/java/org/apache/commons/text/similarity/Counter.java b/src/main/java/org/apache/commons/text/similarity/Counter.java index d259d6bc25..03129273ac 100644 --- a/src/main/java/org/apache/commons/text/similarity/Counter.java +++ b/src/main/java/org/apache/commons/text/similarity/Counter.java @@ -36,7 +36,6 @@ final class Counter { * Hidden constructor. 
*/ private Counter() { - super(); } /** diff --git a/src/main/java/org/apache/commons/text/similarity/EditDistance.java b/src/main/java/org/apache/commons/text/similarity/EditDistance.java index cf4e2c0eb2..d330f78bd8 100644 --- a/src/main/java/org/apache/commons/text/similarity/EditDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/EditDistance.java @@ -20,25 +20,25 @@ * Interface for Edit Distances. * *

- * An edit distance is a formal metric on the Kleene closure (X*) over an - * alphabet (X). Note, that a metric - * on a set S is a function d: [S * S] -> [0, INFINITY) such - * that the following hold for x,y,z in - * the set S: + * An edit distance is a formal metric on the Kleene closure ({@code X*}) over an + * alphabet ({@code X}). Note, that a metric + * on a set {@code S} is a function {@code d: [S * S] -> [0, INFINITY)} such + * that the following hold for {@code x,y,z} in + * the set {@code S}: *

*
    - *
  • d(x,y) >= 0, non-negativity or separation axiom
  • - *
  • d(x,y) == 0, if and only if, x == y
  • - *
  • d(x,y) == d(y,x), symmetry, and
  • - *
  • d(x,z) <= d(x,y) + d(y,z), the triangle inequality
  • + *
  • {@code d(x,y) >= 0}, non-negativity or separation axiom
  • + *
  • {@code d(x,y) == 0}, if and only if, {@code x == y}
  • + *
  • {@code d(x,y) == d(y,x)}, symmetry, and
  • + *
  • {@code d(x,z) <= d(x,y) + d(y,z)}, the triangle inequality
  • *
* * *

* This is a BiFunction<CharSequence, CharSequence, R>. - * The apply method + * The {@code apply} method * accepts a pair of {@link CharSequence} parameters - * and returns an R type similarity score. + * and returns an {@code R} type similarity score. *

* * @param The type of similarity score unit used by this EditDistance. @@ -51,7 +51,7 @@ public interface EditDistance extends SimilarityScore { * * @param left the first CharSequence * @param right the second CharSequence - * @return the similarity score between two CharSequences + * @return The similarity score between two CharSequences */ @Override R apply(CharSequence left, CharSequence right); diff --git a/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java index 691fef3e55..310b6bb168 100644 --- a/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java +++ b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java @@ -85,7 +85,7 @@ public EditDistanceFrom(final EditDistance editDistance, final CharSequence l *

* * @param right the second CharSequence - * @return the similarity score between two CharSequences + * @return The similarity score between two CharSequences */ public R apply(final CharSequence right) { return editDistance.apply(left, right); @@ -94,7 +94,7 @@ public R apply(final CharSequence right) { /** * Gets the left parameter. * - * @return the left parameter + * @return The left parameter */ public CharSequence getLeft() { return left; @@ -103,7 +103,7 @@ public CharSequence getLeft() { /** * Gets the edit distance. * - * @return the edit distance + * @return The edit distance */ public EditDistance getEditDistance() { return editDistance; diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java index 8356960435..8dcc526189 100644 --- a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java @@ -63,26 +63,27 @@ public FuzzyScore(final Locale locale) { *

* *
-     * score.fuzzyScore(null, null, null)                                    = IllegalArgumentException
-     * score.fuzzyScore("", "", Locale.ENGLISH)                              = 0
-     * score.fuzzyScore("Workshop", "b", Locale.ENGLISH)                     = 0
-     * score.fuzzyScore("Room", "o", Locale.ENGLISH)                         = 1
-     * score.fuzzyScore("Workshop", "w", Locale.ENGLISH)                     = 1
-     * score.fuzzyScore("Workshop", "ws", Locale.ENGLISH)                    = 2
-     * score.fuzzyScore("Workshop", "wo", Locale.ENGLISH)                    = 4
-     * score.fuzzyScore("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
+     * score.fuzzyScore(null, null)                          = IllegalArgumentException
+     * score.fuzzyScore("not null", null)                    = IllegalArgumentException
+     * score.fuzzyScore(null, "not null")                    = IllegalArgumentException
+     * score.fuzzyScore("", "")                              = 0
+     * score.fuzzyScore("Workshop", "b")                     = 0
+     * score.fuzzyScore("Room", "o")                         = 1
+     * score.fuzzyScore("Workshop", "w")                     = 1
+     * score.fuzzyScore("Workshop", "ws")                    = 2
+     * score.fuzzyScore("Workshop", "wo")                    = 4
+     * score.fuzzyScore("Apache Software Foundation", "asf") = 3
      * 
* * @param term a full term that should be matched against, must not be null * @param query the query that will be matched against a term, must not be * null * @return result score - * @throws IllegalArgumentException if either String input {@code null} or - * Locale input {@code null} + * @throws IllegalArgumentException if the term or query is {@code null} */ public Integer fuzzyScore(final CharSequence term, final CharSequence query) { if (term == null || query == null) { - throw new IllegalArgumentException("Strings must not be null"); + throw new IllegalArgumentException("CharSequences must not be null"); } // fuzzy logic is case insensitive. We normalize the Strings to lower @@ -135,7 +136,7 @@ public Integer fuzzyScore(final CharSequence term, final CharSequence query) { /** * Gets the locale. * - * @return the locale + * @return The locale */ public Locale getLocale() { return locale; diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java index 8d88fe82f9..183fbd9737 100644 --- a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java @@ -57,11 +57,11 @@ public class HammingDistance implements EditDistance { @Override public Integer apply(final CharSequence left, final CharSequence right) { if (left == null || right == null) { - throw new IllegalArgumentException("Strings must not be null"); + throw new IllegalArgumentException("CharSequences must not be null"); } if (left.length() != right.length()) { - throw new IllegalArgumentException("Strings must have the same length"); + throw new IllegalArgumentException("CharSequences must have the same length"); } int distance = 0; diff --git a/src/main/java/org/apache/commons/text/similarity/IntersectionResult.java b/src/main/java/org/apache/commons/text/similarity/IntersectionResult.java new file mode 100644 index 
0000000000..70536be813 --- /dev/null +++ b/src/main/java/org/apache/commons/text/similarity/IntersectionResult.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +import java.util.Objects; + +/** + * Represents the intersection result between two sets. + * + *

Stores the size of set A, set B and the intersection of A and B + * ({@code |A ∩ B|}).

+ * + *

This class is immutable.

+ * + * @since 1.7 + * @see Intersection + */ +public class IntersectionResult { + /** + * The size of set A. + */ + private final int sizeA; + /** + * The size of set B. + */ + private final int sizeB; + /** + * The size of the intersection between set A and B. + */ + private final int intersection; + + /** + * Create the results for an intersection between two sets. + * + * @param sizeA the size of set A ({@code |A|}) + * @param sizeB the size of set B ({@code |B|}) + * @param intersection the size of the intersection of A and B ({@code |A ∩ B|}) + * @throws IllegalArgumentException if the sizes are negative or the intersection is greater + * than the minimum of the two set sizes + */ + public IntersectionResult(final int sizeA, final int sizeB, final int intersection) { + if (sizeA < 0) { + throw new IllegalArgumentException("Set size |A| is not positive: " + sizeA); + } + if (sizeB < 0) { + throw new IllegalArgumentException("Set size |B| is not positive: " + sizeB); + } + if (intersection < 0 || intersection > Math.min(sizeA, sizeB)) { + throw new IllegalArgumentException("Invalid intersection of |A| and |B|: " + intersection); + } + this.sizeA = sizeA; + this.sizeB = sizeB; + this.intersection = intersection; + } + + /** + * Get the size of set A. + * + * @return |A| + */ + public int getSizeA() { + return sizeA; + } + + /** + * Get the size of set B. + * + * @return |B| + */ + public int getSizeB() { + return sizeB; + } + + /** + * Get the size of the intersection between set A and B. 
+ * + * @return {@code |A ∩ B|} + */ + public int getIntersection() { + return intersection; + } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final IntersectionResult result = (IntersectionResult) o; + return sizeA == result.sizeA && sizeB == result.sizeB && intersection == result.intersection; + } + + @Override + public int hashCode() { + return Objects.hash(sizeA, sizeB, intersection); + } + + @Override + public String toString() { + return "Size A: " + sizeA + ", Size B: " + sizeB + ", Intersection: " + intersection; + } +} diff --git a/src/main/java/org/apache/commons/text/similarity/IntersectionSimilarity.java b/src/main/java/org/apache/commons/text/similarity/IntersectionSimilarity.java new file mode 100644 index 0000000000..a64f01c1f5 --- /dev/null +++ b/src/main/java/org/apache/commons/text/similarity/IntersectionSimilarity.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.text.similarity; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.function.Function; + +/** + * Measures the intersection of two sets created from a pair of character sequences. + * + *

It is assumed that the type {@code T} correctly conforms to the requirements for storage + * within a {@link Set} or {@link HashMap}. Ideally the type is immutable and implements + * {@link Object#equals(Object)} and {@link Object#hashCode()}.

+ * + * @param the type of the elements extracted from the character sequence + * @since 1.7 + * @see Set + * @see HashMap + */ +public class IntersectionSimilarity implements SimilarityScore { + /** The converter used to create the elements from the characters. */ + private final Function> converter; + + // The following is adapted from commons-collections for a Bag. + // A Bag is a collection that can store the count of the number + // of copies of each element. + + /** + * Mutable counter class for storing the count of elements. + */ + private static class BagCount { + /** The count. This is initialised to 1 upon construction. */ + int count = 1; + } + + /** + * A minimal implementation of a Bag that can store elements and a count. + * + *

For the intended purpose the Bag does not have to be a {@link Collection}. It does not + * even have to know its own size. + */ + private class TinyBag { + /** The backing map. */ + private final Map map; + + /** + * Create a new tiny bag. + * + * @param initialCapacity the initial capacity + */ + TinyBag(final int initialCapacity) { + map = new HashMap<>(initialCapacity); + } + + /** + * Adds a new element to the bag, incrementing its count in the underlying map. + * + * @param object the object to add + */ + void add(final T object) { + final BagCount mut = map.get(object); + if (mut == null) { + map.put(object, new BagCount()); + } else { + mut.count++; + } + } + + /** + * Returns the number of occurrences of the given element in this bag by + * looking up its count in the underlying map. + * + * @param object the object to search for + * @return The number of occurrences of the object, zero if not found + */ + int getCount(final Object object) { + final BagCount count = map.get(object); + if (count != null) { + return count.count; + } + return 0; + } + + /** + * Returns a Set view of the mappings contained in this bag. + * + * @return The Set view + */ + Set> entrySet() { + return map.entrySet(); + } + + /** + * Get the number of unique elements in the bag. + * + * @return The unique element size + */ + int uniqueElementSize() { + return map.size(); + } + } + + /** + * Create a new intersection similarity using the provided converter. + * + *

If the converter returns a {@link Set} then the intersection result will + * not include duplicates. Any other {@link Collection} is used to produce a result + * that will include duplicates in the intersect and union. + * + * @param converter the converter used to create the elements from the characters + * @throws IllegalArgumentException if the converter is null + */ + public IntersectionSimilarity(final Function> converter) { + if (converter == null) { + throw new IllegalArgumentException("Converter must not be null"); + } + this.converter = converter; + } + + /** + * Calculates the intersection of two character sequences passed as input. + * + * @param left first character sequence + * @param right second character sequence + * @return The intersection result + * @throws IllegalArgumentException if either input sequence is {@code null} + */ + @Override + public IntersectionResult apply(final CharSequence left, final CharSequence right) { + if (left == null || right == null) { + throw new IllegalArgumentException("Input cannot be null"); + } + + // Create the elements from the sequences + final Collection objectsA = converter.apply(left); + final Collection objectsB = converter.apply(right); + final int sizeA = objectsA.size(); + final int sizeB = objectsB.size(); + + // Short-cut if either collection is empty + if (Math.min(sizeA, sizeB) == 0) { + // No intersection + return new IntersectionResult(sizeA, sizeB, 0); + } + + // Intersection = count the number of shared elements + final int intersection; + if (objectsA instanceof Set && objectsB instanceof Set) { + // If a Set then the elements will only have a count of 1. + // Iterate over the smaller set. + intersection = (sizeA < sizeB) + ? 
getIntersection((Set) objectsA, (Set) objectsB) + : getIntersection((Set) objectsB, (Set) objectsA); + } else { + // Create a bag for each collection + final TinyBag bagA = toBag(objectsA); + final TinyBag bagB = toBag(objectsB); + // Iterate over the smaller number of unique elements + intersection = (bagA.uniqueElementSize() < bagB.uniqueElementSize()) + ? getIntersection(bagA, bagB) + : getIntersection(bagB, bagA); + } + + return new IntersectionResult(sizeA, sizeB, intersection); + } + + /** + * Convert the collection to a bag. The bag will contain the count of each element + * in the collection. + * + * @param objects the objects + * @return The bag + */ + private TinyBag toBag(final Collection objects) { + final TinyBag bag = new TinyBag(objects.size()); + for (final T t : objects) { + bag.add(t); + } + return bag; + } + + /** + * Compute the intersection between two sets. This is the count of all the elements + * that are within both sets. + * + * @param the type of the elements in the set + * @param setA the set A + * @param setB the set B + * @return The intersection + */ + private static int getIntersection(final Set setA, final Set setB) { + int intersection = 0; + for (final T element : setA) { + if (setB.contains(element)) { + intersection++; + } + } + return intersection; + } + + /** + * Compute the intersection between two bags. This is the sum of the minimum + * count of each element that is within both sets. 
+ * + * @param bagA the bag A + * @param bagB the bag B + * @return The intersection + */ + private int getIntersection(final TinyBag bagA, final TinyBag bagB) { + int intersection = 0; + for (final Entry entry : bagA.entrySet()) { + final T element = entry.getKey(); + final int count = entry.getValue().count; + // The intersection of this entry in both bags is the minimum count + intersection += Math.min(count, bagB.getCount(element)); + } + return intersection; + } +} diff --git a/src/main/java/org/apache/commons/text/similarity/JaccardDistance.java b/src/main/java/org/apache/commons/text/similarity/JaccardDistance.java index 38e4548ebb..fe956b20bc 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaccardDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaccardDistance.java @@ -50,6 +50,6 @@ public Double apply(final CharSequence left, final CharSequence right) { if (left == null || right == null) { throw new IllegalArgumentException("Input cannot be null"); } - return Math.round((1 - jaccardSimilarity.apply(left, right)) * 100d) / 100d; + return 1.0 - jaccardSimilarity.apply(left, right).doubleValue(); } } diff --git a/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java b/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java index 1dc2b85018..4f29139260 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java +++ b/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java @@ -48,7 +48,7 @@ public Double apply(final CharSequence left, final CharSequence right) { if (left == null || right == null) { throw new IllegalArgumentException("Input cannot be null"); } - return Math.round(calculateJaccardSimilarity(left, right) * 100d) / 100d; + return calculateJaccardSimilarity(left, right); } /** @@ -62,27 +62,25 @@ public Double apply(final CharSequence left, final CharSequence right) { * @return index */ private Double calculateJaccardSimilarity(final 
CharSequence left, final CharSequence right) { - final Set intersectionSet = new HashSet<>(); - final Set unionSet = new HashSet<>(); - boolean unionFilled = false; final int leftLength = left.length(); final int rightLength = right.length(); + if (leftLength == 0 && rightLength == 0) { + return 1d; + } if (leftLength == 0 || rightLength == 0) { return 0d; } - - for (int leftIndex = 0; leftIndex < leftLength; leftIndex++) { - unionSet.add(String.valueOf(left.charAt(leftIndex))); - for (int rightIndex = 0; rightIndex < rightLength; rightIndex++) { - if (!unionFilled) { - unionSet.add(String.valueOf(right.charAt(rightIndex))); - } - if (left.charAt(leftIndex) == right.charAt(rightIndex)) { - intersectionSet.add(String.valueOf(left.charAt(leftIndex))); - } - } - unionFilled = true; + final Set leftSet = new HashSet<>(); + for (int i = 0; i < leftLength; i++) { + leftSet.add(left.charAt(i)); + } + final Set rightSet = new HashSet<>(); + for (int i = 0; i < rightLength; i++) { + rightSet.add(right.charAt(i)); } - return Double.valueOf(intersectionSet.size()) / Double.valueOf(unionSet.size()); + final Set unionSet = new HashSet<>(leftSet); + unionSet.addAll(rightSet); + final int intersectionSize = leftSet.size() + rightSet.size() - unionSet.size(); + return 1.0d * intersectionSize / unionSet.size(); } } diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java index 8146eafa7c..ab1b7ea5ca 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java @@ -19,86 +19,88 @@ import java.util.Arrays; /** - * A similarity algorithm indicating the percentage of matched characters between two character sequences. - * - *

- * The Jaro measure is the weighted sum of percentage of matched characters - * from each file and transposed characters. Winkler increased this measure - * for matching initial characters. - *

- * - *

- * This implementation is based on the Jaro Winkler similarity algorithm - * from - * http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance. - *

- * - *

- * This code has been adapted from Apache Commons Lang 3.3. - *

+ * Measures the Jaro-Winkler distance of two character sequences. + * It is the complement of the Jaro-Winkler similarity. * * @since 1.0 */ -public class JaroWinklerDistance implements SimilarityScore { +public class JaroWinklerDistance implements EditDistance { /** - * Represents a failed index search. + * @deprecated Deprecated as of 1.7. This constant will be removed in 2.0. */ + @Deprecated public static final int INDEX_NOT_FOUND = -1; /** - * Find the Jaro Winkler Distance which indicates the similarity score - * between two CharSequences. + * Computes the Jaro Winkler Distance between two character sequences. * *
      * distance.apply(null, null)          = IllegalArgumentException
-     * distance.apply("","")               = 0.0
-     * distance.apply("","a")              = 0.0
-     * distance.apply("aaapppp", "")       = 0.0
-     * distance.apply("frog", "fog")       = 0.93
-     * distance.apply("fly", "ant")        = 0.0
-     * distance.apply("elephant", "hippo") = 0.44
-     * distance.apply("hippo", "elephant") = 0.44
-     * distance.apply("hippo", "zzzzzzzz") = 0.0
-     * distance.apply("hello", "hallo")    = 0.88
-     * distance.apply("ABC Corporation", "ABC Corp") = 0.93
-     * distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.95
-     * distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.92
-     * distance.apply("PENNSYLVANIA", "PENNCISYLVNIA")    = 0.88
+     * distance.apply("foo", null)         = IllegalArgumentException
+     * distance.apply(null, "foo")         = IllegalArgumentException
+     * distance.apply("", "")              = 0.0
+     * distance.apply("foo", "foo")        = 0.0
+     * distance.apply("foo", "foo ")       = 0.06
+     * distance.apply("foo", "foo  ")      = 0.09
+     * distance.apply("foo", " foo ")      = 0.13
+     * distance.apply("foo", "  foo")      = 0.49
+     * distance.apply("", "a")             = 1.0
+     * distance.apply("aaapppp", "")       = 1.0
+     * distance.apply("frog", "fog")       = 0.07
+     * distance.apply("fly", "ant")        = 1.0
+     * distance.apply("elephant", "hippo") = 0.56
+     * distance.apply("hippo", "elephant") = 0.56
+     * distance.apply("hippo", "zzzzzzzz") = 1.0
+     * distance.apply("hello", "hallo")    = 0.12
+     * distance.apply("ABC Corporation", "ABC Corp") = 0.09
+     * distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.05
+     * distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.08
+     * distance.apply("PENNSYLVANIA", "PENNCISYLVNIA") = 0.12
      * 
* - * @param left the first String, must not be null - * @param right the second String, must not be null + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null * @return result distance - * @throws IllegalArgumentException if either String input {@code null} + * @throws IllegalArgumentException if either CharSequence input is {@code null} */ @Override public Double apply(final CharSequence left, final CharSequence right) { - final double defaultScalingFactor = 0.1; if (left == null || right == null) { - throw new IllegalArgumentException("Strings must not be null"); + throw new IllegalArgumentException("CharSequences must not be null"); } + // TODO: replace the rest of the code by this in 2.0, see TEXT-104 + // + // JaroWinklerSimilarity similarity = new JaroWinklerSimilarity(); + // return 1 - similarity.apply(left, right); + + final double defaultScalingFactor = 0.1; final int[] mtp = matches(left, right); final double m = mtp[0]; if (m == 0) { return 0D; } - final double j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3; - final double jw = j < 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D - j); + final double j = ((m / left.length() + m / right.length() + (m - (double) mtp[1] / 2) / m)) / 3; + final double jw = j < 0.7D ? j : j + defaultScalingFactor * mtp[2] * (1D - j); return jw; } + // TODO: remove this method in 2.0, see TEXT-104 /** - * This method returns the Jaro-Winkler string matches, transpositions, prefix, max array. + * This method returns the Jaro-Winkler string matches, half transpositions, prefix array. * * @param first the first string to be matched * @param second the second string to be matched - * @return mtp array containing: matches, transpositions, prefix, and max length + * @return mtp array containing: matches, half transpositions, and prefix + * @deprecated Deprecated as of 1.7. 
This method will be removed in 2.0, and moved to a Jaro Winkler similarity + * class. */ + @Deprecated protected static int[] matches(final CharSequence first, final CharSequence second) { - CharSequence max, min; + final CharSequence max; + final CharSequence min; if (first.length() > second.length()) { max = first; min = second; @@ -136,21 +138,20 @@ protected static int[] matches(final CharSequence first, final CharSequence seco si++; } } - int transpositions = 0; + int halfTranspositions = 0; for (int mi = 0; mi < ms1.length; mi++) { if (ms1[mi] != ms2[mi]) { - transpositions++; + halfTranspositions++; } } int prefix = 0; - for (int mi = 0; mi < min.length(); mi++) { + for (int mi = 0; mi < Math.min(4, min.length()); mi++) { if (first.charAt(mi) == second.charAt(mi)) { prefix++; } else { break; } } - return new int[] {matches, transpositions / 2, prefix, max.length()}; + return new int[] {matches, halfTranspositions, prefix}; } - } diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWinklerSimilarity.java b/src/main/java/org/apache/commons/text/similarity/JaroWinklerSimilarity.java new file mode 100644 index 0000000000..7ff5869271 --- /dev/null +++ b/src/main/java/org/apache/commons/text/similarity/JaroWinklerSimilarity.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +import java.util.Arrays; + +import org.apache.commons.lang3.StringUtils; + +/** + * A similarity algorithm indicating the percentage of matched characters between two character sequences. + * + *

+ * The Jaro measure is the weighted sum of percentage of matched characters + * from each file and transposed characters. Winkler increased this measure + * for matching initial characters. + *

+ * + *

+ * This implementation is based on the Jaro Winkler similarity algorithm + * from + * http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance. + *

+ * + *

+ * This code has been adapted from Apache Commons Lang 3.3. + *

+ * + * @since 1.7 + */ +public class JaroWinklerSimilarity implements SimilarityScore { + + /** + * Computes the Jaro Winkler Similarity between two character sequences. + * + *
+     * sim.apply(null, null)          = IllegalArgumentException
+     * sim.apply("foo", null)         = IllegalArgumentException
+     * sim.apply(null, "foo")         = IllegalArgumentException
+     * sim.apply("", "")              = 1.0
+     * sim.apply("foo", "foo")        = 1.0
+     * sim.apply("foo", "foo ")       = 0.94
+     * sim.apply("foo", "foo  ")      = 0.91
+     * sim.apply("foo", " foo ")      = 0.87
+     * sim.apply("foo", "  foo")      = 0.51
+     * sim.apply("", "a")             = 0.0
+     * sim.apply("aaapppp", "")       = 0.0
+     * sim.apply("frog", "fog")       = 0.93
+     * sim.apply("fly", "ant")        = 0.0
+     * sim.apply("elephant", "hippo") = 0.44
+     * sim.apply("hippo", "elephant") = 0.44
+     * sim.apply("hippo", "zzzzzzzz") = 0.0
+     * sim.apply("hello", "hallo")    = 0.88
+     * sim.apply("ABC Corporation", "ABC Corp") = 0.91
+     * sim.apply("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.95
+     * sim.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.92
+     * sim.apply("PENNSYLVANIA", "PENNCISYLVNIA") = 0.88
+     * 
+ * + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null + * @return result similarity + * @throws IllegalArgumentException if either CharSequence input is {@code null} + */ + @Override + public Double apply(final CharSequence left, final CharSequence right) { + final double defaultScalingFactor = 0.1; + + if (left == null || right == null) { + throw new IllegalArgumentException("CharSequences must not be null"); + } + + if (StringUtils.equals(left, right)) { + return 1d; + } + + final int[] mtp = matches(left, right); + final double m = mtp[0]; + if (m == 0) { + return 0d; + } + final double j = ((m / left.length() + m / right.length() + (m - (double) mtp[1] / 2) / m)) / 3; + final double jw = j < 0.7d ? j : j + defaultScalingFactor * mtp[2] * (1d - j); + return jw; + } + + /** + * This method returns the Jaro-Winkler string matches, half transpositions, prefix array. + * + * @param first the first string to be matched + * @param second the second string to be matched + * @return mtp array containing: matches, half transpositions, and prefix + */ + protected static int[] matches(final CharSequence first, final CharSequence second) { + final CharSequence max; + final CharSequence min; + if (first.length() > second.length()) { + max = first; + min = second; + } else { + max = second; + min = first; + } + final int range = Math.max(max.length() / 2 - 1, 0); + final int[] matchIndexes = new int[min.length()]; + Arrays.fill(matchIndexes, -1); + final boolean[] matchFlags = new boolean[max.length()]; + int matches = 0; + for (int mi = 0; mi < min.length(); mi++) { + final char c1 = min.charAt(mi); + for (int xi = Math.max(mi - range, 0), xn = Math.min(mi + range + 1, max.length()); xi < xn; xi++) { + if (!matchFlags[xi] && c1 == max.charAt(xi)) { + matchIndexes[mi] = xi; + matchFlags[xi] = true; + matches++; + break; + } + } + } + final char[] ms1 = new char[matches]; + final char[] ms2 = new char[matches]; 
+ for (int i = 0, si = 0; i < min.length(); i++) { + if (matchIndexes[i] != -1) { + ms1[si] = min.charAt(i); + si++; + } + } + for (int i = 0, si = 0; i < max.length(); i++) { + if (matchFlags[i]) { + ms2[si] = max.charAt(i); + si++; + } + } + int halfTranspositions = 0; + for (int mi = 0; mi < ms1.length; mi++) { + if (ms1[mi] != ms2[mi]) { + halfTranspositions++; + } + } + int prefix = 0; + for (int mi = 0; mi < Math.min(4, min.length()); mi++) { + if (first.charAt(mi) == second.charAt(mi)) { + prefix++; + } else { + break; + } + } + return new int[] {matches, halfTranspositions, prefix}; + } + +} diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDetailedDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDetailedDistance.java index 00b2689996..2560d22a5a 100644 --- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDetailedDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDetailedDistance.java @@ -109,7 +109,7 @@ public LevenshteinResults apply(final CharSequence left, final CharSequence righ /** * Gets the default instance. * - * @return the default instace + * @return The default instance */ public static LevenshteinDetailedDistance getDefaultInstance() { return DEFAULT_INSTANCE; @@ -118,7 +118,7 @@ public static LevenshteinDetailedDistance getDefaultInstance() { /** * Gets the distance threshold. * - * @return the distance threshold + * @return The distance threshold */ public Integer getThreshold() { return threshold; @@ -150,8 +150,8 @@ public Integer getThreshold() { * limitedCompare("hippo", "elephant", 6) = -1 *
* - * @param left the first string, must not be null - * @param right the second string, must not be null + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null * @param threshold the target threshold, must not be negative * @return result distance, or -1 */ @@ -159,7 +159,7 @@ private static LevenshteinResults limitedCompare(CharSequence left, CharSequence right, final int threshold) { //NOPMD if (left == null || right == null) { - throw new IllegalArgumentException("Strings must not be null"); + throw new IllegalArgumentException("CharSequences must not be null"); } if (threshold < 0) { throw new IllegalArgumentException("Threshold must not be negative"); @@ -331,14 +331,14 @@ private static LevenshteinResults limitedCompare(CharSequence left, * unlimitedCompare("hello", "hallo") = 1 *
* - * @param left the first String, must not be null - * @param right the second String, must not be null + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null * @return result distance, or -1 - * @throws IllegalArgumentException if either String input {@code null} + * @throws IllegalArgumentException if either CharSequence input is {@code null} */ private static LevenshteinResults unlimitedCompare(CharSequence left, CharSequence right) { if (left == null || right == null) { - throw new IllegalArgumentException("Strings must not be null"); + throw new IllegalArgumentException("CharSequences must not be null"); } /* diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java index a8fab0437e..913ec2454f 100644 --- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java @@ -117,7 +117,7 @@ public Integer apply(final CharSequence left, final CharSequence right) { /** * Gets the default instance. * - * @return the default instance + * @return The default instance */ public static LevenshteinDistance getDefaultInstance() { return DEFAULT_INSTANCE; @@ -126,7 +126,7 @@ public static LevenshteinDistance getDefaultInstance() { /** * Gets the distance threshold. 
* - * @return the distance threshold + * @return The distance threshold */ public Integer getThreshold() { return threshold; @@ -158,14 +158,14 @@ public Integer getThreshold() { * limitedCompare("hippo", "elephant", 6) = -1 * * - * @param left the first string, must not be null - * @param right the second string, must not be null + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null * @param threshold the target threshold, must not be negative * @return result distance, or -1 */ private static int limitedCompare(CharSequence left, CharSequence right, final int threshold) { // NOPMD if (left == null || right == null) { - throw new IllegalArgumentException("Strings must not be null"); + throw new IllegalArgumentException("CharSequences must not be null"); } if (threshold < 0) { throw new IllegalArgumentException("Threshold must not be negative"); @@ -241,6 +241,11 @@ private static int limitedCompare(CharSequence left, CharSequence right, final i m = right.length(); } + // the edit distance cannot be less than the length difference + if (m - n > threshold) { + return -1; + } + int[] p = new int[n + 1]; // 'previous' cost array, horizontally int[] d = new int[n + 1]; // cost array, horizontally int[] tempD; // placeholder to assist in swapping p and d @@ -265,17 +270,12 @@ private static int limitedCompare(CharSequence left, CharSequence right, final i final int max = j > Integer.MAX_VALUE - threshold ? 
n : Math.min( n, j + threshold); - // the stripe may lead off of the table if s and t are of different - // sizes - if (min > max) { - return -1; - } - // ignore entry left of leftmost if (min > 1) { d[min - 1] = Integer.MAX_VALUE; } + int lowerBound = Integer.MAX_VALUE; // iterates through [min, max] in s for (int i = min; i <= max; i++) { if (left.charAt(i - 1) == rightJ) { @@ -286,6 +286,11 @@ private static int limitedCompare(CharSequence left, CharSequence right, final i // left and up d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]); } + lowerBound = Math.min(lowerBound, d[i]); + } + // if the lower bound is greater than the threshold, then exit early + if (lowerBound > threshold) { + return -1; } // copy current distance counts to 'previous row' distance counts @@ -328,14 +333,14 @@ private static int limitedCompare(CharSequence left, CharSequence right, final i * unlimitedCompare("hello", "hallo") = 1 * * - * @param left the first String, must not be null - * @param right the second String, must not be null + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null * @return result distance, or -1 - * @throws IllegalArgumentException if either String input {@code null} + * @throws IllegalArgumentException if either CharSequence input is {@code null} */ private static int unlimitedCompare(CharSequence left, CharSequence right) { if (left == null || right == null) { - throw new IllegalArgumentException("Strings must not be null"); + throw new IllegalArgumentException("CharSequences must not be null"); } /* diff --git a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java index b913619f50..0f51906b11 100644 --- a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java +++ b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java @@ -24,7 +24,7 @@ 
* common. Two strings that are entirely different, return a value of 0, and two strings that return a value * of the commonly shared length implies that the strings are completely the same in value and position. * Note. Generally this algorithm is fairly inefficient, as for length m, n of the input - * CharSequence's left and right respectively, the runtime of the + * {@code CharSequence}'s {@code left} and {@code right} respectively, the runtime of the * algorithm is O(m*n). *

* @@ -43,7 +43,7 @@ public class LongestCommonSubsequence implements SimilarityScore { /** - * Calculates longest common subsequence similarity score of two CharSequence's passed as + * Calculates longest common subsequence similarity score of two {@code CharSequence}'s passed as * input. * * @param left first character sequence @@ -58,16 +58,16 @@ public Integer apply(final CharSequence left, final CharSequence right) { if (left == null || right == null) { throw new IllegalArgumentException("Inputs must not be null"); } - return logestCommonSubsequence(left, right).length(); + return longestCommonSubsequence(left, right).length(); } /** - * Computes the longest common subsequence between the two CharSequence's passed as input. + * Computes the longest common subsequence between the two {@code CharSequence}'s passed as input. * *

- * Note, a substring and subsequence are not necessarily the same thing. Indeed, abcxyzqrs and - * xyzghfm have both the same common substring and subsequence, namely xyz. However, - * axbyczqrs and abcxyzqtv have the longest common subsequence xyzq because a + * Note, a substring and subsequence are not necessarily the same thing. Indeed, {@code abcxyzqrs} and + * {@code xyzghfm} have both the same common substring and subsequence, namely {@code xyz}. However, + * {@code axbyczqrs} and {@code abcxyzqtv} have the longest common subsequence {@code xyzq} because a * subsequence need not have adjacent characters. *

* @@ -79,7 +79,7 @@ public Integer apply(final CharSequence left, final CharSequence right) { * * @param left first character sequence * @param right second character sequence - * @return the longest common subsequence found + * @return The longest common subsequence found * @throws IllegalArgumentException * if either String input {@code null} * @deprecated Deprecated as of 1.2 due to a typo in the method name. @@ -92,13 +92,13 @@ public CharSequence logestCommonSubsequence(final CharSequence left, final CharS } /** - * Computes the longest common subsequence between the two CharSequence's passed as + * Computes the longest common subsequence between the two {@code CharSequence}'s passed as * input. * *

- * Note, a substring and subsequence are not necessarily the same thing. Indeed, abcxyzqrs and - * xyzghfm have both the same common substring and subsequence, namely xyz. However, - * axbyczqrs and abcxyzqtv have the longest common subsequence xyzq because a + * Note, a substring and subsequence are not necessarily the same thing. Indeed, {@code abcxyzqrs} and + * {@code xyzghfm} have both the same common substring and subsequence, namely {@code xyz}. However, + * {@code axbyczqrs} and {@code abcxyzqtv} have the longest common subsequence {@code xyzq} because a * subsequence need not have adjacent characters. *

* @@ -110,7 +110,7 @@ public CharSequence logestCommonSubsequence(final CharSequence left, final CharS * * @param left first character sequence * @param right second character sequence - * @return the longest common subsequence found + * @return The longest common subsequence found * @throws IllegalArgumentException * if either String input {@code null} * @since 1.2 diff --git a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java index a33f8b3e7d..29745461c0 100644 --- a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java @@ -42,9 +42,9 @@ public class LongestCommonSubsequenceDistance implements EditDistance { private final LongestCommonSubsequence longestCommonSubsequence = new LongestCommonSubsequence(); /** - * Calculates an edit distance between two CharSequence's left and - * right as: left.length() + right.length() - 2 * LCS(left, right), where - * LCS is given in {@link LongestCommonSubsequence#apply(CharSequence, CharSequence)}. + * Calculates an edit distance between two {@code CharSequence}'s {@code left} and + * {@code right} as: {@code left.length() + right.length() - 2 * LCS(left, right)}, where + * {@code LCS} is given in {@link LongestCommonSubsequence#apply(CharSequence, CharSequence)}. 
* * @param left first character sequence * @param right second character sequence diff --git a/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java b/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java index cc009efafa..84a424d0e6 100644 --- a/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java +++ b/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java @@ -21,17 +21,20 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; /** * A simple word tokenizer that utilizes regex to find words. It applies a regex - * {@code}(\w)+{@code} over the input text to extract words from a given character + * {@code (\w)+} over the input text to extract words from a given character * sequence. * * @since 1.0 */ class RegexTokenizer implements Tokenizer { + /** The whitespace pattern. */ + private static final Pattern PATTERN = Pattern.compile("(\\w)+"); /** * {@inheritDoc} @@ -41,13 +44,12 @@ class RegexTokenizer implements Tokenizer { @Override public CharSequence[] tokenize(final CharSequence text) { Validate.isTrue(StringUtils.isNotBlank(text), "Invalid text"); - final Pattern pattern = Pattern.compile("(\\w)+"); - final Matcher matcher = pattern.matcher(text.toString()); + final Matcher matcher = PATTERN.matcher(text); final List tokens = new ArrayList<>(); while (matcher.find()) { tokens.add(matcher.group(0)); } - return tokens.toArray(new String[0]); + return tokens.toArray(ArrayUtils.EMPTY_STRING_ARRAY); } } diff --git a/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java b/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java index e71fae974c..2156c2817c 100644 --- a/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java +++ b/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java @@ -24,12 +24,12 @@ * 
allowing for exceptions, namely the Jaro-Winkler similarity score. *

*

- * We Define a SimilarityScore to be a function d: [X * X] -> [0, INFINITY) with the + * We Define a SimilarityScore to be a function {@code d: [X * X] -> [0, INFINITY)} with the * following properties: *

*
    - *
  • d(x,y) >= 0, non-negativity or separation axiom
  • - *
  • d(x,y) == d(y,x), symmetry.
  • + *
  • {@code d(x,y) >= 0}, non-negativity or separation axiom
  • + *
  • {@code d(x,y) == d(y,x)}, symmetry.
  • *
* *

@@ -39,9 +39,9 @@ * *

* Further, this intended to be BiFunction<CharSequence, CharSequence, R>. - * The apply method + * The {@code apply} method * accepts a pair of {@link CharSequence} parameters - * and returns an R type similarity score. We have omitted the explicit + * and returns an {@code R} type similarity score. We have omitted the explicit * statement of extending BiFunction due to it only being implemented in Java 1.8, and we * wish to maintain Java 1.7 compatibility. *

@@ -56,7 +56,7 @@ public interface SimilarityScore { * * @param left the first CharSequence * @param right the second CharSequence - * @return the similarity score between two CharSequences + * @return The similarity score between two CharSequences */ R apply(CharSequence left, CharSequence right); diff --git a/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java b/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java index 8d3e46ef98..22ffc416ab 100644 --- a/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java +++ b/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java @@ -85,7 +85,7 @@ public SimilarityScoreFrom(final SimilarityScore similarityScore, final CharS *

* * @param right the second CharSequence - * @return the similarity score between two CharSequences + * @return The similarity score between two CharSequences */ public R apply(final CharSequence right) { return similarityScore.apply(left, right); @@ -94,7 +94,7 @@ public R apply(final CharSequence right) { /** * Gets the left parameter. * - * @return the left parameter + * @return The left parameter */ public CharSequence getLeft() { return left; @@ -103,7 +103,7 @@ public CharSequence getLeft() { /** * Gets the edit distance. * - * @return the edit distance + * @return The edit distance */ public SimilarityScore getSimilarityScore() { return similarityScore; diff --git a/src/main/java/org/apache/commons/text/similarity/package-info.java b/src/main/java/org/apache/commons/text/similarity/package-info.java index 703e427cc7..d053d2ad0f 100644 --- a/src/main/java/org/apache/commons/text/similarity/package-info.java +++ b/src/main/java/org/apache/commons/text/similarity/package-info.java @@ -29,15 +29,16 @@ *
  • {@link org.apache.commons.text.similarity.FuzzyScore Fuzzy Score}
  • *
  • {@link org.apache.commons.text.similarity.HammingDistance Hamming Distance}
  • *
  • {@link org.apache.commons.text.similarity.JaroWinklerDistance Jaro-Winkler Distance}
  • + *
  • {@link org.apache.commons.text.similarity.JaroWinklerSimilarity Jaro-Winkler Similarity}
  • *
  • {@link org.apache.commons.text.similarity.LevenshteinDistance Levenshtein Distance}
  • *
  • {@link org.apache.commons.text.similarity.LongestCommonSubsequenceDistance - * Longest Commons Subsequence Distance}
  • + * Longest Common Subsequence Distance} * * *

    The {@link org.apache.commons.text.similarity.CosineDistance Cosine Distance} * utilises a {@link org.apache.commons.text.similarity.RegexTokenizer regular expression tokenizer (\w+)}. * And the {@link org.apache.commons.text.similarity.LevenshteinDistance Levenshtein Distance}'s - * behaviour can be changed to take into consideration a maximum throughput.

    + * behavior can be changed to take into consideration a maximum throughput.

    * * @since 1.0 */ diff --git a/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java b/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java index ba257340e7..c0982f68c1 100644 --- a/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java +++ b/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java @@ -55,9 +55,9 @@ public AggregateTranslator(final CharSequenceTranslator... translators) { * {@inheritDoc} */ @Override - public int translate(final CharSequence input, final int index, final Writer out) throws IOException { + public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { for (final CharSequenceTranslator translator : translators) { - final int consumed = translator.translate(input, index, out); + final int consumed = translator.translate(input, index, writer); if (consumed != 0) { return consumed; } diff --git a/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java b/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java index dd719fe012..7e56788808 100644 --- a/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java +++ b/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java @@ -48,11 +48,11 @@ public abstract class CharSequenceTranslator { * * @param input CharSequence that is being translated * @param index int representing the current point of translation - * @param out Writer to translate the text to + * @param writer Writer to translate the text to * @return int count of codepoints consumed * @throws IOException if and only if the Writer produces an IOException */ - public abstract int translate(CharSequence input, int index, Writer out) throws IOException; + public abstract int translate(CharSequence input, int index, Writer writer) throws IOException; /** * Helper for non-Writer usage. 
@@ -78,28 +78,28 @@ public final String translate(final CharSequence input) { * tightly coupled with the abstract method of this class. * * @param input CharSequence that is being translated - * @param out Writer to translate the text to + * @param writer Writer to translate the text to * @throws IOException if and only if the Writer produces an IOException */ - public final void translate(final CharSequence input, final Writer out) throws IOException { - Validate.isTrue(out != null, "The Writer must not be null"); + public final void translate(final CharSequence input, final Writer writer) throws IOException { + Validate.isTrue(writer != null, "The Writer must not be null"); if (input == null) { return; } int pos = 0; final int len = input.length(); while (pos < len) { - final int consumed = translate(input, pos, out); + final int consumed = translate(input, pos, writer); if (consumed == 0) { // inlined implementation of Character.toChars(Character.codePointAt(input, pos)) // avoids allocating temp char arrays and duplicate checks final char c1 = input.charAt(pos); - out.write(c1); + writer.write(c1); pos++; if (Character.isHighSurrogate(c1) && pos < len) { final char c2 = input.charAt(pos); if (Character.isLowSurrogate(c2)) { - out.write(c2); + writer.write(c2); pos++; } } @@ -128,11 +128,11 @@ public final CharSequenceTranslator with(final CharSequenceTranslator... transla } /** - *

    Returns an upper case hexadecimal String for the given + *

    Returns an upper case hexadecimal {@code String} for the given * character.

    * * @param codepoint The codepoint to convert. - * @return An upper case hexadecimal String + * @return An upper case hexadecimal {@code String} */ public static String hex(final int codepoint) { return Integer.toHexString(codepoint).toUpperCase(Locale.ENGLISH); diff --git a/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java b/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java index 71828be069..d225aacaac 100644 --- a/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java +++ b/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java @@ -32,9 +32,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator { * {@inheritDoc} */ @Override - public final int translate(final CharSequence input, final int index, final Writer out) throws IOException { + public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException { final int codepoint = Character.codePointAt(input, index); - final boolean consumed = translate(codepoint, out); + final boolean consumed = translate(codepoint, writer); return consumed ? 1 : 0; } @@ -42,10 +42,10 @@ public final int translate(final CharSequence input, final int index, final Writ * Translate the specified codepoint into another. 
* * @param codepoint int character input to translate - * @param out Writer to optionally push the translated output to + * @param writer Writer to optionally push the translated output to * @return boolean as to whether translation occurred or not * @throws IOException if and only if the Writer produces an IOException */ - public abstract boolean translate(int codepoint, Writer out) throws IOException; + public abstract boolean translate(int codepoint, Writer writer) throws IOException; } diff --git a/src/main/java/org/apache/commons/text/translate/CsvTranslators.java b/src/main/java/org/apache/commons/text/translate/CsvTranslators.java index 2987eadda9..d192b4d9ee 100644 --- a/src/main/java/org/apache/commons/text/translate/CsvTranslators.java +++ b/src/main/java/org/apache/commons/text/translate/CsvTranslators.java @@ -21,13 +21,15 @@ import org.apache.commons.lang3.CharUtils; import org.apache.commons.lang3.StringUtils; + /** - *

    This class holds inner classes for escaping/unescaping Comma Separated Values.

    - * - * In general the use a high level API like commons-csv should be preferred over these - * low level classes. + * This class holds inner classes for escaping/unescaping Comma Separated Values. + *

    + * In general the use a high level API like Apache Commons + * CSV should be preferred over these low level classes. + *

    * - * @see commons-csv + * @see Apache Commons CSV */ public final class CsvTranslators { @@ -52,15 +54,15 @@ private CsvTranslators() { } public static class CsvEscaper extends SinglePassTranslator { @Override - void translateWhole(final CharSequence input, final Writer out) throws IOException { + void translateWhole(final CharSequence input, final Writer writer) throws IOException { final String inputSting = input.toString(); if (StringUtils.containsNone(inputSting, CSV_SEARCH_CHARS)) { - out.write(inputSting); + writer.write(inputSting); } else { // input needs quoting - out.write(CSV_QUOTE); - out.write(StringUtils.replace(inputSting, CSV_QUOTE_STR, CSV_ESCAPED_QUOTE_STR)); - out.write(CSV_QUOTE); + writer.write(CSV_QUOTE); + writer.write(StringUtils.replace(inputSting, CSV_QUOTE_STR, CSV_ESCAPED_QUOTE_STR)); + writer.write(CSV_QUOTE); } } } @@ -71,10 +73,10 @@ void translateWhole(final CharSequence input, final Writer out) throws IOExcepti public static class CsvUnescaper extends SinglePassTranslator { @Override - void translateWhole(final CharSequence input, final Writer out) throws IOException { + void translateWhole(final CharSequence input, final Writer writer) throws IOException { // is input not quoted? 
if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) { - out.write(input.toString()); + writer.write(input.toString()); return; } @@ -83,9 +85,9 @@ void translateWhole(final CharSequence input, final Writer out) throws IOExcepti if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) { // deal with escaped quotes; ie) "" - out.write(StringUtils.replace(quoteless, CSV_ESCAPED_QUOTE_STR, CSV_QUOTE_STR)); + writer.write(StringUtils.replace(quoteless, CSV_ESCAPED_QUOTE_STR, CSV_QUOTE_STR)); } else { - out.write(input.toString()); + writer.write(quoteless); } } } diff --git a/src/main/java/org/apache/commons/text/translate/EntityArrays.java b/src/main/java/org/apache/commons/text/translate/EntityArrays.java index e6cee304a9..0c36c6f1de 100644 --- a/src/main/java/org/apache/commons/text/translate/EntityArrays.java +++ b/src/main/java/org/apache/commons/text/translate/EntityArrays.java @@ -18,13 +18,12 @@ import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; import java.util.Map; /** * Class holding various entity data for HTML and XML - generally for use with * the LookupTranslator. - * All Maps are generated using java.util.Collections.unmodifiableMap(). + * All Maps are generated using {@code java.util.Collections.unmodifiableMap()}. 
* * @since 1.0 */ @@ -434,9 +433,7 @@ public class EntityArrays { */ public static Map invert(final Map map) { final Map newMap = new HashMap<>(); - final Iterator> it = map.entrySet().iterator(); - while (it.hasNext()) { - final Map.Entry pair = it.next(); + for (final Map.Entry pair : map.entrySet()) { newMap.put(pair.getValue(), pair.getKey()); } return newMap; diff --git a/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java b/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java index b4b3aac9a7..bc9a157ec7 100644 --- a/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java +++ b/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java @@ -25,12 +25,12 @@ public class JavaUnicodeEscaper extends UnicodeEscaper { /** *

    - * Constructs a JavaUnicodeEscaper above the specified value (exclusive). + * Constructs a {@code JavaUnicodeEscaper} above the specified value (exclusive). *

    * * @param codepoint * above which to escape - * @return the newly created {@code UnicodeEscaper} instance + * @return The newly created {@code UnicodeEscaper} instance */ public static JavaUnicodeEscaper above(final int codepoint) { return outsideOf(0, codepoint); @@ -38,12 +38,12 @@ public static JavaUnicodeEscaper above(final int codepoint) { /** *

    - * Constructs a JavaUnicodeEscaper below the specified value (exclusive). + * Constructs a {@code JavaUnicodeEscaper} below the specified value (exclusive). *

    * * @param codepoint * below which to escape - * @return the newly created {@code UnicodeEscaper} instance + * @return The newly created {@code UnicodeEscaper} instance */ public static JavaUnicodeEscaper below(final int codepoint) { return outsideOf(codepoint, Integer.MAX_VALUE); @@ -51,14 +51,14 @@ public static JavaUnicodeEscaper below(final int codepoint) { /** *

    - * Constructs a JavaUnicodeEscaper between the specified values (inclusive). + * Constructs a {@code JavaUnicodeEscaper} between the specified values (inclusive). *

    * * @param codepointLow * above which to escape * @param codepointHigh * below which to escape - * @return the newly created {@code UnicodeEscaper} instance + * @return The newly created {@code UnicodeEscaper} instance */ public static JavaUnicodeEscaper between(final int codepointLow, final int codepointHigh) { return new JavaUnicodeEscaper(codepointLow, codepointHigh, true); @@ -66,14 +66,14 @@ public static JavaUnicodeEscaper between(final int codepointLow, final int codep /** *

    - * Constructs a JavaUnicodeEscaper outside of the specified values (exclusive). + * Constructs a {@code JavaUnicodeEscaper} outside of the specified values (exclusive). *

    * * @param codepointLow * below which to escape * @param codepointHigh * above which to escape - * @return the newly created {@code UnicodeEscaper} instance + * @return The newly created {@code UnicodeEscaper} instance */ public static JavaUnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) { return new JavaUnicodeEscaper(codepointLow, codepointHigh, false); @@ -81,9 +81,9 @@ public static JavaUnicodeEscaper outsideOf(final int codepointLow, final int cod /** *

    - * Constructs a JavaUnicodeEscaper for the specified range. This is the underlying method for the - * other constructors/builders. The below and above boundaries are inclusive when - * between is true and exclusive when it is false. + * Constructs a {@code JavaUnicodeEscaper} for the specified range. This is the underlying method for the + * other constructors/builders. The {@code below} and {@code above} boundaries are inclusive when + * {@code between} is {@code true} and exclusive when it is {@code false}. *

    * * @param below @@ -102,7 +102,7 @@ public JavaUnicodeEscaper(final int below, final int above, final boolean betwee * * @param codepoint * a Unicode code point - * @return the hex string for the given codepoint + * @return The hex string for the given codepoint */ @Override protected String toUtf16Escape(final int codepoint) { diff --git a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java index 80eee30e6a..a143cf866c 100644 --- a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java +++ b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java @@ -19,9 +19,8 @@ import java.io.IOException; import java.io.Writer; import java.security.InvalidParameterException; +import java.util.BitSet; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; import java.util.Map; /** @@ -34,7 +33,7 @@ public class LookupTranslator extends CharSequenceTranslator { /** The mapping to be used in translation. */ private final Map lookupMap; /** The first character of each key in the lookupMap. */ - private final HashSet prefixSet; + private final BitSet prefixSet; /** The length of the shortest key in the lookupMap. */ private final int shortest; /** The length of the longest key in the lookupMap. 
*/ @@ -56,15 +55,13 @@ public LookupTranslator(final Map lookupMap) { throw new InvalidParameterException("lookupMap cannot be null"); } this.lookupMap = new HashMap<>(); - this.prefixSet = new HashSet<>(); + this.prefixSet = new BitSet(); int currentShortest = Integer.MAX_VALUE; int currentLongest = 0; - final Iterator> it = lookupMap.entrySet().iterator(); - while (it.hasNext()) { - final Map.Entry pair = it.next(); + for (final Map.Entry pair : lookupMap.entrySet()) { this.lookupMap.put(pair.getKey().toString(), pair.getValue().toString()); - this.prefixSet.add(pair.getKey().charAt(0)); + this.prefixSet.set(pair.getKey().charAt(0)); final int sz = pair.getKey().length(); if (sz < currentShortest) { currentShortest = sz; @@ -81,9 +78,9 @@ public LookupTranslator(final Map lookupMap) { * {@inheritDoc} */ @Override - public int translate(final CharSequence input, final int index, final Writer out) throws IOException { + public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { // check if translation exists for the input at position index - if (prefixSet.contains(input.charAt(index))) { + if (prefixSet.get(input.charAt(index))) { int max = longest; if (index + longest > input.length()) { max = input.length() - index; @@ -94,7 +91,7 @@ public int translate(final CharSequence input, final int index, final Writer out final String result = lookupMap.get(subSeq.toString()); if (result != null) { - out.write(result); + writer.write(result); return i; } } diff --git a/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java b/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java index b402974726..a2b31b5443 100644 --- a/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java +++ b/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java @@ -33,10 +33,10 @@ public class NumericEntityEscaper extends CodePointTranslator { /** range from lowest codepoint to 
highest codepoint. */ private final Range range; /** - *

    Constructs a NumericEntityEscaper for the specified range. This is - * the underlying method for the other constructors/builders. The below - * and above boundaries are inclusive when between is - * true and exclusive when it is false.

    + *

    Constructs a {@code NumericEntityEscaper} for the specified range. This is + * the underlying method for the other constructors/builders. The {@code below} + * and {@code above} boundaries are inclusive when {@code between} is + * {@code true} and exclusive when it is {@code false}.

    * * @param below int value representing the lowest codepoint boundary * @param above int value representing the highest codepoint boundary @@ -48,49 +48,49 @@ private NumericEntityEscaper(final int below, final int above, final boolean bet } /** - *

    Constructs a NumericEntityEscaper for all characters.

    + *

    Constructs a {@code NumericEntityEscaper} for all characters.

    */ public NumericEntityEscaper() { this(0, Integer.MAX_VALUE, true); } /** - *

    Constructs a NumericEntityEscaper below the specified value (exclusive).

    + *

    Constructs a {@code NumericEntityEscaper} below the specified value (exclusive).

    * * @param codepoint below which to escape - * @return the newly created {@code NumericEntityEscaper} instance + * @return The newly created {@code NumericEntityEscaper} instance */ public static NumericEntityEscaper below(final int codepoint) { return outsideOf(codepoint, Integer.MAX_VALUE); } /** - *

    Constructs a NumericEntityEscaper above the specified value (exclusive).

    + *

    Constructs a {@code NumericEntityEscaper} above the specified value (exclusive).

    * * @param codepoint above which to escape - * @return the newly created {@code NumericEntityEscaper} instance + * @return The newly created {@code NumericEntityEscaper} instance */ public static NumericEntityEscaper above(final int codepoint) { return outsideOf(0, codepoint); } /** - *

    Constructs a NumericEntityEscaper between the specified values (inclusive).

    + *

    Constructs a {@code NumericEntityEscaper} between the specified values (inclusive).

    * * @param codepointLow above which to escape * @param codepointHigh below which to escape - * @return the newly created {@code NumericEntityEscaper} instance + * @return The newly created {@code NumericEntityEscaper} instance */ public static NumericEntityEscaper between(final int codepointLow, final int codepointHigh) { return new NumericEntityEscaper(codepointLow, codepointHigh, true); } /** - *

    Constructs a NumericEntityEscaper outside of the specified values (exclusive).

    + *

    Constructs a {@code NumericEntityEscaper} outside of the specified values (exclusive).

    * * @param codepointLow below which to escape * @param codepointHigh above which to escape - * @return the newly created {@code NumericEntityEscaper} instance + * @return The newly created {@code NumericEntityEscaper} instance */ public static NumericEntityEscaper outsideOf(final int codepointLow, final int codepointHigh) { return new NumericEntityEscaper(codepointLow, codepointHigh, false); @@ -100,13 +100,13 @@ public static NumericEntityEscaper outsideOf(final int codepointLow, final int c * {@inheritDoc} */ @Override - public boolean translate(final int codepoint, final Writer out) throws IOException { + public boolean translate(final int codepoint, final Writer writer) throws IOException { if (this.between != this.range.contains(codepoint)) { return false; } - out.write("&#"); - out.write(Integer.toString(codepoint, 10)); - out.write(';'); + writer.write("&#"); + writer.write(Integer.toString(codepoint, 10)); + writer.write(';'); return true; } } diff --git a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java index 4a89153108..25f4d7465f 100644 --- a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java +++ b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java @@ -19,29 +19,52 @@ import java.io.IOException; import java.io.Writer; import java.util.Arrays; +import java.util.Collections; import java.util.EnumSet; +import org.apache.commons.lang3.ArrayUtils; + /** - * Translate XML numeric entities of the form &#[xX]?\d+;? to + * Translates XML numeric entities of the form &#[xX]?\d+;? to * the specific codepoint. * - * Note that the semi-colon is optional. + * Note that the semicolon is optional. * * @since 1.0 */ public class NumericEntityUnescaper extends CharSequenceTranslator { - /** NumericEntityUnescaper option enum. 
*/ - public enum OPTION { semiColonRequired, semiColonOptional, errorIfNoSemiColon } + /** Default options. */ + private static final EnumSet