1212 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313 * See the License for the specific language governing permissions and
1414 * limitations under the License.
15- */
15+ */
1616
1717package org .apache .commons .codec .language ;
1818
1919import org .apache .commons .codec .EncoderException ;
2020import org .apache .commons .codec .StringEncoder ;
2121
2222/**
23- * Encodes a string into a soundex value. Soundex is an encoding used to relate
24- * similar names, but can also be used as a general purpose scheme to find word
25- * with similar phonemes.
23+ * Encodes a string into a soundex value. Soundex is an encoding used to relate similar names, but can also be used as a
24+ * general purpose scheme to find words with similar phonemes.
2625 *
2726 * @author Apache Software Foundation
28- * @version $Id: Soundex.java,v 1.22 2004/06/02 00:55:29 ggregory Exp $
27+ * @version $Id: Soundex.java,v 1.23 2004/06/05 00:43:32 ggregory Exp $
2928 */
3029public class Soundex implements StringEncoder {
3130
3231 /**
33- * This static variable contains an instance of the Soundex using the
34- * US_ENGLISH mapping.
35- */
32+ * An instance of Soundex using the US_ENGLISH_MAPPING mapping.
33+ *
34+ * @see #US_ENGLISH_MAPPING
35+ */
3636 public static final Soundex US_ENGLISH = new Soundex ();
3737
3838 /**
39- * This is a default mapping of the 26 letters used in US english. A value
40- * of <code>0</code> for a letter position means do not encode.
41- */
39+ * This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position
40+ * means do not encode.
41+ *
42+ * @see Soundex#Soundex(char[])
43+ */
4244 public static final char [] US_ENGLISH_MAPPING = "01230120022455012623010202" .toCharArray ();
4345
4446 /**
45- * Encodes the Strings and returns the number of characters in the two
46- * encoded Strings that are the same. This return value ranges from 0
47- * through 4: 0 indicates little or no similarity, and 4 indicates strong
48- * similarity or identical values.
49- *
50- * @param s1
51- * A String that will be encoded and compared.
52- * @param s2
53- * A String that will be encoded and compared.
54- * @return The number of characters in the two encoded Strings that are the
55- * same from 0 to 4.
56- *
57- * @see SoundexUtils#difference(StringEncoder,String,String)
58- * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
59- * MS T-SQL DIFFERENCE</a>
60- *
61- * @throws EncoderException
62- * if an error occurs encoding one of the strings
47+ * Encodes the Strings and returns the number of characters in the two encoded Strings that are the same. This
48+ * return value ranges from 0 through 4: 0 indicates little or no similarity, and 4 indicates strong similarity or
49+ * identical values.
50+ *
51+ * @param s1
52+ * A String that will be encoded and compared.
53+ * @param s2
54+ * A String that will be encoded and compared.
55+ * @return The number of characters in the two encoded Strings that are the same from 0 to 4.
56+ *
57+ * @see SoundexUtils#difference(StringEncoder,String,String)
58+ * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> MS
59+ * T-SQL DIFFERENCE </a>
60+ *
61+ * @throws EncoderException
62+ * if an error occurs encoding one of the strings
6363 * @since 1.3
64- */
64+ */
6565 public int difference (String s1 , String s2 ) throws EncoderException {
6666 return SoundexUtils .difference (this , s1 , s2 );
6767 }
6868
6969 /**
70- * The maximum length of a Soundex code - Soundex codes are only four
71- * characters by definition.
72- *
73- * @deprecated This feature is not needed since the encoding size must be
74- * constant.
75- */
70+ * The maximum length of a Soundex code - Soundex codes are only four characters by definition.
71+ *
72+ * @deprecated This feature is not needed since the encoding size must be constant.
73+ */
7674 private int maxLength = 4 ;
7775
7876 /**
79- * Every letter of the alphabet is "mapped" to a numerical value. This char
80- * array holds the values to which each letter is mapped. This
81- * implementation contains a default map for US_ENGLISH
82- */
77+ * Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each
78+ * letter is mapped. This implementation contains a default map for US_ENGLISH
79+ */
8380 private char [] soundexMapping ;
8481
8582 /**
86- * Creates an instance of the Soundex object using the default US_ENGLISH
87- * mapping.
88- */
83+ * Creates an instance using US_ENGLISH_MAPPING
84+ *
85+ * @see Soundex#Soundex(char[])
86+ * @see Soundex#US_ENGLISH_MAPPING
87+ */
8988 public Soundex () {
9089 this (US_ENGLISH_MAPPING );
9190 }
9291
9392 /**
94- * Creates a soundex instance using a custom mapping. This constructor can
95- * be used to customize the mapping, and/or possibly provide an
96- * internationalized mapping for a non-Western character set.
97- *
98- * @param mapping
99- * Mapping array to use when finding the corresponding code for
100- * a given character
101- */
93+ * Creates a soundex instance using the given mapping. This constructor can be used to customize the mapping, and/or
94+ * possibly provide an internationalized mapping for a non-Western character set.
95+ *
96+ * Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each
97+ * letter is mapped. This implementation contains a default map for US_ENGLISH
98+ *
99+ * @param mapping
100+ * Mapping array to use when finding the corresponding code for a given character
101+ */
102102 public Soundex (char [] mapping ) {
103103 this .setSoundexMapping (mapping );
104104 }
105105
106106 /**
107- * Encodes an Object using the soundex algorithm. This method is provided
108- * in order to satisfy the requirements of the Encoder interface, and will
109- * throw an EncoderException if the supplied object is not of type
110- * java.lang.String.
111- *
112- * @param pObject
113- * Object to encode
114- * @return An object (or type java.lang.String) containing the soundex code
115- * which corresponds to the String supplied.
116- * @throws EncoderException
117- * if the parameter supplied is not of type java.lang.String
118- */
107+ * Encodes an Object using the soundex algorithm. This method is provided in order to satisfy the requirements of
108+ * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
109+ *
110+ * @param pObject
111+ * Object to encode
112+ * @return An object (of type java.lang.String) containing the soundex code which corresponds to the String
113+ * supplied.
114+ * @throws EncoderException
115+ * if the parameter supplied is not of type java.lang.String
116+ * @throws IllegalArgumentException
117+ * if a character is not mapped
118+ */
119119 public Object encode (Object pObject ) throws EncoderException {
120120 if (!(pObject instanceof String )) {
121121 throw new EncoderException ("Parameter supplied to Soundex encode is not of type java.lang.String" );
122- }
122+ }
123123 return soundex ((String ) pObject );
124124 }
125125
126126 /**
127- * Encodes a String using the soundex algorithm.
128- *
129- * @param pString
130- * A String object to encode
131- * @return A Soundex code corresponding to the String supplied
132- */
127+ * Encodes a String using the soundex algorithm.
128+ *
129+ * @param pString
130+ * A String object to encode
131+ * @return A Soundex code corresponding to the String supplied
132+ * @throws IllegalArgumentException
133+ * if a character is not mapped
134+ */
133135 public String encode (String pString ) {
134136 return soundex (pString );
135137 }
136138
137139 /**
138- * Used internally by the SoundEx algorithm.
139- *
140- * Consonants from the same code group separated by W or H are treated as
141- * one.
142- *
143- * @param str
144- * the cleaned working string to encode (in upper case).
145- * @param index
146- * the character position to encode
147- * @return Mapping code for a particular character
148- */
140+ * Used internally by the SoundEx algorithm.
141+ *
142+ * Consonants from the same code group separated by W or H are treated as one.
143+ *
144+ * @param str
145+ * the cleaned working string to encode (in upper case).
146+ * @param index
147+ * the character position to encode
148+ * @return Mapping code for a particular character
149+ * @throws IllegalArgumentException
150+ * if the character is not mapped
151+ */
149152 private char getMappingCode (String str , int index ) {
150153 char mappedChar = this .map (str .charAt (index ));
151154 // HW rule check
@@ -163,67 +166,71 @@ private char getMappingCode(String str, int index) {
163166 }
164167
165168 /**
166- * Returns the maxLength. Standard Soundex
167- *
168- * @deprecated This feature is not needed since the encoding size must be
169- * constant.
170- * @return int
171- */
169+ * Returns the maxLength. Standard Soundex
170+ *
171+ * @deprecated This feature is not needed since the encoding size must be constant.
172+ * @return int
173+ */
172174 public int getMaxLength () {
173175 return this .maxLength ;
174176 }
175177
176178 /**
177179 * Returns the soundex mapping.
178- * @return soundexMapping.
179- */
180+ *
181+ * @return soundexMapping.
182+ */
180183 private char [] getSoundexMapping () {
181184 return this .soundexMapping ;
182185 }
183186
184187 /**
185- * Maps the given upper-case character to it's Soudex code.
186- *
187- * @param c
188- * An upper-case character.
189- * @return A Soundex code.
190- */
191- private char map (char c ) {
192- int index = c - 'A' ;
193- if (index < 0 || index >= this .getSoundexMapping ().length ) {
194- throw new IllegalArgumentException ("The character is not mapped: " + c );
195- }
188+ * Maps the given upper-case character to its Soundex code.
189+ *
190+ * @param ch
191+ * An upper-case character.
192+ * @return A Soundex code.
193+ * @throws IllegalArgumentException
194+ * Thrown if <code>ch</code> is not mapped.
195+ */
196+ private char map (char ch ) {
197+ int index = ch - 'A' ;
198+ if (index < 0 || index >= this .getSoundexMapping ().length ) {
199+ throw new IllegalArgumentException ("The character is not mapped: " + ch );
200+ }
196201 return this .getSoundexMapping ()[index ];
197202 }
198203
199204 /**
200- * Sets the maxLength.
201- *
202- * @deprecated This feature is not needed since the encoding size must be
203- * constant.
204- * @param maxLength
205- * The maxLength to set
206- */
205+ * Sets the maxLength.
206+ *
207+ * @deprecated This feature is not needed since the encoding size must be constant.
208+ * @param maxLength
209+ * The maxLength to set
210+ */
207211 public void setMaxLength (int maxLength ) {
208212 this .maxLength = maxLength ;
209213 }
210214
211215 /**
212216 * Sets the soundexMapping.
213- * @param soundexMapping
214- * The soundexMapping to set.
215- */
217+ *
218+ * @param soundexMapping
219+ * The soundexMapping to set.
220+ */
216221 private void setSoundexMapping (char [] soundexMapping ) {
217222 this .soundexMapping = soundexMapping ;
218223 }
219224
220225 /**
221- * Retreives the Soundex code for a given String object.
222- *
223- * @param str
224- * String to encode using the Soundex algorithm
225- * @return A soundex code for the String supplied
226- */
226+ * Retrieves the Soundex code for a given String object.
227+ *
228+ * @param str
229+ * String to encode using the Soundex algorithm
230+ * @return A soundex code for the String supplied
231+ * @throws IllegalArgumentException
232+ * if a character is not mapped
233+ */
227234 public String soundex (String str ) {
228235 if (str == null ) {
229236 return null ;
@@ -249,4 +256,4 @@ public String soundex(String str) {
249256 return new String (out );
250257 }
251258
252- }
259+ }
0 commit comments