5555 * @author bayard@generationjava.com
5656 * @author Tim O'Brien
5757 * @author Gary Gregory
58- * @version $Id: Soundex.java,v 1.11 2003/11/06 16:31:47 ggregory Exp $
58+ * @version $Id: Soundex.java,v 1.12 2003/11/07 01:20:19 ggregory Exp $
5959 */
6060public class Soundex implements StringEncoder {
6161
@@ -66,14 +66,16 @@ public class Soundex implements StringEncoder {
6666 public static final Soundex US_ENGLISH = new Soundex ();
6767
6868 /**
69- * This is a default mapping of the 26 letters used in US english.
70- * A value of <code>0</code> for a letter position means do not encode.
69+ * This is a default mapping of the 26 letters used in US English. A value
70+ * of <code>0</code> for a letter position means do not encode.
7171 */
7272 public static final char [] US_ENGLISH_MAPPING = "01230120022455012623010202" .toCharArray ();
7373
7474 /**
7575 * The maximum length of a Soundex code - Soundex codes are only four
7676 * characters by definition.
77+ *
78+ * @deprecated This feature is not needed since the encoding size must be constant.
7779 */
7880 private int maxLength = 4 ;
7981
@@ -106,8 +108,8 @@ public Soundex(char[] mapping) {
106108 }
107109
108110 /**
109- * Cleans up the input string before Soundex processing by trimming and
110- * removing punctuation characters. The string is returned in upper- case.
111+ * Cleans up the input string before Soundex processing by only returning
112+ * upper case letters .
111113 */
112114 private String clean (String str ) {
113115 if (str == null || str .length () == 0 ) {
@@ -168,38 +170,35 @@ public String encode(String pString) {
168170 /**
169171 * Used internally by the SoundEx algorithm.
170172 *
171- * Consonants from the same code group separated by W or H are treated as one.
173+ * Consonants from the same code group separated by W or H are treated as
174+ * one.
172175 *
173176 * @param str
174- * the whole string
177+ * the cleaned working string to encode (in upper case).
175178 * @param index
176179 * the character position to encode
177180 * @return Mapping code for a particular character
178181 */
179182 private char getMappingCode (String str , int index ) {
180- char c = str .charAt (index );
181- if (!Character .isLetter (c )) {
182- return 0 ;
183- } else {
184- char mappedChar = this .map (c );
185- // HW rule check
186- if (index > 1 && mappedChar != '0' ) {
187- char hwChar = str .charAt (index -1 );
188- if ('H' == hwChar || 'W' == hwChar ) {
189- char preHWChar = str .charAt (index - 2 );
190- char firstCode = this .map (preHWChar );
191- if (firstCode == mappedChar || 'H' == preHWChar || 'W' == preHWChar ) {
192- return 0 ;
193- }
194- }
183+ char mappedChar = this .map (str .charAt (index ));
184+ // HW rule check
185+ if (index > 1 && mappedChar != '0' ) {
186+ char hwChar = str .charAt (index - 1 );
187+ if ('H' == hwChar || 'W' == hwChar ) {
188+ char preHWChar = str .charAt (index - 2 );
189+ char firstCode = this .map (preHWChar );
190+ if (firstCode == mappedChar || 'H' == preHWChar || 'W' == preHWChar ) {
191+ return 0 ;
192+ }
195193 }
196- return mappedChar ;
197194 }
195+ return mappedChar ;
198196 }
199197
200198 /**
200198 * Returns the maxLength. Standard Soundex codes are four characters long.
202200 *
201+ * @deprecated This feature is not needed since the encoding size must be constant.
203202 * @return int
204203 */
205204 public int getMaxLength () {
@@ -214,15 +213,16 @@ private char[] getSoundexMapping() {
214213 }
215214
216215 /**
217- * Maps the given upper-case character to it's Soudex code.
218- */
216+ * Maps the given upper-case character to its Soundex code.
217+ */
219218 private char map (char c ) {
220- return this .getSoundexMapping ()[c - 'A' ];
219+ return this .getSoundexMapping ()[c - 'A' ];
221220 }
222221
223222 /**
224223 * Sets the maxLength.
225224 *
225+ * @deprecated This feature is not needed since the encoding size must be constant.
226226 * @param maxLength
227227 * The maxLength to set
228228 */
@@ -253,14 +253,14 @@ public String soundex(String str) {
253253 if (str .length () == 0 ) {
254254 return str ;
255255 }
256-
257256 char out [] = { '0' , '0' , '0' , '0' };
258257 char last , mapped ;
259258 int incount = 1 , count = 1 ;
260259 out [0 ] = str .charAt (0 );
261260 last = getMappingCode (str , 0 );
262- while ((incount < str .length ()) && (count < this .getMaxLength ())) {
263- if ((mapped = getMappingCode (str , incount ++)) != 0 ) {
261+ while ((incount < str .length ()) && (count < out .length )) {
262+ mapped = getMappingCode (str , incount ++);
263+ if (mapped != 0 ) {
264264 if ((mapped != '0' ) && (mapped != last )) {
265265 out [count ++] = mapped ;
266266 }
0 commit comments