Skip to content

Commit f095ceb

Browse files
committed
Soundex.setMaxLength causes bugs and is not needed.
http://issues.apache.org/bugzilla/show_bug.cgi?id=24484
git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/codec/trunk@130234 13f79535-47bb-0310-9956-ffa450edef68
1 parent 25104f4 commit f095ceb

2 files changed

Lines changed: 37 additions & 30 deletions

File tree

src/java/org/apache/commons/codec/language/Soundex.java

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@
5555
* @author bayard@generationjava.com
5656
* @author Tim O'Brien
5757
* @author Gary Gregory
58-
* @version $Id: Soundex.java,v 1.11 2003/11/06 16:31:47 ggregory Exp $
58+
* @version $Id: Soundex.java,v 1.12 2003/11/07 01:20:19 ggregory Exp $
5959
*/
6060
public class Soundex implements StringEncoder {
6161

@@ -66,14 +66,16 @@ public class Soundex implements StringEncoder {
6666
public static final Soundex US_ENGLISH = new Soundex();
6767

6868
/**
69-
* This is a default mapping of the 26 letters used in US english.
70-
* A value of <code>0</code> for a letter position means do not encode.
69+
* This is a default mapping of the 26 letters used in US English. A value
70+
* of <code>0</code> for a letter position means do not encode.
7171
*/
7272
public static final char[] US_ENGLISH_MAPPING = "01230120022455012623010202".toCharArray();
7373

7474
/**
7575
* The maximum length of a Soundex code - Soundex codes are only four
7676
* characters by definition.
77+
*
78+
* @deprecated This feature is not needed since the encoding size must be constant.
7779
*/
7880
private int maxLength = 4;
7981

@@ -106,8 +108,8 @@ public Soundex(char[] mapping) {
106108
}
107109

108110
/**
109-
* Cleans up the input string before Soundex processing by trimming and
110-
* removing punctuation characters. The string is returned in upper-case.
111+
* Cleans up the input string before Soundex processing by only returning
112+
* upper case letters.
111113
*/
112114
private String clean(String str) {
113115
if (str == null || str.length() == 0) {
@@ -168,38 +170,35 @@ public String encode(String pString) {
168170
/**
169171
* Used internally by the SoundEx algorithm.
170172
*
171-
* Consonants from the same code group separated by W or H are treated as one.
173+
* Consonants from the same code group separated by W or H are treated as
174+
* one.
172175
*
173176
* @param str
174-
* the whole string
177+
* the cleaned working string to encode (in upper case).
175178
* @param index
176179
* the character position to encode
177180
* @return Mapping code for a particular character
178181
*/
179182
private char getMappingCode(String str, int index) {
180-
char c = str.charAt(index);
181-
if (!Character.isLetter(c)) {
182-
return 0;
183-
} else {
184-
char mappedChar = this.map(c);
185-
// HW rule check
186-
if (index > 1 && mappedChar != '0') {
187-
char hwChar = str.charAt(index-1);
188-
if ('H' == hwChar || 'W' == hwChar) {
189-
char preHWChar = str.charAt(index - 2);
190-
char firstCode = this.map(preHWChar);
191-
if (firstCode == mappedChar || 'H' == preHWChar || 'W' == preHWChar) {
192-
return 0;
193-
}
194-
}
183+
char mappedChar = this.map(str.charAt(index));
184+
// HW rule check
185+
if (index > 1 && mappedChar != '0') {
186+
char hwChar = str.charAt(index - 1);
187+
if ('H' == hwChar || 'W' == hwChar) {
188+
char preHWChar = str.charAt(index - 2);
189+
char firstCode = this.map(preHWChar);
190+
if (firstCode == mappedChar || 'H' == preHWChar || 'W' == preHWChar) {
191+
return 0;
192+
}
195193
}
196-
return mappedChar;
197194
}
195+
return mappedChar;
198196
}
199197

200198
/**
201199
* Returns the maxLength. Standard Soundex
202200
*
201+
* @deprecated This feature is not needed since the encoding size must be constant.
203202
* @return int
204203
*/
205204
public int getMaxLength() {
@@ -214,15 +213,16 @@ private char[] getSoundexMapping() {
214213
}
215214

216215
/**
217-
* Maps the given upper-case character to it's Soudex code.
218-
*/
216+
* Maps the given upper-case character to its Soundex code.
217+
*/
219218
private char map(char c) {
220-
return this.getSoundexMapping()[c - 'A'];
219+
return this.getSoundexMapping()[c - 'A'];
221220
}
222221

223222
/**
224223
* Sets the maxLength.
225224
*
225+
* @deprecated This feature is not needed since the encoding size must be constant.
226226
* @param maxLength
227227
* The maxLength to set
228228
*/
@@ -253,14 +253,14 @@ public String soundex(String str) {
253253
if (str.length() == 0) {
254254
return str;
255255
}
256-
257256
char out[] = { '0', '0', '0', '0' };
258257
char last, mapped;
259258
int incount = 1, count = 1;
260259
out[0] = str.charAt(0);
261260
last = getMappingCode(str, 0);
262-
while ((incount < str.length()) && (count < this.getMaxLength())) {
263-
if ((mapped = getMappingCode(str, incount++)) != 0) {
261+
while ((incount < str.length()) && (count < out.length)) {
262+
mapped = getMappingCode(str, incount++);
263+
if (mapped != 0) {
264264
if ((mapped != '0') && (mapped != last)) {
265265
out[count++] = mapped;
266266
}

src/test/org/apache/commons/codec/language/SoundexTest.java

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@
5353
/**
5454
* Tests {@link Soundex}
5555
*
56-
* @version $Revision: 1.6 $ $Date: 2003/11/06 16:31:47 $
56+
* @version $Revision: 1.7 $ $Date: 2003/11/07 01:21:47 $
5757
* @author Rodney Waldhoff
5858
* @author Gary Gregory
5959
*/
@@ -304,6 +304,13 @@ public void testHWRuleEx3() {
304304
public void testMaxLength() throws Exception {
305305
Soundex soundex = new Soundex();
306306
soundex.setMaxLength(soundex.getMaxLength());
307+
assertEquals("S460", this.getEncoder().encode("Sgler"));
308+
}
309+
310+
public void testMaxLengthLessThan3Fix() throws Exception {
311+
Soundex soundex = new Soundex();
312+
soundex.setMaxLength(2);
313+
assertEquals("S460", soundex.encode("SCHELLER"));
307314
}
308315

309316
}

0 commit comments

Comments
 (0)