Skip to content

Commit 9c88167

Browse files
committed
[CODEC-199] Bug in HW rule in Soundex. Applying 2nd version of the patch.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/codec/trunk@1669274 13f79535-47bb-0310-9956-ffa450edef68
1 parent 7c0b78d commit 9c88167

1 file changed

Lines changed: 8 additions & 39 deletions

File tree

src/main/java/org/apache/commons/codec/language/Soundex.java

Lines changed: 8 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ public class Soundex implements StringEncoder {
4141
*
4242
* @see #US_ENGLISH_MAPPING
4343
*/
44-
public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202";
44+
public static final String US_ENGLISH_MAPPING_STRING = "0123012#02245501262301#202";
4545

4646
/**
4747
* This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position
@@ -167,37 +167,6 @@ public String encode(final String str) {
167167
return soundex(str);
168168
}
169169

170-
/**
171-
* Used internally by the Soundex algorithm.
172-
*
173-
* Consonants from the same code group separated by W or H are treated as one.
174-
*
175-
* @param str
176-
* the cleaned working string to encode (in upper case).
177-
* @param index
178-
* the character position to encode
179-
* @return Mapping code for a particular character
180-
* @throws IllegalArgumentException
181-
* if the character is not mapped
182-
*/
183-
private char getMappingCode(final String str, final int index) {
184-
// map() throws IllegalArgumentException
185-
final char mappedChar = this.map(str.charAt(index));
186-
// HW rule check
187-
if (index > 1 && mappedChar != '0') {
188-
for (int i=index-1 ; i>=0 ; i--) {
189-
final char prevChar = str.charAt(i);
190-
if (this.map(prevChar)==mappedChar) {
191-
return 0;
192-
}
193-
if ('H'!=prevChar && 'W'!=prevChar) {
194-
break;
195-
}
196-
}
197-
}
198-
return mappedChar;
199-
}
200-
201170
/**
202171
* Returns the maxLength. Standard Soundex
203172
*
@@ -268,14 +237,14 @@ public String soundex(String str) {
268237
char last, mapped;
269238
int incount = 1, count = 1;
270239
out[0] = str.charAt(0);
271-
// getMappingCode() throws IllegalArgumentException
272-
last = getMappingCode(str, 0);
240+
// map() throws IllegalArgumentException
241+
last = this.map(str.charAt(0));
273242
while (incount < str.length() && count < out.length) {
274-
mapped = getMappingCode(str, incount++);
275-
if (mapped != 0) {
276-
if (mapped != '0' && mapped != last) {
277-
out[count++] = mapped;
278-
}
243+
mapped = this.map(str.charAt(incount++));
244+
if (mapped == '0') {
245+
last = mapped;
246+
} else if (mapped != '#' && mapped != last) {
247+
out[count++] = mapped;
279248
last = mapped;
280249
}
281250
}

0 commit comments

Comments
 (0)