Skip to content

Commit 28b7b95

Browse files
committed
[CODEC-199] Bug in HW rule in Soundex.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/codec/trunk@1668441 13f79535-47bb-0310-9956-ffa450edef68
1 parent 8fc1935 commit 28b7b95

3 files changed

Lines changed: 22 additions & 5 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ The <action> type attribute can be add,update,fix,remove.
4343
</properties>
4444
<body>
4545
<release version="1.11" date="DD MM 2014" description="Feature and fix release.">
46+
<action dev="ggregory" type="fix" issue="CODEC-199" due-to="Yossi Tamari">Bug in HW rule in Soundex</action>
4647
<action dev="ggregory" type="add" issue="CODEC-183">BaseNCodecOutputStream only supports writing EOF on close()</action>
4748
<action dev="ggregory" type="add" issue="CODEC-195">Support SHA-224 in DigestUtils on Java 8</action>
4849
<action dev="ggregory" type="add" issue="CODEC-194">Support java.nio.ByteBuffer in org.apache.commons.codec.binary.Hex</action>

src/main/java/org/apache/commons/codec/language/Soundex.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,13 +185,14 @@ private char getMappingCode(final String str, final int index) {
185185
final char mappedChar = this.map(str.charAt(index));
186186
// HW rule check
187187
if (index > 1 && mappedChar != '0') {
188-
final char hwChar = str.charAt(index - 1);
189-
if ('H' == hwChar || 'W' == hwChar) {
190-
final char preHWChar = str.charAt(index - 2);
191-
final char firstCode = this.map(preHWChar);
192-
if (firstCode == mappedChar || 'H' == preHWChar || 'W' == preHWChar) {
188+
for (int i=index-1 ; i>=0 ; i--) {
189+
final char prevChar = str.charAt(i);
190+
if (this.map(prevChar)==mappedChar) {
193191
return 0;
194192
}
193+
if ('H'!=prevChar && 'W'!=prevChar) {
194+
break;
195+
}
195196
}
196197
}
197198
return mappedChar;

src/test/java/org/apache/commons/codec/language/SoundexTest.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ public void testHWRuleEx1() {
228228
// for the F). It is not coded A-226.
229229
Assert.assertEquals("A261", this.getStringEncoder().encode("Ashcraft"));
230230
Assert.assertEquals("A261", this.getStringEncoder().encode("Ashcroft"));
231+
Assert.assertEquals("Y330", this.getStringEncoder().encode("yehudit"));
232+
Assert.assertEquals("Y330", this.getStringEncoder().encode("yhwdyt"));
231233
}
232234

233235
/**
@@ -388,4 +390,17 @@ public void testUsMappingOWithDiaeresis() {
388390
Assert.assertEquals("", this.getStringEncoder().encode("\u00f6"));
389391
}
390392
}
393+
394+
/**
395+
* Tests the examples from http://en.wikipedia.org/wiki/Soundex#American_Soundex as of 2015-03-22.
396+
*/
397+
@Test
398+
public void testWikipediaAmericanSoundex() {
399+
Assert.assertEquals("R163", this.getStringEncoder().encode("Robert"));
400+
Assert.assertEquals("R163", this.getStringEncoder().encode("Rupert"));
401+
Assert.assertEquals("A261", this.getStringEncoder().encode("Ashcraft"));
402+
Assert.assertEquals("A261", this.getStringEncoder().encode("Ashcroft"));
403+
Assert.assertEquals("T522", this.getStringEncoder().encode("Tymczak"));
404+
Assert.assertEquals("P236", this.getStringEncoder().encode("Pfister"));
405+
}
391406
}

0 commit comments

Comments
 (0)