Skip to content

Commit 36fccb4

Browse files
committed
CODEC-250 Wrong value calculated by Cologne Phonetic if a special character is placed between equal letters
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/codec/trunk@1842290 13f79535-47bb-0310-9956-ffa450edef68
1 parent 0b2a0c9 commit 36fccb4

3 files changed

Lines changed: 13 additions & 6 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ The <action> type attribute can be add,update,fix,remove.
4444
<body>
4545
<release version="1.12" date="2017-MM-DD" description="Feature and fix release.">
4646
<!-- The first attribute below should be the issue id; makes it easier to navigate in the IDE outline -->
47+
<action issue="CODEC-250" dev="sebb" type="fix" due-to="Alex Volodko">Wrong value calculated by Cologne Phonetic if a special character is placed between equal letters</action>
4748
<action issue="CODEC-244" dev="ggregory" type="update">Update from Java 6 to Java 7</action>
4849
<action issue="CODEC-240" dev="ggregory" type="add" due-to="Ioannis Sermetziadis">Add Percent-Encoding Codec (described in RFC3986 and RFC7578)</action>
4950
<action issue="CODEC-246" dev="ggregory" type="fix" due-to="Oscar Luis Vera Pérez">ColognePhoneticTest.testIsEncodeEquals missing assertions</action>

src/main/java/org/apache/commons/codec/language/ColognePhonetic.java

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -337,13 +337,13 @@ public String colognePhonetic(String text) {
337337
nextChar = CHAR_IGNORE;
338338
}
339339

340+
// OK to ignore H here because it only affects nextChar which has already been set up
341+
if (chr == 'H' || chr < 'A' || chr > 'Z') {
342+
continue; // ignore unwanted characters
343+
}
344+
340345
if (arrayContains(AEIJOUY, chr)) {
341346
code = '0';
342-
} else if (chr == 'H' || chr < 'A' || chr > 'Z') {
343-
if (lastCode == CHAR_FIRST_POS) {
344-
continue; // ignore leading unwanted characters
345-
}
346-
code = CHAR_IGNORE;
347347
} else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
348348
code = '1';
349349
} else if ((chr == 'D' || chr == 'T') && !arrayContains(SCZ, nextChar)) {
@@ -380,7 +380,7 @@ public String colognePhonetic(String text) {
380380
} else if (chr == 'M' || chr == 'N') {
381381
code = '6';
382382
} else {
383-
code = chr;
383+
code = chr; // should not happen?
384384
}
385385

386386
if (code != CHAR_IGNORE && (lastCode != code && (code != '0' || lastCode == CHAR_FIRST_POS) || code < '0' || code > '8')) {

src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -235,6 +235,12 @@ public void testVariationsMeyer() throws EncoderException {
235235
this.checkEncodingVariations("67", data);
236236
}
237237

238+
@Test
239+
public void testSpecialCharsBetweenSameLetters() throws EncoderException {
240+
final String data[] = {"Test test", "Testtest", "Test-test", "TesT#Test", "TesT?test"};
241+
this.checkEncodingVariations("28282", data);
242+
}
243+
238244
// Allow command-line testing
239245
public static void main(String args[]) {
240246
ColognePhonetic coder = new ColognePhonetic();

0 commit comments

Comments
 (0)