Skip to content

Commit 7972242

Browse files
committed
CODEC-255 ColognePhonetic handles x incorrectly
1 parent 128c93c commit 7972242

3 files changed

Lines changed: 39 additions & 40 deletions

File tree

src/changes/changes.xml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,8 @@ The <action> type attribute can be add,update,fix,remove.
4444
<body>
4545

4646
<release version="1.13" date="YYYY-MM-DD" description="TBD">
47-
<action issue="CODEC-254" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic does not treat the letter H correct</action>
47+
<action issue="CODEC-255" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic handles x incorrectly</action>
48+
<action issue="CODEC-254" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic does not treat the letter H correctly</action>
4849
<action issue="CODEC-257" dev="ggregory" type="update">Update from Java 7 to Java 8</action>
4950
<action issue="CODEC-134" dev="tmousaw-ptc" type="fix">Reject any decode request for a value that is impossible to encode to for Base32/Base64 rather than blindly decoding.</action>
5051
</release>

src/main/java/org/apache/commons/codec/language/ColognePhonetic.java

Lines changed: 36 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,8 @@ public class ColognePhonetic implements StringEncoder {
192192
private static final char[] AHKOQUX = new char[] { 'A', 'H', 'K', 'O', 'Q', 'U', 'X' };
193193
private static final char[] DTX = new char[] { 'D', 'T', 'X' };
194194

195+
private static final char CHAR_IGNORE = '-'; // is this character to be ignored?
196+
195197
/**
196198
* This class is not thread-safe; the field {@link #length} is mutable.
197199
* However, it is not shared between threads, as it is constructed on demand
@@ -227,13 +229,25 @@ public String toString() {
227229

228230
private class CologneOutputBuffer extends CologneBuffer {
229231

232+
private char lastCode;
233+
230234
public CologneOutputBuffer(final int buffSize) {
231235
super(buffSize);
236+
lastCode = '/'; // impossible value
232237
}
233238

234-
public void addRight(final char chr) {
235-
data[length] = chr;
236-
length++;
239+
/**
240+
* Store the next code in the output buffer, keeping track of the previous code.
241+
* '0' is only stored if it is the first entry.
242+
* Ignored chars are never stored.
243+
* If the code is the same as the last code (whether stored or not) it is not stored.
244+
*/
245+
public void put(final char code) {
246+
if (code != CHAR_IGNORE && lastCode != code && (code != '0' || length == 0)) {
247+
data[length] = code;
248+
length++;
249+
}
250+
lastCode = code;
237251
}
238252

239253
@Override
@@ -250,11 +264,6 @@ public CologneInputBuffer(final char[] data) {
250264
super(data);
251265
}
252266

253-
public void addLeft(final char ch) {
254-
length++;
255-
data[getNextPos()] = ch;
256-
}
257-
258267
@Override
259268
protected char[] copyData(final int start, final int length) {
260269
final char[] newData = new char[length];
@@ -310,13 +319,7 @@ public String colognePhonetic(String text) {
310319

311320
char nextChar;
312321

313-
final char CHAR_FIRST_POS = '/'; // are we processing the first character?
314-
final char CHAR_IGNORE = '-'; // is this character to be ignored?
315-
316322
char lastChar = CHAR_IGNORE;
317-
char lastCode = CHAR_FIRST_POS;
318-
boolean firstChar = true; // are we generating the first digit?
319-
char code;
320323
char chr;
321324

322325
while (input.length() > 0) {
@@ -333,55 +336,49 @@ public String colognePhonetic(String text) {
333336
}
334337

335338
if (arrayContains(AEIJOUY, chr)) {
336-
code = '0';
339+
output.put('0');
337340
} else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
338-
code = '1';
341+
output.put('1');
339342
} else if ((chr == 'D' || chr == 'T') && !arrayContains(CSZ, nextChar)) {
340-
code = '2';
343+
output.put('2');
341344
} else if (arrayContains(FPVW, chr)) {
342-
code = '3';
345+
output.put('3');
343346
} else if (arrayContains(GKQ, chr)) {
344-
code = '4';
347+
output.put('4');
345348
} else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
346-
code = '4';
347-
input.addLeft('S');
349+
output.put('4');
350+
output.put('8');
348351
} else if (chr == 'S' || chr == 'Z') {
349-
code = '8';
352+
output.put('8');
350353
} else if (chr == 'C') {
351-
if (firstChar) {
354+
if (output.length() == 0) {
352355
if (arrayContains(AHKLOQRUX, nextChar)) {
353-
code = '4';
356+
output.put('4');
354357
} else {
355-
code = '8';
358+
output.put('8');
356359
}
357360
} else {
358361
if (arrayContains(SZ, lastChar) || !arrayContains(AHKOQUX, nextChar)) {
359-
code = '8';
362+
output.put('8');
360363
} else {
361-
code = '4';
364+
output.put('4');
362365
}
363366
}
364367
} else if (arrayContains(DTX, chr)) {
365-
code = '8';
368+
output.put('8');
366369
} else if (chr == 'R') {
367-
code = '7';
370+
output.put('7');
368371
} else if (chr == 'L') {
369-
code = '5';
372+
output.put('5');
370373
} else if (chr == 'M' || chr == 'N') {
371-
code = '6';
374+
output.put('6');
372375
} else if (chr == 'H') {
373-
code = CHAR_IGNORE;
376+
output.put(CHAR_IGNORE); // needed by put
374377
} else {
375-
code = chr; // should not happen?
376-
}
377-
378-
if (code != CHAR_IGNORE && lastCode != code && (code != '0' || firstChar)) {
379-
output.addRight(code);
380-
firstChar = false; // no longer processing first output digit
378+
// ignored; should not happen
381379
}
382380

383381
lastChar = chr;
384-
lastCode = code;
385382
}
386383
return output.toString();
387384
}

src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,7 @@ public void testExamples() throws EncoderException {
194194
{"Celsius", "8588"},
195195
{"Ace", "08"},
196196
{"shch", "84"}, // CODEC-254
197+
{"xch", "484"}, // CODEC-255
197198
{"heithabu", "021"}};
198199
this.checkEncodings(data);
199200
}

0 commit comments

Comments
 (0)