Skip to content

Commit 0110214

Browse files
committed
[CODEC-187] Update Beider-Morse rules to v3.4.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/codec/trunk@1636703 13f79535-47bb-0310-9956-ffa450edef68
1 parent 83af056 commit 0110214

4 files changed

Lines changed: 28 additions & 5 deletions

File tree

src/main/java/org/apache/commons/codec/language/bm/BeiderMorseEncoder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
* Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by
6666
* splitting on pipe (<code>|</code>) and indexing under each of these alternatives.
6767
* <p>
68-
* <b>Note</b>: this version of the Beider-Morse encoding is equivalent with v3.3 of the reference implementation.
68+
* <b>Note</b>: this version of the Beider-Morse encoding is equivalent to v3.4 of the reference implementation.
6969
*
7070
* @see <a href="http://stevemorse.org/phonetics/bmpm.htm">Beider-Morse Phonetic Matching</a>
7171
* @see <a href="http://stevemorse.org/phoneticinfo.htm">Reference implementation</a>

src/main/resources/org/apache/commons/codec/language/bm/ash_approx_common.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,13 @@
201201
"lEndEr" "" "$" "lYnder"
202202
"lendEr" "" "$" "lYnder"
203203
"lEnder" "" "$" "lYnder"
204-
204+
205+
// burg = berg: a trailing "burg"/"burk" is encoded as both "burk" and "berk"
206+
"bUrk" "" "$" "(burk|berk)"
207+
"burk" "" "$" "(burk|berk)"
208+
"bUrg" "" "$" "(burk|berk)"
209+
"burg" "" "$" "(burk|berk)"
210+
205211
// CONSONANTS {z & Z; s & S} are approximately interchangeable
206212
"s" "" "[rmnl]" "z"
207213
"S" "" "[rmnl]" "z"

src/main/resources/org/apache/commons/codec/language/bm/gen_approx_common.txt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,17 @@
205205
"lEndEr" "" "$" "lYnder"
206206
"lendEr" "" "$" "lYnder"
207207
"lEnder" "" "$" "lYnder"
208-
208+
209+
// burg = berg: a trailing "burg"/"burk" is encoded as both "burk" and "berk"
210+
"burk" "" "$" "(burk|berk)"
211+
"bUrk" "" "$" "(burk|berk)"
212+
"burg" "" "$" "(burk|berk)"
213+
"bUrg" "" "$" "(burk|berk)"
214+
"Burk" "" "$" "(burk|berk)"
215+
"BUrk" "" "$" "(burk|berk)"
216+
"Burg" "" "$" "(burk|berk)"
217+
"BUrg" "" "$" "(burk|berk)"
218+
209219
// CONSONANTS {z & Z; s & S} are approximately interchangeable
210220
"s" "" "[rmnl]" "z"
211221
"S" "" "[rmnl]" "z"

src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717

1818
package org.apache.commons.codec.language.bm;
1919

20-
import static org.junit.Assert.*;
20+
import static org.junit.Assert.assertEquals;
21+
import static org.junit.Assert.assertTrue;
2122

2223
import java.util.Arrays;
2324
import java.util.List;
@@ -50,7 +51,13 @@ public static List<Object[]> data() {
5051
"(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)",
5152
NameType.GENERIC,
5253
RuleType.EXACT,
53-
Boolean.FALSE, TEN });
54+
Boolean.FALSE, TEN },
55+
new Object[] {
56+
"Judenburg",
57+
"iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk",
58+
NameType.GENERIC,
59+
RuleType.APPROX,
60+
Boolean.TRUE, TEN });
5461
}
5562

5663
private final boolean concat;

0 commit comments

Comments
 (0)