1414 * See the License for the specific language governing permissions and
1515 * limitations under the License.
1616 */
17+
1718package org .apache .commons .codec .language ;
1819
1920import static org .junit .jupiter .api .Assertions .assertEquals ;
2021
22+ import java .util .stream .IntStream ;
23+
2124import org .apache .commons .codec .AbstractStringEncoderTest ;
2225import org .apache .commons .codec .EncoderException ;
2326import org .junit .jupiter .api .Test ;
27+ import org .junit .jupiter .params .ParameterizedTest ;
28+ import org .junit .jupiter .params .provider .MethodSource ;
2429
2530/**
2631 * Tests {@link DaitchMokotoffSoundex}.
@@ -47,7 +52,6 @@ private String soundex(final String source) {
4752 void testAccentedCharacterFolding () {
4853 assertEquals ("294795" , soundex ("Straßburg" ));
4954 assertEquals ("294795" , soundex ("Strasburg" ));
50-
5155 assertEquals ("095600" , soundex ("Éregon" ));
5256 assertEquals ("095600" , soundex ("Eregon" ));
5357 }
@@ -59,7 +63,6 @@ void testAdjacentCodes() {
5963 // 0-54-4---8 -> wrong
6064 // 0-54-----8 -> correct
6165 assertEquals ("054800" , soundex ("AKSSOL" ));
62-
6366 // GERSCHFELD
6467 // G-E-RS-CH-F-E-L-D
6568 // 5--4/94-5/4-7-8-3 -> wrong
@@ -82,18 +85,18 @@ void testEncodeBasic() {
8285
8386 @ Test
8487 void testEncodeIgnoreApostrophes () throws EncoderException {
85- checkEncodingVariations ("079600" , new String [] { "OBrien" , "'OBrien" , "O'Brien" , "OB'rien" , "OBr'ien" ,
86- "OBri'en" , "OBrie'n" , "OBrien'" });
88+ checkEncodingVariations ("079600" , "OBrien" , "'OBrien" , "O'Brien" , "OB'rien" , "OBr'ien" , "OBri'en" , "OBrie'n" , "OBrien'" );
8789 }
8890
8991 /**
9092 * Test data from http://www.myatt.demon.co.uk/sxalg.htm
9193 *
92- * @throws EncoderException for some failure scenarios */
94+ * @throws EncoderException for some failure scenarios
95+ */
9396 @ Test
9497 void testEncodeIgnoreHyphens () throws EncoderException {
95- checkEncodingVariations ("565463" , new String [] { "KINGSMITH" , "-KINGSMITH" , "K-INGSMITH" , "KI-NGSMITH" ,
96- "KIN-GSMITH" , "KING-SMITH" , "KINGS-MITH" , "KINGSM-ITH" , " KINGSMI-TH" , "KINGSMIT-H" , "KINGSMITH-" } );
98+ checkEncodingVariations ("565463" , "KINGSMITH" , "-KINGSMITH" , "K-INGSMITH" , "KI-NGSMITH" , "KIN-GSMITH" , "KING-SMITH" , "KINGS-MITH" , "KINGSM-ITH " ,
99+ "KINGSMI-TH" , "KINGSMIT-H" , "KINGSMITH-" );
97100 }
98101
99102 @ Test
@@ -102,6 +105,18 @@ void testEncodeIgnoreTrimmable() {
102105 assertEquals ("746536" , encode ("Washington" ));
103106 }
104107
108+ static IntStream getNonLetters () {
109+ return IntStream .rangeClosed (Character .MIN_VALUE , Character .MAX_VALUE ).filter (c -> !Character .isLetter (c ));
110+ }
111+
112+ @ ParameterizedTest
113+ @ MethodSource ("getNonLetters" )
114+ void testEncodeIgnoreNonLetters (final int nonLetterInt ) throws EncoderException {
115+ final char nonLetterChar = (char ) nonLetterInt ;
116+ checkEncodingVariations ("746536" , "Washington" + nonLetterChar , nonLetterChar + "Washington" , nonLetterChar + "Washington" + nonLetterChar ,
117+ "Washi" + nonLetterChar + "ngton" );
118+ }
119+
105120 /**
106121 * Examples from http://www.jewishgen.org/infofiles/soundex.html
107122 */
@@ -116,7 +131,6 @@ void testSoundexBasic() {
116131 assertEquals ("370000" , soundex ("Topf" ));
117132 assertEquals ("586660" , soundex ("Kleinmann" ));
118133 assertEquals ("769600" , soundex ("Ben Aron" ));
119-
120134 assertEquals ("097400|097500" , soundex ("AUERBACH" ));
121135 assertEquals ("097400|097500" , soundex ("OHRBACH" ));
122136 assertEquals ("874400" , soundex ("LIPSHITZ" ));
@@ -166,5 +180,4 @@ void testSpecialRomanianCharacters() {
166180 assertEquals ("364000|464000" , soundex ("ţamas" )); // t-cedilla
167181 assertEquals ("364000|464000" , soundex ("țamas" )); // t-comma
168182 }
169-
170183}
0 commit comments