1717
1818package org .apache .commons .codec .language ;
1919
20- import java .util .Arrays ;
21- import java .util .List ;
22-
2320import org .apache .commons .codec .EncoderException ;
2421import org .apache .commons .codec .StringEncoder ;
2522import org .apache .commons .codec .StringEncoderAbstractTest ;
@@ -83,109 +80,46 @@ public void testDan() throws EncoderException {
8380 this .encodeAll (new String [] { "Dane" , "Dean" , "Dionne" }, "DAN" );
8481 }
8582
86- @ Test
87- public void testDropBy () throws EncoderException {
88- this .assertEncodings (
89- new String [] { "MACINTOSH" , "MCANT" },
90- new String [] { "KNUTH" , "NAT" },
91- new String [] { "KOEHN" , "CAN" },
92- new String [] { "PHILLIPSON" , "FALAPSAN" },
93- new String [] { "PFEISTER" , "FASTAR" },
94- new String [] { "MCKEE" , "MCY" },
95- new String [] { "MACKIE" , "MCY" },
96- new String [] { "HEITSCHMIDT" , "HATSNAD" },
97- new String [] { "BART" , "BAD" },
98- new String [] { "HURD" , "HAD" },
99- new String [] { "HUNT" , "HAD" },
100- new String [] { "WESTERLUND" , "WASTARLAD" },
101- new String [] { "CASSTEVENS" , "CASTAFAN" },
102- new String [] { "VASQUEZ" , "VASG" },
103- new String [] { "FRAZIER" , "FRASAR" },
104- new String [] { "BOWMAN" , "BANAN" },
105- new String [] { "RICKERT" , "RACAD" },
106- new String [] { "DEUTSCH" , "DAT" },
107- new String [] { "WESTPHAL" , "WASTFAL" },
108- new String [] { "SHRIVER" , "SRAVAR" },
109- new String [] { "KUHL" , "CAL" },
110- new String [] { "RAWSON" , "RASAN" },
111- new String [] { "JILES" , "JAL" },
112- new String [] { "CARRAWAY" , "CARY" },
113- new String [] { "YAMADA" , "YANAD" });
114- }
115-
11683 /**
117- * Tests data gathered from around the internets .
84+ * Tests data gathered from around the internet .
11885 *
86+ * @see <a href="http://www.dropby.com/NYSIISTextStrings.html">http://www.dropby.com/NYSIISTextStrings.html</a>
11987 * @throws EncoderException
12088 */
12189 @ Test
122- public void testDropBy2 () throws EncoderException {
123- // Explanation of differences between this implementation and the one at dropby.com.
124- //
125- // Algorithm (taken from www.dropby.com/NYSIIS.html):
126- //
127- // 1. Transcode first characters of name:
128- // MAC > MCC
129- // KN > NN
130- // K > C
131- // PH > FF
132- // PF > FF
133- // SCH > SSS
134- //
135- // 2. Transcode last characters of name:
136- // EE, IE > Y
137- // DT,RT,RD,NT,ND > D
138- //
139- // 3. First character of key = first character of name.
140- //
141- // 4. Transcode remaining characters by following these rules, incrementing by one character each time:
142- // 4a. EV > AF else A,E,I,O,U > A
143- // 4b. Q > G
144- // 4c. Z > S
145- // 4d. M > N
146- // 4e. KN > N else K > C
147- // 4f. SCH > SSS
148- // 4g. PH > FF
149- // 4h. H > If previous or next is nonvowel, previous
150- // 4i. W > If previous is vowel, previous
151- // 4j. Add current to key if current != last key character
152- //
153- // 5. If last character is S, remove it
154- // 6. If last characters are AY, replace with Y
155- // 7. If last character is A, remove it
156- // 8. Collapse all strings of repeated characters
157- // 9. Add original first character of name as first character of key
90+ public void testDropBy () throws EncoderException {
91+ // Explanation of differences between this implementation and the one at dropby.com is
92+ // prepended to the test string. The referenced rules refer to the steps outlined in
93+ // the class description for Nysiis.
15894
15995 this .assertEncodings (
160- // http://www.dropby.com/indexLF.html?content=/NYSIIS.html
16196 // 1. Transcode first characters of name
16297 new String [] { "MACINTOSH" , "MCANT" },
16398 // violates 4j: the second N should not be added, as the first
16499 // key char is already an N
165- new String [] { "KNUTH" , "NAT" }, // Original: NNAT; modified: NATH
100+ new String [] { "KNUTH" , "NAT" }, // Original: NNAT; modified: NATH
166101 // O and E are transcoded to A because of rule 4a
167102 // H also to A because of rule 4h
168103 // the N gets mysteriously lost, maybe because of a wrongly implemented rule 4h
169104 // that skips the next char in such a case?
170105 // the remaining A is removed because of rule 7
171- new String [] { "KOEHN" , "CAN" }, // Original: C
106+ new String [] { "KOEHN" , "CAN" }, // Original: C
172107 // violates 4j: see also KNUTH
173108 new String [] { "PHILLIPSON" , "FALAPSAN" }, // Original: FFALAP[SAN]
174109 // violates 4j: see also KNUTH
175- new String [] { "PFEISTER" , "FASTAR" }, // Original: FFASTA[R]
110+ new String [] { "PFEISTER" , "FASTAR" }, // Original: FFASTA[R]
176111 // violates 4j: see also KNUTH
177- new String [] { "SCHOENHOEFT" , "SANAFT" }, // Original: SSANAF[T]
178- // http://www.dropby.com/indexLF.html?content=/NYSIIS.html
179- // 2.Transcode last characters of name:
112+ new String [] { "SCHOENHOEFT" , "SANAFT" }, // Original: SSANAF[T]
113+ // 2. Transcode last characters of name:
180114 new String [] { "MCKEE" , "MCY" },
181115 new String [] { "MACKIE" , "MCY" },
182116 new String [] { "HEITSCHMIDT" , "HATSNAD" },
183117 new String [] { "BART" , "BAD" },
184118 new String [] { "HURD" , "HAD" },
185119 new String [] { "HUNT" , "HAD" },
186120 new String [] { "WESTERLUND" , "WASTARLAD" },
187- // http://www.dropby.com/indexLF.html?content=/NYSIIS.html
188- // 4. Transcode remaining characters by following these rules, incrementing by one character each time:
121+ // 4. Transcode remaining characters by following these rules,
122+ // incrementing by one character each time:
189123 new String [] { "CASSTEVENS" , "CASTAFAN" },
190124 new String [] { "VASQUEZ" , "VASG" },
191125 new String [] { "FRAZIER" , "FRASAR" },
@@ -195,18 +129,18 @@ public void testDropBy2() throws EncoderException {
195129 // violates 5: the last S is not removed
196130 // when comparing to DEUTS, which is phonetically similar
197131 // the result is also DAT, which is correct for DEUTSCH too imo
198- new String [] { "DEUTSCH" , "DAT" }, // Original: DATS
132+ new String [] { "DEUTSCH" , "DAT" }, // Original: DATS
199133 new String [] { "WESTPHAL" , "WASTFAL" },
200134 // violates 4h: the H should be transcoded to S and thus ignored as
201135 // the first key character is also S
202- new String [] { "SHRIVER" , "SRAVAR" }, // Original: SHRAVA[R]
136+ new String [] { "SHRIVER" , "SRAVAR" }, // Original: SHRAVA[R]
203137 // same as KOEHN, the L gets mysteriously lost
204- new String [] { "KUHL" , "CAL" }, // Original: C
138+ new String [] { "KUHL" , "CAL" }, // Original: C
205139 new String [] { "RAWSON" , "RASAN" },
206140 // If last character is S, remove it
207141 new String [] { "JILES" , "JAL" },
208142 // violates 6: if the last two characters are AY, remove A
209- new String [] { "CARRAWAY" , "CARY" }, // Original: CARAY
143+ new String [] { "CARRAWAY" , "CARY" }, // Original: CARAY
210144 new String [] { "YAMADA" , "YANAD" });
211145 }
212146
0 commit comments