Skip to content

Commit ddc7143

Browse files
committed
Fix all current issues in [CODEC-125], including the big performance issue. Thanks to a patch from Matthew Pocock! Some cleanups and more tests are needed, but this is looking much better now.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/codec/trunk@1151715 13f79535-47bb-0310-9956-ffa450edef68
1 parent b7d0dab commit ddc7143

8 files changed

Lines changed: 369 additions & 377 deletions

File tree

src/java/org/apache/commons/codec/language/bm/Lang.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,9 @@ private Lang(List<LangRule> rules, Languages languages) {
198198
* @return the language that the word originates from or {@link Languages#ANY} if there was no unique match
199199
*/
200200
public String guessLanguage(String text) {
201-
Set<String> ls = guessLanguages(text);
202-
if (ls.size() == 1) {
203-
return ls.iterator().next();
201+
Languages.LanguageSet ls = guessLanguages(text);
202+
if (ls.isSingleton()) {
203+
return ls.getAny();
204204
} else {
205205
return Languages.ANY;
206206
}
@@ -209,11 +209,11 @@ public String guessLanguage(String text) {
209209
/**
210210
* Guesses the languages of a word.
211211
*
212-
* @param text
212+
* @param input
213213
* the word
214-
* @return a Set of Strings of language names that are potential matches for the word
214+
* @return a {@link Languages.LanguageSet} of the languages that are potential matches for the input word
215215
*/
216-
public Set<String> guessLanguages(String input) {
216+
public Languages.LanguageSet guessLanguages(String input) {
217217
String text = input.toLowerCase(); // todo: locale?
218218
// System.out.println("Testing text: '" + text + "'");
219219

@@ -234,6 +234,6 @@ public Set<String> guessLanguages(String input) {
234234
}
235235
}
236236

237-
return langs;
237+
return Languages.LanguageSet.from(langs);
238238
}
239239
}

src/java/org/apache/commons/codec/language/bm/Languages.java

Lines changed: 132 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.EnumMap;
2323
import java.util.HashSet;
2424
import java.util.Map;
25+
import java.util.NoSuchElementException;
2526
import java.util.Scanner;
2627
import java.util.Set;
2728

@@ -114,43 +115,135 @@ public Set<String> getLanguages() {
114115
return this.languages;
115116
}
116117

117-
// // The original code mapped sets of languages to unique numerical codes - this doesn't seem to be needed in this impl
118-
// public static Languages instance(String languagesResourceName)
119-
// {
120-
// // read languages list
121-
// Map<String, Integer> ls = new HashMap<String, Integer>();
122-
// InputStream langIS = Languages.class.getClassLoader().getResourceAsStream(languagesResourceName);
123-
//
124-
// if(langIS == null)
125-
// throw new IllegalArgumentException("Unable to resolve required resource: " + languagesResourceName);
126-
//
127-
// Scanner lsScanner = new Scanner(langIS);
128-
// int i = 0;
129-
// while(lsScanner.hasNextLine()) {
130-
// String line = lsScanner.nextLine();
131-
// i++;
132-
// ls.put(line.trim(), i^2);
133-
// }
134-
//
135-
// return new Languages(Collections.unmodifiableSet(ls.keySet()), Collections.unmodifiableMap(ls));
136-
// }
137-
//
138-
// // todo: phoneticutils.php: LanguageIndex, LanguageName, LanguageCode, LanguageIndexFromCode
139-
//
140-
//
141-
// private final Set<String> languages;
142-
// private final Map<String, Integer> language_codes;
143-
//
144-
// private Languages(Set<String> languages, Map<String, Integer> language_codes) {
145-
// this.languages = languages;
146-
// this.language_codes = language_codes;
147-
// }
148-
//
149-
// public Set<String> getLanguages() {
150-
// return languages;
151-
// }
152-
//
153-
// public Map<String, Integer> getLanguage_codes() {
154-
// return language_codes;
155-
// }
118+
/**
119+
* A set of languages.
120+
*/
121+
public static abstract class LanguageSet {
122+
public abstract LanguageSet restrictTo(LanguageSet other);
123+
124+
public static LanguageSet from(Set<String> langs) {
125+
if (langs.isEmpty()) {
126+
return NO_LANGUAGES;
127+
} else {
128+
return new SomeLanguages(langs);
129+
}
130+
}
131+
132+
public abstract boolean contains(String language);
133+
134+
public abstract boolean isSingleton();
135+
136+
public abstract String getAny();
137+
138+
public abstract boolean isEmpty();
139+
}
140+
141+
/**
142+
* No languages at all.
143+
*/
144+
public static LanguageSet NO_LANGUAGES = new LanguageSet() {
145+
@Override
146+
public LanguageSet restrictTo(LanguageSet other) {
147+
return this;
148+
}
149+
150+
@Override
151+
public boolean contains(String language) {
152+
return false;
153+
}
154+
155+
@Override
156+
public boolean isSingleton() {
157+
return false;
158+
}
159+
160+
@Override
161+
public String getAny() {
162+
throw new NoSuchElementException("Can't fetch any language from the empty language set.");
163+
}
164+
165+
@Override
166+
public boolean isEmpty() {
167+
return true;
168+
}
169+
};
170+
171+
/**
172+
* Any/all languages.
173+
*/
174+
public static LanguageSet ANY_LANGUAGE = new LanguageSet() {
175+
@Override
176+
public LanguageSet restrictTo(LanguageSet other) {
177+
return other;
178+
}
179+
180+
@Override
181+
public boolean contains(String language) {
182+
return true;
183+
}
184+
185+
@Override
186+
public boolean isSingleton() {
187+
return false;
188+
}
189+
190+
@Override
191+
public String getAny() {
192+
throw new NoSuchElementException("Can't fetch any language from the any language set.");
193+
}
194+
195+
@Override
196+
public boolean isEmpty() {
197+
return false;
198+
}
199+
};
200+
201+
/**
202+
* Some languages, explicitly enumerated.
203+
*/
204+
public static class SomeLanguages extends LanguageSet {
205+
private final Set<String> languages;
206+
207+
private SomeLanguages(Set<String> languages) {
208+
this.languages = Collections.unmodifiableSet(languages);
209+
}
210+
211+
public Set<String> getLanguages() {
212+
return this.languages;
213+
}
214+
215+
@Override
216+
public LanguageSet restrictTo(LanguageSet other) {
217+
if (other == NO_LANGUAGES) {
218+
return other;
219+
} else if (other == ANY_LANGUAGE) {
220+
return this;
221+
} else {
222+
SomeLanguages sl = (SomeLanguages) other;
223+
Set<String> ls = new HashSet<String>(this.languages);
224+
ls.retainAll(sl.languages);
225+
return from(ls);
226+
}
227+
}
228+
229+
@Override
230+
public boolean contains(String language) {
231+
return this.languages.contains(language);
232+
}
233+
234+
@Override
235+
public boolean isSingleton() {
236+
return this.languages.size() == 1;
237+
}
238+
239+
@Override
240+
public String getAny() {
241+
return this.languages.iterator().next();
242+
}
243+
244+
@Override
245+
public boolean isEmpty() {
246+
return this.languages.isEmpty();
247+
}
248+
}
156249
}

0 commit comments

Comments
 (0)