Skip to content

Commit a9a1ef4

Browse files
committed
1 parent 8b0318c commit a9a1ef4

3 files changed

Lines changed: 53 additions & 33 deletions

File tree

src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,15 @@ public String makeString() {
164164
* @since 1.6
165165
*/
166166
private static final class RulesApplication {
167-
private final List<Rule> finalRules;
167+
private final Map<String, List<Rule>> finalRules;
168168
private final CharSequence input;
169169

170170
private PhonemeBuilder phonemeBuilder;
171171
private int i;
172172
private final int maxPhonemes;
173173
private boolean found;
174174

175-
public RulesApplication(final List<Rule> finalRules, final CharSequence input,
175+
public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input,
176176
final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) {
177177
if (finalRules == null) {
178178
throw new NullPointerException("The finalRules argument must not be null");
@@ -201,18 +201,18 @@ public PhonemeBuilder getPhonemeBuilder() {
201201
*/
202202
public RulesApplication invoke() {
203203
this.found = false;
204-
int patternLength = 0;
205-
for (final Rule rule : this.finalRules) {
206-
final String pattern = rule.getPattern();
207-
patternLength = pattern.length();
208-
209-
if (!rule.patternAndContextMatches(this.input, this.i)) {
210-
continue;
211-
}
212-
213-
this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
214-
this.found = true;
215-
break;
204+
int patternLength = 1;
205+
List<Rule> rules = this.finalRules.get(input.subSequence(i, i+patternLength));
206+
if (rules != null) {
207+
for (Rule rule : rules) {
208+
final String pattern = rule.getPattern();
209+
patternLength = pattern.length();
210+
if (rule.patternAndContextMatches(this.input, this.i)) {
211+
this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
212+
this.found = true;
213+
break;
214+
}
215+
}
216216
}
217217

218218
if (!this.found) {
@@ -358,7 +358,7 @@ public PhoneticEngine(final NameType nameType, final RuleType ruleType, final bo
358358
* @param finalRules the final rules to apply
359359
* @return the resulting phonemes
360360
*/
361-
private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, final List<Rule> finalRules) {
361+
private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, final Map<String, List<Rule>> finalRules) {
362362
if (finalRules == null) {
363363
throw new NullPointerException("finalRules can not be null");
364364
}
@@ -414,11 +414,11 @@ public String encode(final String input) {
414414
* of the input
415415
*/
416416
public String encode(String input, final Languages.LanguageSet languageSet) {
417-
final List<Rule> rules = Rule.getInstance(this.nameType, RuleType.RULES, languageSet);
417+
final Map<String, List<Rule>> rules = Rule.getInstance(this.nameType, RuleType.RULES, languageSet);
418418
// rules common across many (all) languages
419-
final List<Rule> finalRules1 = Rule.getInstance(this.nameType, this.ruleType, "common");
419+
final Map<String, List<Rule>> finalRules1 = Rule.getInstance(this.nameType, this.ruleType, "common");
420420
// rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
421-
final List<Rule> finalRules2 = Rule.getInstance(this.nameType, this.ruleType, languageSet);
421+
final Map<String, List<Rule>> finalRules2 = Rule.getInstance(this.nameType, this.ruleType, languageSet);
422422

423423
// tidy the input
424424
// lower case is a locale-dependent operation

src/main/java/org/apache/commons/codec/language/bm/Rule.java

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -169,16 +169,16 @@ public boolean isMatch(final CharSequence input) {
169169

170170
private static final String HASH_INCLUDE = "#include";
171171

172-
private static final Map<NameType, Map<RuleType, Map<String, List<Rule>>>> RULES =
173-
new EnumMap<NameType, Map<RuleType, Map<String, List<Rule>>>>(NameType.class);
172+
private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES =
173+
new EnumMap<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>>(NameType.class);
174174

175175
static {
176176
for (final NameType s : NameType.values()) {
177-
final Map<RuleType, Map<String, List<Rule>>> rts =
178-
new EnumMap<RuleType, Map<String, List<Rule>>>(RuleType.class);
177+
final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts =
178+
new EnumMap<RuleType, Map<String, Map<String, List<Rule>>>>(RuleType.class);
179179

180180
for (final RuleType rt : RuleType.values()) {
181-
final Map<String, List<Rule>> rs = new HashMap<String, List<Rule>>();
181+
final Map<String, Map<String, List<Rule>>> rs = new HashMap<String, Map<String, List<Rule>>>();
182182

183183
final Languages ls = Languages.getInstance(s);
184184
for (final String l : ls.getLanguages()) {
@@ -258,7 +258,7 @@ private static boolean endsWith(final CharSequence input, final CharSequence suf
258258
* the set of languages to consider
259259
* @return a map from pattern key to the list of Rules that apply
260260
*/
261-
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
261+
public static Map<String, List<Rule>> getInstance(final NameType nameType, final RuleType rt,
262262
final Languages.LanguageSet langs) {
263263
return langs.isSingleton() ? getInstance(nameType, rt, langs.getAny()) :
264264
getInstance(nameType, rt, Languages.ANY);
@@ -275,8 +275,8 @@ public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
275275
* the language to consider
276276
* @return a map of rules, grouped by pattern key, for a combination of name type, rule type and a single language.
277277
*/
278-
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
279-
final List<Rule> rules = RULES.get(nameType).get(rt).get(lang);
278+
public static Map<String, List<Rule>> getInstance(final NameType nameType, final RuleType rt, final String lang) {
279+
final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang);
280280

281281
if (rules == null) {
282282
throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.",
@@ -323,8 +323,8 @@ private static PhonemeExpr parsePhonemeExpr(final String ph) {
323323
}
324324
}
325325

326-
private static List<Rule> parseRules(final Scanner scanner, final String location) {
327-
final List<Rule> lines = new ArrayList<Rule>();
326+
private static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) {
327+
final Map<String, List<Rule>> lines = new HashMap<String, List<Rule>>();
328328
int currentLine = 0;
329329

330330
boolean inMultilineComment = false;
@@ -361,7 +361,7 @@ private static List<Rule> parseRules(final Scanner scanner, final String locatio
361361
throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
362362
location);
363363
} else {
364-
lines.addAll(parseRules(createScanner(incl), location + "->" + incl));
364+
lines.putAll(parseRules(createScanner(incl), location + "->" + incl));
365365
}
366366
} else {
367367
// rule
@@ -390,7 +390,13 @@ public String toString() {
390390
return sb.toString();
391391
}
392392
};
393-
lines.add(r);
393+
String patternKey = r.pattern.substring(0,1);
394+
List<Rule> rules = lines.get(patternKey);
395+
if (rules == null) {
396+
rules = new ArrayList<Rule>();
397+
lines.put(patternKey, rules);
398+
}
399+
rules.add(r);
394400
} catch (final IllegalArgumentException e) {
395401
throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
396402
location, e);

src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,33 @@
2121
/**
2222
* Tests performance for {@link PhoneticEngine}.
2323
* <p>
24-
* See <a href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve performance of Beider Morse encoder</a>.
24+
* See <a href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve performance of Beider Morse
25+
* encoder</a>.
2526
* </p>
2627
* <p>
27-
* Results for November 7, 2013, SVN revision 1539678.
28+
* Results for November 7, 2013, project SVN revision 1539678.
2829
* </p>
2930
* <ol>
3031
* <li>Time for encoding 80,000 times the input 'Angelo': 33,039 millis.</li>
3132
* <li>Time for encoding 80,000 times the input 'Angelo': 32,297 millis.</li>
3233
* <li>Time for encoding 80,000 times the input 'Angelo': 32,857 millis.</li>
33-
* <li>Time for encoding 80,000 times the input 'Angelo': 31,561 millis.</li>
34+
* <li>Time for encoding 80,000 times the input 'Angelo': <b>31,561 millis.</b></li>
3435
* <li>Time for encoding 80,000 times the input 'Angelo': 32,665 millis.</li>
3536
* <li>Time for encoding 80,000 times the input 'Angelo': 32,215 millis.</li>
3637
* </ol>
38+
* <p>
39+
* On this file's revision 1539678, with patch <a
40+
* href="https://issues.apache.org/jira/secure/attachment/12611963/CODEC-174-change-rules-storage-to-Map.patch"
41+
* >CODEC-174-change-rules-storage-to-Map</a>:
42+
* </p>
43+
* <ol>
44+
* <li>Time for encoding 80,000 times the input 'Angelo': 18,196 millis.</li>
45+
* <li>Time for encoding 80,000 times the input 'Angelo': 13,858 millis.</li>
46+
* <li>Time for encoding 80,000 times the input 'Angelo': 13,644 millis.</li>
47+
* <li>Time for encoding 80,000 times the input 'Angelo': <b>13,591 millis.</b></li>
48+
* <li>Time for encoding 80,000 times the input 'Angelo': 13,861 millis.</li>
49+
* <li>Time for encoding 80,000 times the input 'Angelo': 13,696 millis.</li>
50+
* </ol>
3751
*/
3852
public class PhoneticEnginePerformanceTest {
3953

0 commit comments

Comments
 (0)