Skip to content

Commit 3bf874e

Browse files
committed
Optimize memory allocation
Add org.apache.commons.codec.language.bm.Rule.PhonemeExpr.size()
1 parent f6f9b14 commit 3bf874e

5 files changed

Lines changed: 60 additions & 37 deletions

File tree

pom.xml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ limitations under the License.
2828
</parent>
2929
<groupId>commons-codec</groupId>
3030
<artifactId>commons-codec</artifactId>
31-
<version>1.16.2-SNAPSHOT</version>
31+
<version>1.17.0-SNAPSHOT</version>
3232
<name>Apache Commons Codec</name>
3333
<inceptionYear>2002</inceptionYear>
3434
<description>
@@ -273,9 +273,9 @@ limitations under the License.
273273
<checkstyle.config.file>${basedir}/src/conf/checkstyle.xml</checkstyle.config.file>
274274
<jacoco.skip>false</jacoco.skip>
275275
<!-- Commons Release Plugin -->
276-
<commons.release.version>1.16.1</commons.release.version>
276+
<commons.release.version>1.17.0</commons.release.version>
277277
<commons.bc.version>1.16.0</commons.bc.version>
278-
<commons.bc.next>1.16.2</commons.bc.next>
278+
<commons.bc.next>1.17.1</commons.bc.next>
279279
<commons.rc.version>RC1</commons.rc.version>
280280
<commons.release.isDistModule>true</commons.release.isDistModule>
281281
<commons.distSvnStagingUrl>scm:svn:https://dist.apache.org/repos/dist/dev/commons/${commons.componentid}</commons.distSvnStagingUrl>

src/changes/changes.xml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,9 @@ The <action> type attribute can be add,update,fix,remove.
4343
<author>Apache Commons Developers</author>
4444
</properties>
4545
<body>
46-
<release version="1.16.2" date="2024-MM-DD" description="Feature and fix release. Requires a minimum of Java 8.">
46+
<release version="1.17.0" date="YYYY-MM-DD" description="Feature and fix release. Requires a minimum of Java 8.">
4747
<!-- ADD -->
48+
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.codec.language.bm.Rule.PhonemeExpr.size().</action>
4849
<!-- FIX -->
4950
<!-- UPDATE -->
5051
<action dev="ggregory" type="update" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-parent from 66 to 67 #250.</action>

src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java

Lines changed: 27 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,8 @@ public void append(final CharSequence str) {
106106
* @param maxPhonemes the maximum number of phonemes to build up
107107
*/
108108
public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
109-
final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<>(maxPhonemes);
110-
111-
EXPR: for (final Rule.Phoneme left : this.phonemes) {
109+
final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<>(Math.min(phonemes.size() * phonemeExpr.size(), maxPhonemes));
110+
EXPR: for (final Rule.Phoneme left : phonemes) {
112111
for (final Rule.Phoneme right : phonemeExpr.getPhonemes()) {
113112
final LanguageSet languages = left.getLanguages().restrictTo(right.getLanguages());
114113
if (!languages.isEmpty()) {
@@ -122,9 +121,8 @@ public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
122121
}
123122
}
124123
}
125-
126-
this.phonemes.clear();
127-
this.phonemes.addAll(newPhonemes);
124+
phonemes.clear();
125+
phonemes.addAll(newPhonemes);
128126
}
129127

130128
/**
@@ -133,7 +131,7 @@ public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
133131
* @return the phoneme set
134132
*/
135133
public Set<Rule.Phoneme> getPhonemes() {
136-
return this.phonemes;
134+
return phonemes;
137135
}
138136

139137
/**
@@ -155,22 +153,24 @@ public String makeString() {
155153
* processed already), and {@code found} indicates if a matching rule was found or not. In the case where a
156154
* matching rule was found, {@code phonemeBuilder} is replaced with a new builder containing the phonemes
157155
* updated by the matching rule.
158-
*
156+
* <p>
159157
* Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads
160158
* as it is constructed as needed by the calling methods.
159+
* </p>
160+
*
161161
* @since 1.6
162162
*/
163163
private static final class RulesApplication {
164+
164165
private final Map<String, List<Rule>> finalRules;
165166
private final CharSequence input;
166-
167167
private final PhonemeBuilder phonemeBuilder;
168168
private int i;
169169
private final int maxPhonemes;
170170
private boolean found;
171171

172-
public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input,
173-
final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) {
172+
public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input, final PhonemeBuilder phonemeBuilder, final int i,
173+
final int maxPhonemes) {
174174
Objects.requireNonNull(finalRules, "finalRules");
175175
this.finalRules = finalRules;
176176
this.phonemeBuilder = phonemeBuilder;
@@ -180,11 +180,11 @@ public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequ
180180
}
181181

182182
public int getI() {
183-
return this.i;
183+
return i;
184184
}
185185

186186
public PhonemeBuilder getPhonemeBuilder() {
187-
return this.phonemeBuilder;
187+
return phonemeBuilder;
188188
}
189189

190190
/**
@@ -195,31 +195,31 @@ public PhonemeBuilder getPhonemeBuilder() {
195195
* @return {@code this}
196196
*/
197197
public RulesApplication invoke() {
198-
this.found = false;
198+
found = false;
199199
int patternLength = 1;
200-
final List<Rule> rules = this.finalRules.get(input.subSequence(i, i + patternLength));
200+
final List<Rule> rules = finalRules.get(input.subSequence(i, i + patternLength));
201201
if (rules != null) {
202202
for (final Rule rule : rules) {
203203
final String pattern = rule.getPattern();
204204
patternLength = pattern.length();
205-
if (rule.patternAndContextMatches(this.input, this.i)) {
206-
this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
207-
this.found = true;
205+
if (rule.patternAndContextMatches(input, i)) {
206+
phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
207+
found = true;
208208
break;
209209
}
210210
}
211211
}
212212

213-
if (!this.found) {
213+
if (!found) {
214214
patternLength = 1;
215215
}
216216

217-
this.i += patternLength;
217+
i += patternLength;
218218
return this;
219219
}
220220

221221
public boolean isFound() {
222-
return this.found;
222+
return found;
223223
}
224224
}
225225

@@ -269,11 +269,11 @@ private static String join(final List<String> strings, final String sep) {
269269
* the type of names it will use
270270
* @param ruleType
271271
* the type of rules it will apply
272-
* @param concat
272+
* @param concatenate
273273
* if it will concatenate multiple encodings
274274
*/
275-
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat) {
276-
this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES);
275+
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concatenate) {
276+
this(nameType, ruleType, concatenate, DEFAULT_MAX_PHONEMES);
277277
}
278278

279279
/**
@@ -283,20 +283,19 @@ public PhoneticEngine(final NameType nameType, final RuleType ruleType, final bo
283283
* the type of names it will use
284284
* @param ruleType
285285
* the type of rules it will apply
286-
* @param concat
286+
* @param concatenate
287287
* if it will concatenate multiple encodings
288288
* @param maxPhonemes
289289
* the maximum number of phonemes that will be handled
290290
* @since 1.7
291291
*/
292-
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat,
293-
final int maxPhonemes) {
292+
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concatenate, final int maxPhonemes) {
294293
if (ruleType == RuleType.RULES) {
295294
throw new IllegalArgumentException("ruleType must not be " + RuleType.RULES);
296295
}
297296
this.nameType = nameType;
298297
this.ruleType = ruleType;
299-
this.concat = concat;
298+
this.concat = concatenate;
300299
this.lang = Lang.instance(nameType);
301300
this.maxPhonemes = maxPhonemes;
302301
}

src/main/java/org/apache/commons/codec/language/bm/Rule.java

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,11 @@ public Phoneme mergeWithLanguage(final LanguageSet lang) {
166166
return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang));
167167
}
168168

169+
@Override
170+
public int size() {
171+
return 1;
172+
}
173+
169174
@Override
170175
public String toString() {
171176
return phonemeText.toString() + "[" + languages + "]";
@@ -174,19 +179,35 @@ public String toString() {
174179

175180
public interface PhonemeExpr {
176181
Iterable<Phoneme> getPhonemes();
182+
183+
/**
184+
* Gets the expression size in phonemes.
185+
*
186+
* @return the expression size in phonemes.
187+
* @since 1.17.0
188+
*/
189+
default int size() {
190+
// All implementations are int-bound.
191+
return (int) Math.min(getPhonemes().spliterator().getExactSizeIfKnown(), Integer.MAX_VALUE);
192+
}
177193
}
178194

179195
public static final class PhonemeList implements PhonemeExpr {
180196

181-
private final List<Phoneme> phonemes;
197+
private final List<Phoneme> phonemeList;
182198

183199
public PhonemeList(final List<Phoneme> phonemes) {
184-
this.phonemes = phonemes;
200+
this.phonemeList = phonemes;
185201
}
186202

187203
@Override
188204
public List<Phoneme> getPhonemes() {
189-
return this.phonemes;
205+
return phonemeList;
206+
}
207+
208+
@Override
209+
public int size() {
210+
return phonemeList.size();
190211
}
191212
}
192213

src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,8 @@ public static Stream<Arguments> data() {
4343
Arguments.of("SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
4444
Arguments.of("d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
4545
Arguments.of("van helsing", "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
46-
Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN)
46+
Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN),
47+
Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, Integer.MAX_VALUE)
4748
);
4849
// @formatter:on
4950
}
@@ -54,7 +55,8 @@ public static Stream<Arguments> invalidData() {
5455
Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
5556
Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
5657
Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
57-
Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
58+
Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
59+
Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, Integer.MAX_VALUE)
5860
);
5961
// @formatter:on
6062
}

0 commit comments

Comments
 (0)