Skip to content

Commit 1f908b2

Browse files
authored
CODEC-315: Fix possible IndexOutOfBoundsException (#223)
Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
1 parent 41871c2 commit 1f908b2

2 files changed

Lines changed: 20 additions & 2 deletions

File tree

src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ public String encode(String input, final Languages.LanguageSet languageSet) {
409409
switch (this.nameType) {
410410
case SEPHARDIC:
411411
words.forEach(aWord -> {
412-
final String[] parts = aWord.split("'");
412+
final String[] parts = aWord.split("'", -1);
413413
words2.add(parts[parts.length - 1]);
414414
});
415415
words2.removeAll(NAME_PREFIXES.get(this.nameType));
@@ -431,7 +431,7 @@ public String encode(String input, final Languages.LanguageSet languageSet) {
431431
} else if (words2.size() == 1) {
432432
// not a multi-word name
433433
input = words.iterator().next();
434-
} else {
434+
} else if (!words2.isEmpty()) {
435435
// encode each word in a multi-word name separately (normally used for approx matches)
436436
final StringBuilder result = new StringBuilder();
437437
words2.forEach(word -> result.append("-").append(encode(word)));

src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,15 @@ public static Stream<Arguments> data() {
4848
);
4949
}
5050

51+
public static Stream<Arguments> invalidData() {
52+
return Stream.of(
53+
Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
54+
Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
55+
Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
56+
Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
57+
);
58+
}
59+
5160
// TODO Determine whether an assertTimeout(Duration.ofMillis(10000L), ...) is needed at some point, since this method was previously marked as @Test(timeout = 10000L)
5261
@ParameterizedTest
5362
@MethodSource("data")
@@ -70,4 +79,13 @@ public void testEncode(final String name, final String phoneticExpected, final N
7079
}
7180
}
7281
}
82+
83+
@ParameterizedTest
84+
@MethodSource("invalidData")
85+
public void testInvalidEncode(final String input, final String phoneticExpected, final NameType nameType,
86+
final RuleType ruleType, final boolean concat, final int maxPhonemes) {
87+
final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
88+
89+
assertEquals(engine.encode(input), phoneticExpected);
90+
}
7391
}

0 commit comments

Comments
 (0)