Skip to content

Commit f6ab92f

Browse files
committed
CODEC-315: Fix possible IndexOutOfBoundsException
Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
1 parent 44e4c4d commit f6ab92f

2 files changed

Lines changed: 23 additions & 2 deletions

File tree

src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -409,7 +409,7 @@ public String encode(String input, final Languages.LanguageSet languageSet) {
409409
switch (this.nameType) {
410410
case SEPHARDIC:
411411
words.forEach(aWord -> {
412-
final String[] parts = aWord.split("'");
412+
final String[] parts = aWord.split("'", -1);
413413
words2.add(parts[parts.length - 1]);
414414
});
415415
words2.removeAll(NAME_PREFIXES.get(this.nameType));
@@ -431,7 +431,7 @@ public String encode(String input, final Languages.LanguageSet languageSet) {
431431
} else if (words2.size() == 1) {
432432
// not a multi-word name
433433
input = words.iterator().next();
434-
} else {
434+
} else if (!words2.isEmpty()) {
435435
// encode each word in a multi-word name separately (normally used for approx matches)
436436
final StringBuilder result = new StringBuilder();
437437
words2.forEach(word -> result.append("-").append(encode(word)));

src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.commons.codec.language.bm;
1919

20+
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
2021
import static org.junit.jupiter.api.Assertions.assertEquals;
2122
import static org.junit.jupiter.api.Assertions.assertTrue;
2223

@@ -48,6 +49,15 @@ public static Stream<Arguments> data() {
4849
);
4950
}
5051

52+
public static Stream<Arguments> invalidData() {
53+
return Stream.of(
54+
Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
55+
Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
56+
Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
57+
Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
58+
);
59+
}
60+
5161
// TODO Identify whether an assertTimeout(Duration.ofMillis(10000L)) is needed at some point, since this method was previously marked as @Test(timeout = 10000L)
5262
@ParameterizedTest
5363
@MethodSource("data")
@@ -70,4 +80,15 @@ public void testEncode(final String name, final String phoneticExpected, final N
7080
}
7181
}
7282
}
83+
84+
@ParameterizedTest
85+
@MethodSource("invalidData")
86+
public void testInvalidEncode(final String input, final String phoneticExpected, final NameType nameType,
87+
final RuleType ruleType, final boolean concat, final int maxPhonemes) {
88+
final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
89+
90+
assertDoesNotThrow(() -> {
91+
assertEquals(engine.encode(input), phoneticExpected);
92+
});
93+
}
7394
}

0 commit comments

Comments
 (0)