diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java
index 056656c0637..bdd3c0b80a1 100644
--- a/src/main/java/org/apache/commons/lang3/StringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/StringUtils.java
@@ -8882,6 +8882,55 @@ public static String truncate(final String str, final int maxWidth) {
         return truncate(str, 0, maxWidth);
     }
 
+    /**
+     * Truncates a String so that, once encoded with the given charset, it occupies at most {@code maxBytes} bytes.
+     *
+     * <p>
+     * The cut is only ever made between code points, so a surrogate pair is either kept whole or dropped, never
+     * split. A String whose encoded form already fits is returned unchanged. In the examples below,
+     * {@code UTF_8} stands for {@code StandardCharsets.UTF_8}.
+     * </p>
+     *
+     * <pre>
+     * StringUtils.truncateToByteLength(null, 4, UTF_8)               = null
+     * StringUtils.truncateToByteLength("abcdefghijklmno", 10, UTF_8) = "abcdefghij"
+     * StringUtils.truncateToByteLength("abcdefghijklmno", 20, UTF_8) = "abcdefghijklmno"
+     * StringUtils.truncateToByteLength("abc", 0, UTF_8)              = ""
+     * </pre>
+     *
+     * @param str the String to truncate, may be null
+     * @param maxBytes the maximum encoded length of the result in bytes; if not even the first code point fits
+     *  (for example when this is zero or negative) the result is the empty String
+     * @param charset the charset used to measure the encoded length, handed to {@code getBytes(String, Charset)}
+     * @return the longest leading part of {@code str} that ends on a code point boundary and encodes to at most
+     *  {@code maxBytes} bytes, {@code null} if null String input
+     */
+    public static String truncateToByteLength(final String str, final int maxBytes, final Charset charset) {
+        if (str == null) {
+            return null;
+        }
+        if (StringUtils.getBytes(str, charset).length <= maxBytes) {
+            return str;
+        }
+        // The whole String is known not to fit, so the result holds strictly fewer code points than str.
+        // The search relies on the encoded length of a prefix never shrinking as the prefix grows.
+        int low = 0;
+        int high = str.codePointCount(0, str.length()) - 1;
+        // Char index just past the longest prefix found to fit so far.
+        int end = 0;
+        while (low <= high) {
+            final int mid = low + (high - low) / 2;
+            final int charIndex = str.offsetByCodePoints(0, mid);
+            if (StringUtils.getBytes(str.substring(0, charIndex), charset).length <= maxBytes) {
+                end = charIndex;
+                low = mid + 1;
+            } else {
+                high = mid - 1;
+            }
+        }
+        return str.substring(0, end);
+    }
+
     /**
      * Truncates a String. This will turn
      * "Now is the time for all good men" into "is the time for all".
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
index bff9d842dce..08ac89d2058 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
@@ -3089,6 +3089,45 @@ void testTruncate_StringIntInt() {
         assertEquals("", StringUtils.truncate("abcdefghijklmno", Integer.MAX_VALUE, Integer.MAX_VALUE));
     }
 
+    @Test
+    void testTruncateToByteLength() {
+        // The expected values below are UTF-8 byte counts, so pin the charset instead of using the platform default.
+        final Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
+        assertNull(StringUtils.truncateToByteLength(null, 0, utf8));
+        assertEquals("", StringUtils.truncateToByteLength("", 0, utf8));
+        assertEquals("", StringUtils.truncateToByteLength("abc", -1, utf8));
+        assertEquals("abcdefghij", StringUtils.truncateToByteLength("abcdefghijklmno", 10, utf8));
+        assertEquals("abcdefghijklmno", StringUtils.truncateToByteLength("abcdefghijklmno", 15, utf8));
+        assertEquals("abcdefghijklmno", StringUtils.truncateToByteLength("abcdefghijklmno", 20, utf8));
+        // Characters that take three bytes each in UTF-8.
+        assertEquals("\u4F60\u597D\u55CE", StringUtils.truncateToByteLength("\u4F60\u597D\u55CE", 10, utf8));
+        assertEquals("\u4F60", StringUtils.truncateToByteLength("\u4F60\u597D\u55CE", 5, utf8));
+        assertEquals("\u2713\u2714", StringUtils.truncateToByteLength("\u2713\u2714", 6, utf8));
+        assertEquals("", StringUtils.truncateToByteLength("\u2713\u2714", 2, utf8));
+        // Surrogate pairs take four bytes each in UTF-8 and must never be split.
+        assertEquals("\uD83D\uDE80", StringUtils.truncateToByteLength("\uD83D\uDE80\u2728\uD83C\uDF89", 6, utf8));
+        assertEquals("", StringUtils.truncateToByteLength("\uD83D\uDE80\u2728\uD83C\uDF89", 3, utf8));
+        assertEquals("", StringUtils.truncateToByteLength("\uD83D\uDE03", 3, utf8));
+        assertEquals("\uD83D\uDE03", StringUtils.truncateToByteLength("\uD83D\uDE03", 4, utf8));
+        assertEquals("\uD83D\uDE03\uD83D\uDE03", StringUtils.truncateToByteLength(
+                "\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03", 9, utf8));
+
+        // U+1F98A (fox face), written with escapes so the test does not depend on the source file encoding.
+        final String fox = "\uD83E\uDD8A";
+        final int foxCount = 14;
+        final StringBuilder foxes = new StringBuilder();
+        for (int i = 0; i < foxCount; i++) {
+            foxes.append(fox);
+        }
+        for (int i = 0; i < 100; ++i) {
+            final String s = StringUtils.truncateToByteLength(foxes.toString(), i, utf8);
+            assertNotNull(s);
+            assertTrue(s.getBytes(utf8).length <= i);
+            // Each fox takes four bytes, so exactly i / 4 of them fit until the input runs out.
+            assertEquals(Math.min(i / 4, foxCount), s.codePointCount(0, s.length()));
+        }
+    }
+
     @Test
     void testUnCapitalize() {
         assertNull(StringUtils.uncapitalize(null));