Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/main/java/org/apache/commons/lang3/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -8882,6 +8882,36 @@ public static String truncate(final String str, final int maxWidth) {
return truncate(str, 0, maxWidth);
}

/**
 * Truncates a String so that its encoded form does not exceed a byte budget.
 *
 * <p>The String is only ever cut on a code point boundary, so a surrogate
 * pair is either kept whole or dropped whole.</p>
 *
 * <ul>
 * <li>A {@code null} input returns {@code null}.</li>
 * <li>If the whole String encodes to at most {@code maxBytes} bytes, the
 * same String instance is returned.</li>
 * <li>Otherwise the longest leading run of code points found to fit is
 * returned; if no non-empty prefix fits, the result is the empty String.</li>
 * </ul>
 *
 * <p>NOTE(review): the search assumes that the encoded length never shrinks
 * when a prefix grows by a code point - confirm for any exotic charsets in use.
 * Treatment of a {@code null} charset is whatever
 * {@code StringUtils.getBytes(String, Charset)} does with it.</p>
 *
 * @param str the String to truncate, may be null
 * @param maxBytes the maximum number of encoded bytes allowed in the result
 * @param charset the charset used to measure the encoded length
 * @return the truncated String, the original String if it already fits,
 *         or {@code null} if {@code str} is null
 */
public static String truncateToByteLength(String str, int maxBytes, Charset charset) {
    if (str == null) {
        return null;
    }
    // Fast path: nothing to cut when the complete encoding already fits.
    if (StringUtils.getBytes(str, charset).length <= maxBytes) {
        return str;
    }

    // Bisect over the number of leading code points (not chars) to keep.
    int fitting = 0;
    int lo = 0;
    int hi = str.codePointCount(0, str.length());
    while (lo <= hi) {
        final int probe = (lo + hi) >>> 1;
        // Translate the code point count into a char offset before slicing.
        final String prefix = str.substring(0, str.offsetByCodePoints(0, probe));
        if (StringUtils.getBytes(prefix, charset).length > maxBytes) {
            // Too long: the answer lies strictly below the probe.
            hi = probe - 1;
        } else {
            // Fits: remember it and try to keep more code points.
            fitting = probe;
            lo = probe + 1;
        }
    }

    return str.substring(0, str.offsetByCodePoints(0, fitting));
}

/**
* Truncates a String. This will turn
* "Now is the time for all good men" into "is the time for all".
Expand Down
25 changes: 25 additions & 0 deletions src/test/java/org/apache/commons/lang3/StringUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3089,6 +3089,31 @@ void testTruncate_StringIntInt() {
assertEquals("", StringUtils.truncate("abcdefghijklmno", Integer.MAX_VALUE, Integer.MAX_VALUE));
}

@Test
void testTruncateToByteLength() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should be separate test methods for different tests so they can pass or fail independently.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you can reuse the test class, but this is about 10 different tests that should each be a separate method.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test suite in Commons IO is parameterized and takes into consideration the introduction of support for Unicode 15 in JDK 20 (grapheme clusters and so on): https://github.com/apache/commons-io/blob/b4ee32c53c0036429d64c0d6fe82a62a0fc6dae2/src/test/java/org/apache/commons/io/FileSystemTest.java#L171

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe have one method testing the various null cases (more than at the moment), one for the usual single-byte ASCII cases, one for BMP tests, then one for grapheme cases, and one for the dynamic cases. I would also avoid emoji literals in source.

assertNull(StringUtils.truncateToByteLength(null, 0, Charset.defaultCharset()));
assertEquals("abcdefghij", StringUtils.truncateToByteLength("abcdefghijklmno", 10, Charset.defaultCharset()));
assertEquals("abcdefghijklmno", StringUtils.truncateToByteLength("abcdefghijklmno", 15, Charset.defaultCharset()));
assertEquals("abcdefghijklmno", StringUtils.truncateToByteLength("abcdefghijklmno", 20, Charset.defaultCharset()));
assertEquals("\u4F60\u597D\u55CE", StringUtils.truncateToByteLength("\u4F60\u597D\u55CE", 10, Charset.defaultCharset()));
assertEquals("\u4F60", StringUtils.truncateToByteLength("\u4F60\u597D\u55CE", 5, Charset.defaultCharset()));
assertEquals("\u2713\u2714", StringUtils.truncateToByteLength("\u2713\u2714", 6, Charset.defaultCharset()));
assertEquals("", StringUtils.truncateToByteLength("\u2713\u2714", 2, Charset.defaultCharset()));
assertEquals("\uD83D\uDE80", StringUtils.truncateToByteLength("\uD83D\uDE80\u2728\uD83C\uDF89", 6, Charset.defaultCharset()));
assertEquals("", StringUtils.truncateToByteLength("\uD83D\uDE80\u2728\uD83C\uDF89", 3, Charset.defaultCharset()));
assertEquals("", StringUtils.truncateToByteLength("\uD83D\uDE03", 3, Charset.defaultCharset()));
assertEquals("\uD83D\uDE03", StringUtils.truncateToByteLength("\uD83D\uDE03", 4, Charset.defaultCharset()));
assertEquals("\uD83D\uDE03\uD83D\uDE03", StringUtils.truncateToByteLength(
"\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03", 9, Charset.defaultCharset()));

for (int i = 0; i < 100; ++i) {
String s = StringUtils.truncateToByteLength("🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊", i, Charset.defaultCharset());
assertNotNull(s);
byte[] data = s.getBytes();
assertTrue(data.length <= i);
}
}

@Test
void testUnCapitalize() {
assertNull(StringUtils.uncapitalize(null));
Expand Down