Skip to content

Commit f890da3

Browse files
[CODEC-341] Fix Base16 custom alphabet decode table (#434)
* [CODEC-341] Fix Base16 custom alphabet decode table

  Derive Base16 decode tables from custom encode tables so a configured codec can decode its own output. Reject encode tables that do not contain exactly 16 unique byte values.

  Reviewed-by: OpenAI Codex
  Reviewed-by: Anthropic Claude Code

* Update Base16Test.java

* Remove test clutter.

---------

Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
1 parent 143dd52 commit f890da3

3 files changed

Lines changed: 77 additions & 8 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ The <action> type attribute can be add,update,fix,remove.
4545
<body>
4646
<release version="1.22.1" date="YYYY-MM-DD" description="This is a feature and maintenance release. Java 8 or later is required.">
4747
<!-- FIX -->
48+
<action type="fix" issue="CODEC-341" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base16.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output.</action>
4849
<action type="fix" issue="CODEC-339" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to emit URL encoding control characters.</action>
4950
<action type="fix" issue="CODEC-338" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">PercentCodec loses literal '+' when plusForSpace is enabled.</action>
5051
<action type="add" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong, Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the real input (#431).</action>

src/main/java/org/apache/commons/codec/binary/Base16.java

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
* </p>
3434
* <p>
3535
* The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
36-
* alphabet.
36+
* alphabet, and for configuring a custom 16-byte alphabet with {@link Builder#setEncodeTable(byte...)}.
3737
* </p>
3838
*
3939
* @see Base16InputStream
@@ -78,10 +78,18 @@ public Base16 get() {
7878
return new Base16(this);
7979
}
8080

81+
/**
82+
* Sets the Base16 encode table and derives the matching decode table.
83+
*
84+
* @param encodeTable 16 unique bytes; {@code null} resets to the default upper-case table.
85+
* @return {@code this} instance.
86+
* @throws IllegalArgumentException if {@code encodeTable} does not contain 16 unique bytes.
87+
*/
8188
@Override
8289
public Builder setEncodeTable(final byte... encodeTable) {
83-
super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
84-
return super.setEncodeTable(encodeTable);
90+
final byte[] table = encodeTable != null ? encodeTable : UPPER_CASE_ENCODE_TABLE;
91+
super.setDecodeTableRaw(toDecodeTable(table));
92+
return super.setEncodeTable(table);
8593
}
8694

8795
/**
@@ -91,8 +99,7 @@ public Builder setEncodeTable(final byte... encodeTable) {
9199
* @return {@code this} instance.
92100
*/
93101
public Builder setLowerCase(final boolean lowerCase) {
94-
setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
95-
return asThis();
102+
return setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
96103
}
97104

98105
}
@@ -152,6 +159,32 @@ public Builder setLowerCase(final boolean lowerCase) {
152159
/** Mask used to extract 4 bits, used when decoding character. */
153160
private static final int MASK_4_BITS = 0x0f;
154161

162+
private static byte[] toDecodeTable(final byte[] encodeTable) {
163+
if (Arrays.equals(encodeTable, UPPER_CASE_ENCODE_TABLE)) {
164+
return UPPER_CASE_DECODE_TABLE;
165+
}
166+
if (Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE)) {
167+
return LOWER_CASE_DECODE_TABLE;
168+
}
169+
if (encodeTable.length != 1 << BITS_PER_ENCODED_BYTE) {
170+
throw new IllegalArgumentException("Base16 encode table must contain 16 entries.");
171+
}
172+
int max = -1;
173+
for (final byte b : encodeTable) {
174+
max = Math.max(max, b & 0xff);
175+
}
176+
final byte[] decodeTable = new byte[max + 1];
177+
Arrays.fill(decodeTable, (byte) -1);
178+
for (int i = 0; i < encodeTable.length; i++) {
179+
final int b = encodeTable[i] & 0xff;
180+
if (decodeTable[b] != -1) {
181+
throw new IllegalArgumentException("Duplicate value in Base16 encode table: " + b);
182+
}
183+
decodeTable[b] = (byte) i;
184+
}
185+
return decodeTable;
186+
}
187+
155188
/**
156189
* Constructs a new builder.
157190
*
@@ -241,8 +274,9 @@ void decode(final byte[] data, int offset, final int length, final Context conte
241274

242275
private int decodeOctet(final byte octet) {
243276
int decoded = -1;
244-
if ((octet & 0xff) < decodeTable.length) {
245-
decoded = decodeTable[octet];
277+
final int b = octet & 0xff;
278+
if (b < decodeTable.length) {
279+
decoded = decodeTable[b];
246280
}
247281
if (decoded == -1) {
248282
throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
@@ -282,7 +316,8 @@ void encode(final byte[] data, final int offset, final int length, final Context
282316
*/
283317
@Override
284318
public boolean isInAlphabet(final byte octet) {
285-
return isInAlphabet((byte) (octet & 0xff), decodeTable);
319+
final int b = octet & 0xff;
320+
return b < decodeTable.length && decodeTable[b] != -1;
286321
}
287322

288323
/**

src/test/java/org/apache/commons/codec/binary/Base16Test.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,39 @@ void testConstructors() {
149149
new Base16(false, CodecPolicy.STRICT);
150150
}
151151

152+
@Test
153+
void testCustomEncodeTableAffectsDecodeTable() {
154+
final byte[] encodeTable = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII);
155+
final byte tmp = encodeTable[0];
156+
encodeTable[0] = encodeTable[1];
157+
encodeTable[1] = tmp;
158+
final Base16 base16 = Base16.builder().setEncodeTable(encodeTable).get();
159+
final byte[] encoded = base16.encode(new byte[] { 1 });
160+
assertEquals("10", new String(encoded, StandardCharsets.US_ASCII));
161+
assertArrayEquals(new byte[] { 1 }, base16.decode(encoded));
162+
}
163+
164+
@Test
165+
void testCustomEncodeTableRejectsDuplicates() {
166+
final byte[] encodeTable = "00123456789ABCDE".getBytes(StandardCharsets.US_ASCII);
167+
assertThrows(IllegalArgumentException.class, () -> Base16.builder().setEncodeTable(encodeTable));
168+
}
169+
170+
@Test
171+
void testCustomEncodeTableRejectsInvalidLength() {
172+
assertThrows(IllegalArgumentException.class,
173+
() -> Base16.builder().setEncodeTable("0123456789ABCDE".getBytes(StandardCharsets.US_ASCII)));
174+
}
175+
176+
@Test
177+
void testBuilderSetLowerCaseDecodesOwnOutput() {
178+
final Base16 base16 = Base16.builder().setLowerCase(true).get();
179+
final byte[] data = { (byte) 0xab };
180+
final byte[] encoded = base16.encode(data);
181+
assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII));
182+
assertArrayEquals(data, base16.decode(encoded));
183+
}
184+
152185
@Test
153186
void testDecodeSingleBytes() {
154187
final String encoded = "556E74696C206E6578742074696D6521";

0 commit comments

Comments
 (0)