Skip to content

Commit d4e2e22

Browse files
authored
Merge branch 'master' into fix/CODEC-343_base32_hex_decode_table
2 parents 7dcaa2a + 833558b commit d4e2e22

12 files changed

Lines changed: 163 additions & 23 deletions

File tree

src/changes/changes.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ The <action> type attribute can be add,update,fix,remove.
4646
<release version="1.22.1" date="YYYY-MM-DD" description="This is a feature and maintenance release. Java 8 or later is required.">
4747
<!-- FIX -->
4848
<action type="fix" issue="CODEC-343" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base32.Builder.setHexDecodeTable(boolean) sets the encode table to a decode lookup table.</action>
49+
<action type="fix" issue="CODEC-341" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base16.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output.</action>
50+
<action type="fix" issue="CODEC-339" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to emit URL encoding control characters.</action>
51+
<action type="fix" issue="CODEC-338" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">PercentCodec loses literal '+' when plusForSpace is enabled.</action>
4952
<action type="fix" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong, Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the real input (#431).</action>
5053
<!-- ADD -->
5154
<!-- UPDATE -->

src/main/java/org/apache/commons/codec/binary/Base16.java

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
* </p>
3434
* <p>
3535
* The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
36-
* alphabet.
36+
* alphabet, and configuring a custom 16-byte alphabet with {@link Builder#setEncodeTable(byte...)}.
3737
* </p>
3838
*
3939
* @see Base16InputStream
@@ -78,10 +78,18 @@ public Base16 get() {
7878
return new Base16(this);
7979
}
8080

81+
/**
82+
* Sets the Base16 encode table and derives the matching decode table.
83+
*
84+
* @param encodeTable 16 unique bytes; {@code null} resets to the default upper-case table.
85+
* @return {@code this} instance.
86+
* @throws IllegalArgumentException if {@code encodeTable} does not contain 16 unique bytes.
87+
*/
8188
@Override
8289
public Builder setEncodeTable(final byte... encodeTable) {
83-
super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
84-
return super.setEncodeTable(encodeTable);
90+
final byte[] table = encodeTable != null ? encodeTable : UPPER_CASE_ENCODE_TABLE;
91+
super.setDecodeTableRaw(toDecodeTable(table));
92+
return super.setEncodeTable(table);
8593
}
8694

8795
/**
@@ -91,8 +99,7 @@ public Builder setEncodeTable(final byte... encodeTable) {
9199
* @return {@code this} instance.
92100
*/
93101
public Builder setLowerCase(final boolean lowerCase) {
94-
setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
95-
return asThis();
102+
return setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
96103
}
97104

98105
}
@@ -162,6 +169,32 @@ public static Builder builder() {
162169
return new Builder();
163170
}
164171

172+
private static byte[] toDecodeTable(final byte[] encodeTable) {
173+
if (Arrays.equals(encodeTable, UPPER_CASE_ENCODE_TABLE)) {
174+
return UPPER_CASE_DECODE_TABLE;
175+
}
176+
if (Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE)) {
177+
return LOWER_CASE_DECODE_TABLE;
178+
}
179+
if (encodeTable.length != 1 << BITS_PER_ENCODED_BYTE) {
180+
throw new IllegalArgumentException("Base16 encode table must contain 16 entries.");
181+
}
182+
int max = -1;
183+
for (final byte b : encodeTable) {
184+
max = Math.max(max, b & 0xff);
185+
}
186+
final byte[] decodeTable = new byte[max + 1];
187+
Arrays.fill(decodeTable, (byte) -1);
188+
for (int i = 0; i < encodeTable.length; i++) {
189+
final int b = encodeTable[i] & 0xff;
190+
if (decodeTable[b] != -1) {
191+
throw new IllegalArgumentException("Duplicate value in Base16 encode table: " + b);
192+
}
193+
decodeTable[b] = (byte) i;
194+
}
195+
return decodeTable;
196+
}
197+
165198
/**
166199
* Constructs a Base16 codec used for decoding and encoding.
167200
*/
@@ -241,8 +274,9 @@ void decode(final byte[] data, int offset, final int length, final Context conte
241274

242275
private int decodeOctet(final byte octet) {
243276
int decoded = -1;
244-
if ((octet & 0xff) < decodeTable.length) {
245-
decoded = decodeTable[octet];
277+
final int b = octet & 0xff;
278+
if (b < decodeTable.length) {
279+
decoded = decodeTable[b];
246280
}
247281
if (decoded == -1) {
248282
throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
@@ -282,7 +316,8 @@ void encode(final byte[] data, final int offset, final int length, final Context
282316
*/
283317
@Override
284318
public boolean isInAlphabet(final byte octet) {
285-
return isInAlphabet((byte) (octet & 0xff), decodeTable);
319+
final int b = octet & 0xff;
320+
return b < decodeTable.length && decodeTable[b] != -1;
286321
}
287322

288323
/**

src/main/java/org/apache/commons/codec/binary/Base58.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public Base58.Builder setEncodeTable(final byte... encodeTable) {
8787
}
8888
private static final BigInteger BASE = BigInteger.valueOf(58);
8989

90-
private static final byte[] EMPTY = new byte[0];
90+
private static final byte[] EMPTY = {};
9191

9292
/**
9393
* Base58 alphabet: 123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz

src/main/java/org/apache/commons/codec/binary/BaseNCodec.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
5858
*/
5959
public abstract static class AbstractBuilder<T, B extends AbstractBuilder<T, B>> implements Supplier<T> {
6060

61+
/**
62+
* Clones the given array or returns a default array if the array is null.
63+
*
64+
* @param array The array to test and clone if not null.
65+
* @param defaultArray The default array to return if the array is null.
66+
* @return A clone of the array or the default array if the array is null.
67+
*/
68+
static byte[] clone(final byte[] array, final byte[] defaultArray) {
69+
return array != null ? array.clone() : defaultArray;
70+
}
71+
6172
private int unencodedBlockSize;
6273
private int encodedBlockSize;
6374
private CodecPolicy decodingPolicy = DECODING_POLICY_DEFAULT;
@@ -132,7 +143,7 @@ int getUnencodedBlockSize() {
132143
* @since 1.20.0
133144
*/
134145
public B setDecodeTable(final byte[] decodeTable) {
135-
this.decodeTable = decodeTable != null ? decodeTable.clone() : null;
146+
this.decodeTable = clone(decodeTable, null);
136147
return asThis();
137148
}
138149

@@ -176,7 +187,7 @@ B setEncodedBlockSize(final int encodedBlockSize) {
176187
* @return {@code this} instance.
177188
*/
178189
public B setEncodeTable(final byte... encodeTable) {
179-
this.encodeTable = encodeTable != null ? encodeTable.clone() : defaultEncodeTable;
190+
this.encodeTable = clone(encodeTable, defaultEncodeTable);
180191
return asThis();
181192
}
182193

@@ -209,7 +220,7 @@ public B setLineLength(final int lineLength) {
209220
* @return {@code this} instance.
210221
*/
211222
public B setLineSeparator(final byte... lineSeparator) {
212-
this.lineSeparator = lineSeparator != null ? lineSeparator.clone() : CHUNK_SEPARATOR;
223+
this.lineSeparator = clone(lineSeparator , CHUNK_SEPARATOR);
213224
return asThis();
214225
}
215226

src/main/java/org/apache/commons/codec/net/PercentCodec.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ public class PercentCodec implements BinaryEncoder, BinaryDecoder {
4444
*/
4545
private static final byte ESCAPE_CHAR = '%';
4646

47+
/**
48+
* The plus character used to encode spaces when plusForSpace is true.
49+
*/
50+
private static final byte PLUS_CHAR = '+';
51+
4752
/**
4853
* The bit set used to store the character that should be always encoded.
4954
*/
@@ -80,6 +85,9 @@ public PercentCodec() {
8085
public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
8186
this.plusForSpace = plusForSpace;
8287
insertAlwaysEncodeChars(alwaysEncodeChars);
88+
if (plusForSpace) {
89+
insertAlwaysEncodeChar(PLUS_CHAR);
90+
}
8391
}
8492

8593
private boolean canEncode(final byte c) {

src/main/java/org/apache/commons/codec/net/QCodec.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,6 @@ public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder
101101
PRINTABLE_CHARS.set('}');
102102
PRINTABLE_CHARS.set('~');
103103
}
104-
private static final byte SPACE = 32;
105-
106104
private static final byte UNDERSCORE = 95;
107105

108106
private boolean encodeBlanks;
@@ -201,7 +199,7 @@ protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
201199
if (b != UNDERSCORE) {
202200
tmp[i] = b;
203201
} else {
204-
tmp[i] = SPACE;
202+
tmp[i] = Utils.SPACE;
205203
}
206204
}
207205
return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
@@ -217,7 +215,7 @@ protected byte[] doEncoding(final byte[] bytes) {
217215
final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
218216
if (this.encodeBlanks) {
219217
for (int i = 0; i < data.length; i++) {
220-
if (data[i] == SPACE) {
218+
if (data[i] == Utils.SPACE) {
221219
data[i] = UNDERSCORE;
222220
}
223221
}

src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, Strin
7272
private static final BitSet PRINTABLE_CHARS = new BitSet(256);
7373
private static final byte ESCAPE_CHAR = '=';
7474
private static final byte TAB = 9;
75-
private static final byte SPACE = 32;
7675
private static final byte CR = 13;
7776
private static final byte LF = 10;
7877

@@ -96,7 +95,7 @@ public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, Strin
9695
PRINTABLE_CHARS.set(i);
9796
}
9897
PRINTABLE_CHARS.set(TAB);
99-
PRINTABLE_CHARS.set(SPACE);
98+
PRINTABLE_CHARS.set(Utils.SPACE);
10099
}
101100

102101
/**
@@ -284,7 +283,7 @@ private static int getUnsignedOctet(final int index, final byte[] bytes) {
284283
* @return {@code true} if the byte is either a space or tab character.
285284
*/
286285
private static boolean isWhitespace(final int b) {
287-
return b == SPACE || b == TAB;
286+
return b == Utils.SPACE || b == TAB;
288287
}
289288

290289
/**

src/main/java/org/apache/commons/codec/net/URLCodec.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, St
5353
*/
5454
protected static final byte ESCAPE_CHAR = '%';
5555

56+
private static final byte PLUS_CHAR = '+';
57+
5658
/**
5759
* BitSet of www-form-url safe characters.
5860
* This is a copy of the internal BitSet which is now used for the conversion.
@@ -107,7 +109,7 @@ public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException
107109
final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
108110
for (int i = 0; i < bytes.length; i++) {
109111
final int b = bytes[i];
110-
if (b == '+') {
112+
if (b == PLUS_CHAR) {
111113
buffer.write(' ');
112114
} else if (b == ESCAPE_CHAR) {
113115
try {
@@ -126,9 +128,11 @@ public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException
126128

127129
/**
128130
* Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
131+
* The characters {@code %} and {@code +} are always escaped because {@link #decodeUrl(byte[])}
132+
* treats them as URL-encoding syntax.
129133
*
130134
* @param urlsafe
131-
* bitset of characters deemed URL safe.
135+
* bitset of characters deemed URL safe; {@code %} and {@code +} are escaped even when set in this bitset.
132136
* @param bytes
133137
* array of bytes to convert to URL safe characters.
134138
* @return array of bytes containing URL safe characters.
@@ -147,9 +151,9 @@ public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
147151
if (b < 0) {
148152
b = 256 + b;
149153
}
150-
if (urlsafe.get(b)) {
154+
if (urlsafe.get(b) && b != ESCAPE_CHAR && b != PLUS_CHAR) {
151155
if (b == ' ') {
152-
b = '+';
156+
b = PLUS_CHAR;
153157
}
154158
buffer.write(b);
155159
} else {

src/main/java/org/apache/commons/codec/net/Utils.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ final class Utils {
3535
*/
3636
private static final int RADIX = 16;
3737

38+
static final byte SPACE = 32;
39+
3840
/**
3941
* Returns the numeric value of the character {@code b} in radix 16.
4042
*

src/test/java/org/apache/commons/codec/binary/Base16Test.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,15 @@ private void testBase16InBuffer(final int startPasSize, final int endPadSize) {
9191
assertEquals("48656C6C6F20576F726C64", encodedContent, "encoding hello world");
9292
}
9393

94+
@Test
95+
void testBuilderSetLowerCaseDecodesOwnOutput() {
96+
final Base16 base16 = Base16.builder().setLowerCase(true).get();
97+
final byte[] data = { (byte) 0xab };
98+
final byte[] encoded = base16.encode(data);
99+
assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII));
100+
assertArrayEquals(data, base16.decode(encoded));
101+
}
102+
94103
@Test
95104
void testByteToStringVariations() {
96105
final Base16 base16 = new Base16();
@@ -149,6 +158,30 @@ void testConstructors() {
149158
new Base16(false, CodecPolicy.STRICT);
150159
}
151160

161+
@Test
162+
void testCustomEncodeTableAffectsDecodeTable() {
163+
final byte[] encodeTable = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII);
164+
final byte tmp = encodeTable[0];
165+
encodeTable[0] = encodeTable[1];
166+
encodeTable[1] = tmp;
167+
final Base16 base16 = Base16.builder().setEncodeTable(encodeTable).get();
168+
final byte[] encoded = base16.encode(new byte[] { 1 });
169+
assertEquals("10", new String(encoded, StandardCharsets.US_ASCII));
170+
assertArrayEquals(new byte[] { 1 }, base16.decode(encoded));
171+
}
172+
173+
@Test
174+
void testCustomEncodeTableRejectsDuplicates() {
175+
final byte[] encodeTable = "00123456789ABCDE".getBytes(StandardCharsets.US_ASCII);
176+
assertThrows(IllegalArgumentException.class, () -> Base16.builder().setEncodeTable(encodeTable));
177+
}
178+
179+
@Test
180+
void testCustomEncodeTableRejectsInvalidLength() {
181+
assertThrows(IllegalArgumentException.class,
182+
() -> Base16.builder().setEncodeTable("0123456789ABCDE".getBytes(StandardCharsets.US_ASCII)));
183+
}
184+
152185
@Test
153186
void testDecodeSingleBytes() {
154187
final String encoded = "556E74696C206E6578742074696D6521";

0 commit comments

Comments
 (0)