Skip to content

Commit 5af4c8e

Browse files
authored
Merge branch 'master' into fix/CODEC-340_base58_custom_alphabet
2 parents e878731 + ac98257 commit 5af4c8e

14 files changed

Lines changed: 274 additions & 31 deletions

File tree

src/changes/changes.xml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,13 @@ The <action> type attribute can be add,update,fix,remove.
4646
<release version="1.22.1" date="YYYY-MM-DD" description="This is a feature and maintenance release. Java 8 or later is required.">
4747
<!-- FIX -->
4848
<action type="fix" issue="CODEC-340" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base58.Builder.setEncodeTable(byte...) is ignored when encoding and decoding.</action>
49-
<action type="add" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong, Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the real input (#431).</action>
49+
<action type="fix" issue="CODEC-342" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base32.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output.</action>
50+
<action type="fix" issue="CODEC-343" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base32.Builder.setHexDecodeTable(boolean) sets the encode table to a decode lookup table.</action>
51+
<action type="fix" issue="CODEC-341" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base16.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output.</action>
52+
<action type="fix" issue="CODEC-339" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to emit URL encoding control characters.</action>
53+
<action type="fix" issue="CODEC-338" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">PercentCodec loses literal '+' when plusForSpace is enabled.</action>
5054
<!-- ADD -->
55+
<action type="add" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong, Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the real input (#431).</action>
5156
<!-- UPDATE -->
5257
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 98 to 102.</action>
5358
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-io:commons-io from 2.21.0 to 2.22.0.</action>

src/main/java/org/apache/commons/codec/binary/Base16.java

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
* </p>
3434
* <p>
3535
* The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
36-
* alphabet.
36+
* alphabet, and configuring a custom 16-byte alphabet with {@link Builder#setEncodeTable(byte...)}.
3737
* </p>
3838
*
3939
* @see Base16InputStream
@@ -78,10 +78,18 @@ public Base16 get() {
7878
return new Base16(this);
7979
}
8080

81+
/**
82+
* Sets the Base16 encode table and derives the matching decode table.
83+
*
84+
* @param encodeTable 16 unique bytes; {@code null} resets to the default upper-case table.
85+
* @return {@code this} instance.
86+
* @throws IllegalArgumentException if {@code encodeTable} does not contain 16 unique bytes.
87+
*/
8188
@Override
8289
public Builder setEncodeTable(final byte... encodeTable) {
83-
super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
84-
return super.setEncodeTable(encodeTable);
90+
final byte[] table = encodeTable != null ? encodeTable : UPPER_CASE_ENCODE_TABLE;
91+
super.setDecodeTableRaw(toDecodeTable(table));
92+
return super.setEncodeTable(table);
8593
}
8694

8795
/**
@@ -91,8 +99,7 @@ public Builder setEncodeTable(final byte... encodeTable) {
9199
* @return {@code this} instance.
92100
*/
93101
public Builder setLowerCase(final boolean lowerCase) {
94-
setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
95-
return asThis();
102+
return setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
96103
}
97104

98105
}
@@ -162,6 +169,32 @@ public static Builder builder() {
162169
return new Builder();
163170
}
164171

172+
private static byte[] toDecodeTable(final byte[] encodeTable) {
173+
if (Arrays.equals(encodeTable, UPPER_CASE_ENCODE_TABLE)) {
174+
return UPPER_CASE_DECODE_TABLE;
175+
}
176+
if (Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE)) {
177+
return LOWER_CASE_DECODE_TABLE;
178+
}
179+
if (encodeTable.length != 1 << BITS_PER_ENCODED_BYTE) {
180+
throw new IllegalArgumentException("Base16 encode table must contain 16 entries.");
181+
}
182+
int max = -1;
183+
for (final byte b : encodeTable) {
184+
max = Math.max(max, b & 0xff);
185+
}
186+
final byte[] decodeTable = new byte[max + 1];
187+
Arrays.fill(decodeTable, (byte) -1);
188+
for (int i = 0; i < encodeTable.length; i++) {
189+
final int b = encodeTable[i] & 0xff;
190+
if (decodeTable[b] != -1) {
191+
throw new IllegalArgumentException("Duplicate value in Base16 encode table: " + b);
192+
}
193+
decodeTable[b] = (byte) i;
194+
}
195+
return decodeTable;
196+
}
197+
165198
/**
166199
* Constructs a Base16 codec used for decoding and encoding.
167200
*/
@@ -241,8 +274,9 @@ void decode(final byte[] data, int offset, final int length, final Context conte
241274

242275
private int decodeOctet(final byte octet) {
243276
int decoded = -1;
244-
if ((octet & 0xff) < decodeTable.length) {
245-
decoded = decodeTable[octet];
277+
final int b = octet & 0xff;
278+
if (b < decodeTable.length) {
279+
decoded = decodeTable[b];
246280
}
247281
if (decoded == -1) {
248282
throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
@@ -282,7 +316,8 @@ void encode(final byte[] data, final int offset, final int length, final Context
282316
*/
283317
@Override
284318
public boolean isInAlphabet(final byte octet) {
285-
return isInAlphabet((byte) (octet & 0xff), decodeTable);
319+
final int b = octet & 0xff;
320+
return b < decodeTable.length && decodeTable[b] != -1;
286321
}
287322

288323
/**

src/main/java/org/apache/commons/codec/binary/Base32.java

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,24 @@ public Base32 get() {
9696
return new Base32(this);
9797
}
9898

99+
/**
100+
* Sets the encode table and derives the matching decode table.
101+
* <p>
102+
* The RFC 4648 Base32 and Base32 Hex tables keep their case-insensitive decoders.
103+
* </p>
104+
*
105+
* @param encodeTable the encode table with exactly 32 unique entries; {@code null} resets to the default.
106+
* @return {@code this} instance.
107+
* @throws IllegalArgumentException if the encode table does not contain exactly 32 unique entries.
108+
*/
99109
@Override
100110
public Builder setEncodeTable(final byte... encodeTable) {
101-
super.setDecodeTableRaw(Arrays.equals(encodeTable, HEX_ENCODE_TABLE) ? HEX_DECODE_TABLE : DECODE_TABLE);
111+
super.setDecodeTableRaw(toDecodeTable(encodeTable));
102112
return super.setEncodeTable(encodeTable);
103113
}
104114

105115
/**
106-
* Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
116+
* Sets the encode and decode tables to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
107117
* <p>
108118
* This overrides a value previously set with {@link #setEncodeTable(byte...)}.
109119
* </p>
@@ -113,7 +123,7 @@ public Builder setEncodeTable(final byte... encodeTable) {
113123
* @since 1.18.0
114124
*/
115125
public Builder setHexDecodeTable(final boolean useHex) {
116-
return setEncodeTable(decodeTable(useHex));
126+
return setEncodeTable(encodeTable(useHex));
117127
}
118128

119129
/**
@@ -145,6 +155,8 @@ public Builder setHexEncodeTable(final boolean useHex) {
145155

146156
private static final int BYTES_PER_ENCODED_BLOCK = 8;
147157
private static final int BYTES_PER_UNENCODED_BLOCK = 5;
158+
private static final int DECODING_TABLE_LENGTH = 256;
159+
private static final int ENCODING_TABLE_LENGTH = 1 << BITS_PER_ENCODED_BYTE;
148160

149161
/**
150162
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
@@ -256,6 +268,29 @@ public static Builder builder() {
256268
return new Builder();
257269
}
258270

271+
/**
272+
* Calculates a decode table for a given encode table.
273+
*
274+
* @param encodeTable the encode table used to derive the decode lookup table.
275+
* @return A new decode table.
276+
* @throws IllegalArgumentException if the encode table does not contain exactly 32 unique entries.
277+
*/
278+
private static byte[] calculateDecodeTable(final byte[] encodeTable) {
279+
if (encodeTable.length != ENCODING_TABLE_LENGTH) {
280+
throw new IllegalArgumentException("encodeTable must have exactly 32 entries.");
281+
}
282+
final byte[] decodeTable = new byte[DECODING_TABLE_LENGTH];
283+
Arrays.fill(decodeTable, (byte) -1);
284+
for (int i = 0; i < encodeTable.length; i++) {
285+
final int encodedByte = encodeTable[i] & 0xff;
286+
if (decodeTable[encodedByte] != -1) {
287+
throw new IllegalArgumentException("encodeTable must not contain duplicate entries.");
288+
}
289+
decodeTable[encodedByte] = (byte) i;
290+
}
291+
return decodeTable;
292+
}
293+
259294
private static byte[] decodeTable(final boolean useHex) {
260295
return useHex ? HEX_DECODE_TABLE : DECODE_TABLE;
261296
}
@@ -276,6 +311,23 @@ private static byte[] encodeTable(final boolean useHex) {
276311
return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE;
277312
}
278313

314+
/**
315+
* Gets the decode table that matches the given encode table.
316+
*
317+
* @param encodeTable the encode table used to select or derive the decode lookup table.
318+
* @return the matching decode table.
319+
*/
320+
private static byte[] toDecodeTable(final byte[] encodeTable) {
321+
final byte[] table = encodeTable != null ? encodeTable : ENCODE_TABLE;
322+
if (Arrays.equals(table, ENCODE_TABLE)) {
323+
return DECODE_TABLE;
324+
}
325+
if (Arrays.equals(table, HEX_ENCODE_TABLE)) {
326+
return HEX_DECODE_TABLE;
327+
}
328+
return calculateDecodeTable(table);
329+
}
330+
279331
/**
280332
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link
281333
* #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;}
@@ -530,14 +582,14 @@ void decode(final byte[] input, int inPos, final int inAvail, final Context cont
530582
}
531583
final int decodeSize = this.encodeSize - 1;
532584
for (int i = 0; i < inAvail; i++) {
533-
final byte b = input[inPos++];
534-
if (b == pad) {
585+
final int b = input[inPos++] & 0xff;
586+
if (b == (pad & 0xff)) {
535587
// We're done.
536588
context.eof = true;
537589
break;
538590
}
539591
final byte[] buffer = ensureBufferSize(decodeSize, context);
540-
if (b >= 0 && b < this.decodeTable.length) {
592+
if (b < this.decodeTable.length) {
541593
final int result = this.decodeTable[b];
542594
if (result >= 0) {
543595
context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
@@ -738,7 +790,8 @@ byte[] getLineSeparator() {
738790
*/
739791
@Override
740792
public boolean isInAlphabet(final byte octet) {
741-
return isInAlphabet(octet, decodeTable);
793+
final int value = octet & 0xff;
794+
return value < decodeTable.length && decodeTable[value] != -1;
742795
}
743796

744797
/**

src/main/java/org/apache/commons/codec/binary/Base58.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public Base58.Builder setEncodeTable(final byte... encodeTable) {
9292
private static final int DECODING_TABLE_LENGTH = 256;
9393
private static final int ENCODING_TABLE_LENGTH = 58;
9494

95-
private static final byte[] EMPTY = new byte[0];
95+
private static final byte[] EMPTY = {};
9696

9797
/**
9898
* Base58 alphabet: 123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz

src/main/java/org/apache/commons/codec/binary/BaseNCodec.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
5858
*/
5959
public abstract static class AbstractBuilder<T, B extends AbstractBuilder<T, B>> implements Supplier<T> {
6060

61+
/**
62+
* Clones the given array or returns a default array if the array is null.
63+
*
64+
* @param array The array to test and clone if not null.
65+
* @param defaultArray The default array to return if the array is null.
66+
* @return A clone of the array or the default array if the array is null.
67+
*/
68+
static byte[] clone(final byte[] array, final byte[] defaultArray) {
69+
return array != null ? array.clone() : defaultArray;
70+
}
71+
6172
private int unencodedBlockSize;
6273
private int encodedBlockSize;
6374
private CodecPolicy decodingPolicy = DECODING_POLICY_DEFAULT;
@@ -132,7 +143,7 @@ int getUnencodedBlockSize() {
132143
* @since 1.20.0
133144
*/
134145
public B setDecodeTable(final byte[] decodeTable) {
135-
this.decodeTable = decodeTable != null ? decodeTable.clone() : null;
146+
this.decodeTable = clone(decodeTable, null);
136147
return asThis();
137148
}
138149

@@ -176,7 +187,7 @@ B setEncodedBlockSize(final int encodedBlockSize) {
176187
* @return {@code this} instance.
177188
*/
178189
public B setEncodeTable(final byte... encodeTable) {
179-
this.encodeTable = encodeTable != null ? encodeTable.clone() : defaultEncodeTable;
190+
this.encodeTable = clone(encodeTable, defaultEncodeTable);
180191
return asThis();
181192
}
182193

@@ -209,7 +220,7 @@ public B setLineLength(final int lineLength) {
209220
* @return {@code this} instance.
210221
*/
211222
public B setLineSeparator(final byte... lineSeparator) {
212-
this.lineSeparator = lineSeparator != null ? lineSeparator.clone() : CHUNK_SEPARATOR;
223+
this.lineSeparator = clone(lineSeparator , CHUNK_SEPARATOR);
213224
return asThis();
214225
}
215226

src/main/java/org/apache/commons/codec/net/PercentCodec.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ public class PercentCodec implements BinaryEncoder, BinaryDecoder {
4444
*/
4545
private static final byte ESCAPE_CHAR = '%';
4646

47+
/**
48+
* The plus character used to encode spaces when plusForSpace is true.
49+
*/
50+
private static final byte PLUS_CHAR = '+';
51+
4752
/**
4853
* The bit set used to store the character that should be always encoded.
4954
*/
@@ -80,6 +85,9 @@ public PercentCodec() {
8085
public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
8186
this.plusForSpace = plusForSpace;
8287
insertAlwaysEncodeChars(alwaysEncodeChars);
88+
if (plusForSpace) {
89+
insertAlwaysEncodeChar(PLUS_CHAR);
90+
}
8391
}
8492

8593
private boolean canEncode(final byte c) {

src/main/java/org/apache/commons/codec/net/QCodec.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,6 @@ public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder
101101
PRINTABLE_CHARS.set('}');
102102
PRINTABLE_CHARS.set('~');
103103
}
104-
private static final byte SPACE = 32;
105-
106104
private static final byte UNDERSCORE = 95;
107105

108106
private boolean encodeBlanks;
@@ -201,7 +199,7 @@ protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
201199
if (b != UNDERSCORE) {
202200
tmp[i] = b;
203201
} else {
204-
tmp[i] = SPACE;
202+
tmp[i] = Utils.SPACE;
205203
}
206204
}
207205
return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
@@ -217,7 +215,7 @@ protected byte[] doEncoding(final byte[] bytes) {
217215
final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
218216
if (this.encodeBlanks) {
219217
for (int i = 0; i < data.length; i++) {
220-
if (data[i] == SPACE) {
218+
if (data[i] == Utils.SPACE) {
221219
data[i] = UNDERSCORE;
222220
}
223221
}

src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, Strin
7272
private static final BitSet PRINTABLE_CHARS = new BitSet(256);
7373
private static final byte ESCAPE_CHAR = '=';
7474
private static final byte TAB = 9;
75-
private static final byte SPACE = 32;
7675
private static final byte CR = 13;
7776
private static final byte LF = 10;
7877

@@ -96,7 +95,7 @@ public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, Strin
9695
PRINTABLE_CHARS.set(i);
9796
}
9897
PRINTABLE_CHARS.set(TAB);
99-
PRINTABLE_CHARS.set(SPACE);
98+
PRINTABLE_CHARS.set(Utils.SPACE);
10099
}
101100

102101
/**
@@ -284,7 +283,7 @@ private static int getUnsignedOctet(final int index, final byte[] bytes) {
284283
* @return {@code true} if the byte is either a space or tab character.
285284
*/
286285
private static boolean isWhitespace(final int b) {
287-
return b == SPACE || b == TAB;
286+
return b == Utils.SPACE || b == TAB;
288287
}
289288

290289
/**

0 commit comments

Comments
 (0)