Skip to content

Commit 019ee2c

Browse files
committed
Refactored to use a Lookup Table
1 parent 1b594c0 commit 019ee2c

6 files changed

Lines changed: 586 additions & 587 deletions

File tree

src/main/java/org/apache/commons/codec/binary/Base16.java

Lines changed: 127 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,6 @@
1818
package org.apache.commons.codec.binary;
1919

2020
import org.apache.commons.codec.CodecPolicy;
21-
import org.apache.commons.codec.DecoderException;
22-
23-
import java.nio.charset.Charset;
2421

2522
/**
2623
* Provides Base16 encoding and decoding.
@@ -29,79 +26,118 @@
2926
* This class is thread-safe.
3027
* </p>
3128
*
29+
* @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
30+
*
3231
* @since 1.15
3332
*/
3433
public class Base16 extends BaseNCodec {
3534

36-
private static final int BYTES_PER_UNENCODED_BLOCK = 1;
35+
/**
36+
* BASE16 characters are 4 bits in length.
37+
* They are formed by taking an 8-bit group,
38+
* which is converted into two BASE16 characters.
39+
*/
40+
private static final int BITS_PER_ENCODED_BYTE = 4;
3741
private static final int BYTES_PER_ENCODED_BLOCK = 2;
42+
private static final int BYTES_PER_UNENCODED_BLOCK = 1;
43+
44+
/**
45+
* This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
46+
* in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
47+
* alphabet but fall within the bounds of the array are translated to -1.
48+
*/
49+
private static final byte[] UPPER_CASE_DECODE_TABLE = {
50+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
51+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
52+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
53+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
54+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
55+
-1, 10, 11, 12, 13, 14, 15 // 40-46 A-F
56+
};
3857

39-
private final boolean toLowerCase;
40-
private final Charset charset;
58+
/**
59+
* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
60+
* equivalents as specified in Table 5 of RFC 4648.
61+
*/
62+
private static final byte[] UPPER_CASE_ENCODE_TABLE = {
63+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
64+
'A', 'B', 'C', 'D', 'E', 'F'
65+
};
4166

4267
/**
43-
* Creates a Base16 codec used for decoding and encoding.
68+
* This array is a lookup table that translates Unicode characters drawn from a lower-case "Base16 Alphabet"
69+
* into their 4-bit positive integer equivalents. Characters that are not in the Base16
70+
* alphabet but fall within the bounds of the array are translated to -1.
4471
*/
45-
protected Base16() {
46-
this(Hex.DEFAULT_CHARSET);
47-
}
72+
private static final byte[] LOWER_CASE_DECODE_TABLE = {
73+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
74+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
75+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
76+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
77+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
78+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
79+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
80+
-1, 10, 11, 12, 13, 14, 15 // 60-66 a-f
81+
};
82+
83+
/**
84+
* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
85+
* lower-case equivalents.
86+
*/
87+
private static final byte[] LOWER_CASE_ENCODE_TABLE = {
88+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
89+
'a', 'b', 'c', 'd', 'e', 'f'
90+
};
91+
92+
/** Mask used to extract 4 bits, used when decoding character. */
93+
private static final int MASK_4BITS = 0x0f;
94+
95+
/**
96+
* Decode table to use.
97+
*/
98+
private final byte[] decodeTable;
99+
100+
/**
101+
* Encode table to use.
102+
*/
103+
private final byte[] encodeTable;
48104

49105
/**
50106
* Creates a Base16 codec used for decoding and encoding.
51-
*
52-
* @param charset the charset.
53107
*/
54-
protected Base16(final Charset charset) {
55-
this(true, charset);
108+
public Base16() {
109+
this(false);
56110
}
57111

58112
/**
59113
* Creates a Base16 codec used for decoding and encoding.
60114
*
61-
* @param toLowerCase {@code true} converts to lowercase, {@code false} to uppercase.
62-
* @param charset the charset.
115+
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
63116
*/
64-
protected Base16(final boolean toLowerCase, final Charset charset) {
65-
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0);
66-
this.toLowerCase = toLowerCase;
67-
this.charset = charset;
117+
public Base16(final boolean lowerCase) {
118+
this(lowerCase, DECODING_POLICY_DEFAULT);
68119
}
69120

70121
/**
71122
* Creates a Base16 codec used for decoding and encoding.
72123
*
73-
* @param toLowerCase {@code true} converts to lowercase, {@code false} to uppercase.
74-
* @param charset the charset.
124+
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
75125
* @param decodingPolicy Decoding policy.
76126
*/
77-
protected Base16(final boolean toLowerCase, final Charset charset, final CodecPolicy decodingPolicy) {
127+
public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
78128
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0,
79129
PAD_DEFAULT, decodingPolicy);
80-
this.toLowerCase = toLowerCase;
81-
this.charset = charset;
82-
}
83-
84-
@Override
85-
void encode(final byte[] data, final int offset, final int length, final Context context) {
86-
if (context.eof) {
87-
return;
88-
}
89-
if (length < 0) {
90-
context.eof = true;
91-
return;
130+
if (lowerCase) {
131+
this.encodeTable = LOWER_CASE_ENCODE_TABLE;
132+
this.decodeTable = LOWER_CASE_DECODE_TABLE;
133+
} else {
134+
this.encodeTable = UPPER_CASE_ENCODE_TABLE;
135+
this.decodeTable = UPPER_CASE_DECODE_TABLE;
92136
}
93-
94-
final char[] chars = Hex.encodeHex(data, offset, length, toLowerCase);
95-
final byte[] encoded = new String(chars).getBytes(charset);
96-
97-
final byte[] buffer = ensureBufferSize(encoded.length, context);
98-
System.arraycopy(encoded, 0, buffer, context.pos, encoded.length);
99-
100-
context.pos += encoded.length;
101137
}
102138

103139
@Override
104-
void decode(final byte[] data, final int offset, final int length, final Context context) {
140+
void decode(final byte[] data, int offset, final int length, final Context context) {
105141
if (context.eof || length < 0) {
106142
context.eof = true;
107143
if (context.ibitWorkArea > 0) {
@@ -119,178 +155,84 @@ void decode(final byte[] data, final int offset, final int length, final Context
119155
return;
120156
}
121157

122-
// NOTE: Each pair of bytes is really a pair of hex-chars, therefore each pair represents one byte
123-
124158
// we must have an even number of chars to decode
125-
final char[] encodedChars = new char[availableChars % 2 == 0 ? availableChars : availableChars - 1];
159+
final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
160+
161+
final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
126162

127-
// copy all (or part of) data into encodedChars
163+
int result;
128164
int i = 0;
129165
if (dataLen < availableChars) {
130166
// we have 1/2 byte from previous invocation to decode
131-
encodedChars[i++] = (char)context.ibitWorkArea;
132-
context.ibitWorkArea = -1; // reset for next iteration!
133-
}
134-
final int copyLen = encodedChars.length - i;
135-
for (int j = offset; j < copyLen + offset; j++) {
136-
encodedChars[i++] = (char) data[j];
137-
}
167+
result = decodeTable[context.ibitWorkArea] << BITS_PER_ENCODED_BYTE;
168+
result |= decodeTable[data[offset++]];
169+
i = 2;
138170

139-
// decode encodedChars into buffer
140-
final byte[] buffer = ensureBufferSize(encodedChars.length / 2, context);
141-
try {
142-
final int written = Hex.decodeHex(encodedChars, buffer, context.pos);
143-
context.pos += written;
144-
} catch (final DecoderException e) {
145-
throw new RuntimeException(e); // this method ensures that this cannot happen at runtime!
171+
buffer[context.pos++] = (byte)result;
172+
173+
// reset for next invocation!
174+
context.ibitWorkArea = -1;
146175
}
147176

148-
// we have one char of a hex-pair left over
149-
if (copyLen < dataLen) {
150-
context.ibitWorkArea = data[offset + dataLen - 1]; // store 1/2 byte for next invocation of decode
177+
while (i < charsToProcess) {
178+
result = decodeTable[data[offset++]] << BITS_PER_ENCODED_BYTE;
179+
result |= decodeTable[data[offset++]];
180+
i += 2;
181+
buffer[context.pos++] = (byte)result;
151182
}
152-
}
153183

154-
/**
155-
* Validates whether decoding allows an entire final trailing character that cannot be
156-
* used for a complete byte.
157-
*
158-
* @throws IllegalArgumentException if strict decoding is enabled
159-
*/
160-
private void validateTrailingCharacter() {
161-
if (isStrictDecoding()) {
162-
throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet" +
163-
"character but not a possible encoding. " +
164-
"Decoding requires at least two characters to create one byte.");
184+
// we have one char of a hex-pair left over
185+
if (i < dataLen) {
186+
context.ibitWorkArea = data[i]; // store 1/2 byte for next invocation of decode
165187
}
166188
}
167189

168190
@Override
169-
protected boolean isInAlphabet(final byte value) {
170-
if (value >= '0' && value <= '9') {
171-
return true;
191+
void encode(final byte[] data, final int offset, final int length, final Context context) {
192+
if (context.eof) {
193+
return;
172194
}
173195

174-
if (toLowerCase) {
175-
return value >= 'a' && value <= 'f';
176-
} else {
177-
return value >= 'A' && value <= 'F';
196+
if (length < 0) {
197+
context.eof = true;
198+
return;
178199
}
179-
}
180200

181-
/**
182-
* Returns whether or not the {@code c} is in the base 16 alphabet.
183-
*
184-
* @param c The value to test
185-
* @return {@code true} if the value is defined in the the base 16 alphabet, {@code false} otherwise.
186-
*/
187-
public static boolean isBase16(final char c) {
188-
return
189-
(c >= '0' && c <= '9')
190-
|| (c >= 'A' && c <= 'F')
191-
|| (c >= 'a' && c <= 'f');
192-
}
193-
194-
/**
195-
* Tests a given String to see if it contains only valid characters within the Base16 alphabet.
196-
*
197-
* @param base16 String to test
198-
* @return {@code true} if all characters in the String are valid characters in the Base16 alphabet or if
199-
* the String is empty; {@code false}, otherwise
200-
*/
201-
public static boolean isBase16(final String base16) {
202-
return isBase16(base16.toCharArray());
203-
}
201+
final byte[] buffer = ensureBufferSize(length * BYTES_PER_ENCODED_BLOCK, context);
204202

205-
/**
206-
* Tests a given char array to see if it contains only valid characters within the Base16 alphabet.
207-
*
208-
* @param arrayChars char array to test
209-
* @return {@code true} if all chars are valid characters in the Base16 alphabet or if the char array is empty;
210-
* {@code false}, otherwise
211-
*/
212-
public static boolean isBase16(final char[] arrayChars) {
213-
for (int i = 0; i < arrayChars.length; i++) {
214-
if (!isBase16(arrayChars[i])) {
215-
return false;
216-
}
203+
final int end = offset + length;
204+
for (int i = offset; i < end; i++) {
205+
final int value = data[i];
206+
final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
207+
final int low = value & MASK_4BITS;
208+
buffer[context.pos++] = encodeTable[high];
209+
buffer[context.pos++] = encodeTable[low];
217210
}
218-
return true;
219211
}
220212

221213
/**
222-
* Tests a given char array to see if it contains only valid characters within the Base16 alphabet.
214+
* Returns whether or not the {@code octet} is in the Base16 alphabet.
223215
*
224-
* @param arrayChars byte array to test
225-
* @return {@code true} if all chars are valid characters in the Base16 alphabet or if the byte array is empty;
226-
* {@code false}, otherwise
227-
*/
228-
public static boolean isBase16(final byte[] arrayChars) {
229-
for (int i = 0; i < arrayChars.length; i++) {
230-
if (!isBase16((char) arrayChars[i])) {
231-
return false;
232-
}
233-
}
234-
return true;
235-
}
236-
237-
/**
238-
* Encodes binary data using the base16 algorithm.
216+
* @param octet The value to test.
239217
*
240-
* @param binaryData Array containing binary data to encode.
241-
* @return Base16-encoded data.
218+
* @return {@code true} if the value is defined in the Base16 alphabet, {@code false} otherwise.
242219
*/
243-
public static byte[] encodeBase16(final byte[] binaryData) {
244-
return encodeBase16(binaryData, true, Hex.DEFAULT_CHARSET, Integer.MAX_VALUE);
220+
@Override
221+
public boolean isInAlphabet(final byte octet) {
222+
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
245223
}
246224

247225
/**
248-
* Encodes binary data using the base16 algorithm.
226+
* Validates whether decoding allows an entire final trailing character that cannot be
227+
* used for a complete byte.
249228
*
250-
* @param binaryData Array containing binary data to encode.
251-
* @param toLowerCase {@code true} converts to lowercase, {@code false} to uppercase.
252-
* @param charset the charset.
253-
* @param maxResultSize The maximum result size to accept.
254-
* @return Base16-encoded data.
255-
* @throws IllegalArgumentException Thrown when the input array needs an output array bigger than maxResultSize
229+
* @throws IllegalArgumentException if strict decoding is enabled
256230
*/
257-
public static byte[] encodeBase16(final byte[] binaryData, final boolean toLowerCase, final Charset charset,
258-
final int maxResultSize) {
259-
if (binaryData == null || binaryData.length == 0) {
260-
return binaryData;
261-
}
262-
263-
// Create this so can use the super-class method
264-
// Also ensures that the same roundings are performed by the ctor and the code
265-
final Base16 b16 = new Base16(toLowerCase, charset);
266-
final long len = b16.getEncodedLength(binaryData);
267-
if (len > maxResultSize) {
268-
throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
269-
len +
270-
") than the specified maximum size of " +
271-
maxResultSize);
231+
private void validateTrailingCharacter() {
232+
if (isStrictDecoding()) {
233+
throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet" +
234+
"character but not a possible encoding. " +
235+
"Decoding requires at least two characters to create one byte.");
272236
}
273-
274-
return b16.encode(binaryData);
275-
}
276-
277-
/**
278-
* Decodes a Base16 String into octets.
279-
*
280-
* @param base16String String containing Base16 data
281-
* @return Array containing decoded data.
282-
*/
283-
public static byte[] decodeBase16(final String base16String) {
284-
return new Base16().decode(base16String);
285-
}
286-
287-
/**
288-
* Decodes Base16 data into octets.
289-
*
290-
* @param base16Data Byte array containing Base16 data
291-
* @return Array containing decoded data.
292-
*/
293-
public static byte[] decodeBase16(final byte[] base16Data) {
294-
return new Base16().decode(base16Data);
295237
}
296238
}

0 commit comments

Comments
 (0)