1818package org .apache .commons .codec .binary ;
1919
2020import org .apache .commons .codec .CodecPolicy ;
21- import org .apache .commons .codec .DecoderException ;
22-
23- import java .nio .charset .Charset ;
2421
2522/**
2623 * Provides Base16 encoding and decoding.
2926 * This class is thread-safe.
3027 * </p>
3128 *
29+ * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
30+ *
3231 * @since 1.15
3332 */
3433public class Base16 extends BaseNCodec {
3534
36- private static final int BYTES_PER_UNENCODED_BLOCK = 1 ;
35+ /**
36+ * BASE16 characters are 4 bits in length.
37+ * They are formed by taking an 8-bit group,
38+ * which is converted into two BASE16 characters.
39+ */
40+ private static final int BITS_PER_ENCODED_BYTE = 4 ;
3741 private static final int BYTES_PER_ENCODED_BLOCK = 2 ;
42+ private static final int BYTES_PER_UNENCODED_BLOCK = 1 ;
43+
44+ /**
45+ * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
46+ * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
47+ * alphabet but fall within the bounds of the array are translated to -1.
48+ */
49+ private static final byte [] UPPER_CASE_DECODE_TABLE = {
50+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
51+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 00-0f
52+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 10-1f
53+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 20-2f
54+ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , -1 , -1 , -1 , -1 , -1 , -1 , // 30-3f 0-9
55+ -1 , 10 , 11 , 12 , 13 , 14 , 15 // 40-46 A-F
56+ };
3857
39- private final boolean toLowerCase ;
40- private final Charset charset ;
58+ /**
59+ * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
60+ * equivalents as specified in Table 5 of RFC 4648.
61+ */
62+ private static final byte [] UPPER_CASE_ENCODE_TABLE = {
63+ '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' ,
64+ 'A' , 'B' , 'C' , 'D' , 'E' , 'F'
65+ };
4166
4267 /**
43- * Creates a Base16 codec used for decoding and encoding.
68+ * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet"
69+ * into their 4-bit positive integer equivalents. Characters that are not in the Base16
70+ * alphabet but fall within the bounds of the array are translated to -1.
4471 */
45- protected Base16 () {
46- this (Hex .DEFAULT_CHARSET );
47- }
72+ private static final byte [] LOWER_CASE_DECODE_TABLE = {
73+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
74+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 00-0f
75+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 10-1f
76+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 20-2f
77+ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , -1 , -1 , -1 , -1 , -1 , -1 , // 30-3f 0-9
78+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 40-4f
79+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , // 50-5f
80+ -1 , 10 , 11 , 12 , 13 , 14 , 15 // 60-66 a-f
81+ };
82+
83+ /**
84+ * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
85+ * lower-case equivalents.
86+ */
87+ private static final byte [] LOWER_CASE_ENCODE_TABLE = {
88+ '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' ,
89+ 'a' , 'b' , 'c' , 'd' , 'e' , 'f'
90+ };
91+
92+ /** Mask used to extract 4 bits, used when decoding character. */
93+ private static final int MASK_4BITS = 0x0f ;
94+
95+ /**
96+ * Decode table to use.
97+ */
98+ private final byte [] decodeTable ;
99+
100+ /**
101+ * Encode table to use.
102+ */
103+ private final byte [] encodeTable ;
48104
/**
 * Creates a Base16 codec used for decoding and encoding.
 * Equivalent to {@code new Base16(false)}, i.e. the upper-case alphabet is used.
 */
public Base16() {
    this(false);
}
57111
/**
 * Creates a Base16 codec used for decoding and encoding, with the default decoding policy.
 *
 * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
 */
public Base16(final boolean lowerCase) {
    this(lowerCase, DECODING_POLICY_DEFAULT);
}
69120
/**
 * Creates a Base16 codec used for decoding and encoding.
 *
 * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
 * @param decodingPolicy Decoding policy.
 */
public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
    super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, PAD_DEFAULT, decodingPolicy);
    // Encoding and decoding always use the same case; pick the matching pair of tables.
    this.encodeTable = lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE;
    this.decodeTable = lowerCase ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE;
}
102138
103139 @ Override
104- void decode (final byte [] data , final int offset , final int length , final Context context ) {
140+ void decode (final byte [] data , int offset , final int length , final Context context ) {
105141 if (context .eof || length < 0 ) {
106142 context .eof = true ;
107143 if (context .ibitWorkArea > 0 ) {
@@ -119,178 +155,84 @@ void decode(final byte[] data, final int offset, final int length, final Context
119155 return ;
120156 }
121157
122- // NOTE: Each pair of bytes is really a pair of hex-chars, therefore each pair represents one byte
123-
124158 // we must have an even number of chars to decode
125- final char [] encodedChars = new char [availableChars % 2 == 0 ? availableChars : availableChars - 1 ];
159+ final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1 ;
160+
161+ final byte [] buffer = ensureBufferSize (charsToProcess / BYTES_PER_ENCODED_BLOCK , context );
126162
127- // copy all (or part of) data into encodedChars
163+ int result ;
128164 int i = 0 ;
129165 if (dataLen < availableChars ) {
130166 // we have 1/2 byte from previous invocation to decode
131- encodedChars [i ++] = (char )context .ibitWorkArea ;
132- context .ibitWorkArea = -1 ; // reset for next iteration!
133- }
134- final int copyLen = encodedChars .length - i ;
135- for (int j = offset ; j < copyLen + offset ; j ++) {
136- encodedChars [i ++] = (char ) data [j ];
137- }
167+ result = decodeTable [context .ibitWorkArea ] << BITS_PER_ENCODED_BYTE ;
168+ result |= decodeTable [data [offset ++]];
169+ i = 2 ;
138170
139- // decode encodedChars into buffer
140- final byte [] buffer = ensureBufferSize (encodedChars .length / 2 , context );
141- try {
142- final int written = Hex .decodeHex (encodedChars , buffer , context .pos );
143- context .pos += written ;
144- } catch (final DecoderException e ) {
145- throw new RuntimeException (e ); // this method ensures that this cannot happen at runtime!
171+ buffer [context .pos ++] = (byte )result ;
172+
173+ // reset for next invocation!
174+ context .ibitWorkArea = -1 ;
146175 }
147176
148- // we have one char of a hex-pair left over
149- if (copyLen < dataLen ) {
150- context .ibitWorkArea = data [offset + dataLen - 1 ]; // store 1/2 byte for next invocation of decode
177+ while (i < charsToProcess ) {
178+ result = decodeTable [data [offset ++]] << BITS_PER_ENCODED_BYTE ;
179+ result |= decodeTable [data [offset ++]];
180+ i += 2 ;
181+ buffer [context .pos ++] = (byte )result ;
151182 }
152- }
153183
154- /**
155- * Validates whether decoding allows an entire final trailing character that cannot be
156- * used for a complete byte.
157- *
158- * @throws IllegalArgumentException if strict decoding is enabled
159- */
160- private void validateTrailingCharacter () {
161- if (isStrictDecoding ()) {
162- throw new IllegalArgumentException ("Strict decoding: Last encoded character is a valid base 16 alphabet" +
163- "character but not a possible encoding. " +
164- "Decoding requires at least two characters to create one byte." );
184+ // we have one char of a hex-pair left over
185+ if (i < dataLen ) {
186+ context .ibitWorkArea = data [i ]; // store 1/2 byte for next invocation of decode
165187 }
166188 }
167189
168190 @ Override
169- protected boolean isInAlphabet (final byte value ) {
170- if (value >= '0' && value <= '9' ) {
171- return true ;
191+ void encode (final byte [] data , final int offset , final int length , final Context context ) {
192+ if (context . eof ) {
193+ return ;
172194 }
173195
174- if (toLowerCase ) {
175- return value >= 'a' && value <= 'f' ;
176- } else {
177- return value >= 'A' && value <= 'F' ;
196+ if (length < 0 ) {
197+ context .eof = true ;
198+ return ;
178199 }
179- }
180200
181- /**
182- * Returns whether or not the {@code c} is in the base 16 alphabet.
183- *
184- * @param c The value to test
185- * @return {@code true} if the value is defined in the the base 16 alphabet, {@code false} otherwise.
186- */
187- public static boolean isBase16 (final char c ) {
188- return
189- (c >= '0' && c <= '9' )
190- || (c >= 'A' && c <= 'F' )
191- || (c >= 'a' && c <= 'f' );
192- }
193-
194- /**
195- * Tests a given String to see if it contains only valid characters within the Base16 alphabet.
196- *
197- * @param base16 String to test
198- * @return {@code true} if all characters in the String are valid characters in the Base16 alphabet or if
199- * the String is empty; {@code false}, otherwise
200- */
201- public static boolean isBase16 (final String base16 ) {
202- return isBase16 (base16 .toCharArray ());
203- }
201+ final byte [] buffer = ensureBufferSize (length * BYTES_PER_ENCODED_BLOCK , context );
204202
205- /**
206- * Tests a given char array to see if it contains only valid characters within the Base16 alphabet.
207- *
208- * @param arrayChars char array to test
209- * @return {@code true} if all chars are valid characters in the Base16 alphabet or if the char array is empty;
210- * {@code false}, otherwise
211- */
212- public static boolean isBase16 (final char [] arrayChars ) {
213- for (int i = 0 ; i < arrayChars .length ; i ++) {
214- if (!isBase16 (arrayChars [i ])) {
215- return false ;
216- }
203+ final int end = offset + length ;
204+ for (int i = offset ; i < end ; i ++) {
205+ final int value = data [i ];
206+ final int high = (value >> BITS_PER_ENCODED_BYTE ) & MASK_4BITS ;
207+ final int low = value & MASK_4BITS ;
208+ buffer [context .pos ++] = encodeTable [high ];
209+ buffer [context .pos ++] = encodeTable [low ];
217210 }
218- return true ;
219211 }
220212
221213 /**
222- * Tests a given char array to see if it contains only valid characters within the Base16 alphabet.
214+ * Returns whether or not the {@code octet} is in the Base16 alphabet.
223215 *
224- * @param arrayChars byte array to test
225- * @return {@code true} if all chars are valid characters in the Base16 alphabet or if the byte array is empty;
226- * {@code false}, otherwise
227- */
228- public static boolean isBase16 (final byte [] arrayChars ) {
229- for (int i = 0 ; i < arrayChars .length ; i ++) {
230- if (!isBase16 ((char ) arrayChars [i ])) {
231- return false ;
232- }
233- }
234- return true ;
235- }
236-
237- /**
238- * Encodes binary data using the base16 algorithm.
216+ * @param octet The value to test.
239217 *
240- * @param binaryData Array containing binary data to encode.
241- * @return Base16-encoded data.
218+ * @return {@code true} if the value is defined in the the Base16 alphabet {@code false} otherwise.
242219 */
243- public static byte [] encodeBase16 (final byte [] binaryData ) {
244- return encodeBase16 (binaryData , true , Hex .DEFAULT_CHARSET , Integer .MAX_VALUE );
220+ @ Override
221+ public boolean isInAlphabet (final byte octet ) {
222+ return octet >= 0 && octet < decodeTable .length && decodeTable [octet ] != -1 ;
245223 }
246224
247225 /**
248- * Encodes binary data using the base16 algorithm.
226+ * Validates whether decoding allows an entire final trailing character that cannot be
227+ * used for a complete byte.
249228 *
250- * @param binaryData Array containing binary data to encode.
251- * @param toLowerCase {@code true} converts to lowercase, {@code false} to uppercase.
252- * @param charset the charset.
253- * @param maxResultSize The maximum result size to accept.
254- * @return Base16-encoded data.
255- * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than maxResultSize
229+ * @throws IllegalArgumentException if strict decoding is enabled
256230 */
257- public static byte [] encodeBase16 (final byte [] binaryData , final boolean toLowerCase , final Charset charset ,
258- final int maxResultSize ) {
259- if (binaryData == null || binaryData .length == 0 ) {
260- return binaryData ;
261- }
262-
263- // Create this so can use the super-class method
264- // Also ensures that the same roundings are performed by the ctor and the code
265- final Base16 b16 = new Base16 (toLowerCase , charset );
266- final long len = b16 .getEncodedLength (binaryData );
267- if (len > maxResultSize ) {
268- throw new IllegalArgumentException ("Input array too big, the output array would be bigger (" +
269- len +
270- ") than the specified maximum size of " +
271- maxResultSize );
231+ private void validateTrailingCharacter () {
232+ if (isStrictDecoding ()) {
233+ throw new IllegalArgumentException ("Strict decoding: Last encoded character is a valid base 16 alphabet" +
234+ "character but not a possible encoding. " +
235+ "Decoding requires at least two characters to create one byte." );
272236 }
273-
274- return b16 .encode (binaryData );
275- }
276-
277- /**
278- * Decodes a Base16 String into octets.
279- *
280- * @param base16String String containing Base16 data
281- * @return Array containing decoded data.
282- */
283- public static byte [] decodeBase16 (final String base16String ) {
284- return new Base16 ().decode (base16String );
285- }
286-
287- /**
288- * Decodes Base16 data into octets.
289- *
290- * @param base16Data Byte array containing Base16 data
291- * @return Array containing decoded data.
292- */
293- public static byte [] decodeBase16 (final byte [] base16Data ) {
294- return new Base16 ().decode (base16Data );
295237 }
296238}
0 commit comments