Skip to content

Commit e80304c

Browse files
committed
Merge branch 'adamretter-base16'
2 parents 7118544 + 74343c0 commit e80304c

21 files changed

Lines changed: 2082 additions & 320 deletions

pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,13 @@ limitations under the License.
207207
<role>Submitted Match Rating Approach (MRA) phonetic encoder and tests [CODEC-161]</role>
208208
</roles>
209209
</contributor>
210+
<contributor>
211+
<name>Adam Retter</name>
212+
<organization>Evolved Binary</organization>
213+
<roles>
214+
<role>Base16 Input and Output Streams</role>
215+
</roles>
216+
</contributor>
210217
</contributors>
211218
<!-- Codec only has test dependencies ATM -->
212219
<dependencies>

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ The <action> type attribute can be add,update,fix,remove.
4747
<action issue="CODEC-280" dev="aherbert" type="update">Base32/Base64/BCodec: Added strict decoding property to control handling of trailing bits. Default lenient mode discards them without error. Strict mode raises an exception.</action>
4848
<action issue="CODEC-289" dev="aherbert" type="update">Base32/Base64 Input/OutputStream: Added strict decoding property to control handling of trailing bits. Default lenient mode discards them without error. Strict mode raises an exception.</action>
4949
<action dev="ggregory" type="update" due-to="Gary Gregory">Update tests from JUnit 4.12 to 4.13.</action>
50+
<action issue="CODEC-290" dev="aherbert" due-to="Adam Retter" type="add">Base16Codec and Base16Input/OutputStream</action>
5051
</release>
5152

5253
<release version="1.14" date="2019-12-30" description="Feature and fix release.">
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.codec.binary;
19+
20+
import org.apache.commons.codec.CodecPolicy;
21+
22+
/**
23+
* Provides Base16 encoding and decoding.
24+
*
25+
* <p>
26+
* This class is thread-safe.
27+
* </p>
28+
* <p>
29+
* This implementation strictly follows RFC 4648, and as such unlike
30+
* the {@link Base32} and {@link Base64} implementations,
31+
* it does not ignore invalid alphabet characters or whitespace,
32+
* neither does it offer chunking or padding characters.
33+
* </p>
34+
* <p>
35+
* The only additional feature above those specified in RFC 4648
36+
* is support for working with a lower-case alphabet in addition
37+
* to the default upper-case alphabet.
38+
* </p>
39+
*
40+
* @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
41+
*
42+
* @since 1.15
43+
*/
44+
public class Base16 extends BaseNCodec {
45+
46+
/**
47+
* BASE16 characters are 4 bits in length.
48+
* They are formed by taking an 8-bit group,
49+
* which is converted into two BASE16 characters.
50+
*/
51+
private static final int BITS_PER_ENCODED_BYTE = 4;
52+
private static final int BYTES_PER_ENCODED_BLOCK = 2;
53+
private static final int BYTES_PER_UNENCODED_BLOCK = 1;
54+
55+
/**
56+
* This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
57+
* in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
58+
* alphabet but fall within the bounds of the array are translated to -1.
59+
*/
60+
private static final byte[] UPPER_CASE_DECODE_TABLE = {
61+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
62+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
63+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
64+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
65+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
66+
-1, 10, 11, 12, 13, 14, 15 // 40-46 A-F
67+
};
68+
69+
/**
70+
* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
71+
* equivalents as specified in Table 5 of RFC 4648.
72+
*/
73+
private static final byte[] UPPER_CASE_ENCODE_TABLE = {
74+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
75+
'A', 'B', 'C', 'D', 'E', 'F'
76+
};
77+
78+
/**
79+
* This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet"
80+
* into their 4-bit positive integer equivalents. Characters that are not in the Base16
81+
* alphabet but fall within the bounds of the array are translated to -1.
82+
*/
83+
private static final byte[] LOWER_CASE_DECODE_TABLE = {
84+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
85+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
86+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
87+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
88+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
89+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
90+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
91+
-1, 10, 11, 12, 13, 14, 15 // 60-66 a-f
92+
};
93+
94+
/**
95+
* This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
96+
* lower-case equivalents.
97+
*/
98+
private static final byte[] LOWER_CASE_ENCODE_TABLE = {
99+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
100+
'a', 'b', 'c', 'd', 'e', 'f'
101+
};
102+
103+
/** Mask used to extract 4 bits, used when decoding character. */
104+
private static final int MASK_4BITS = 0x0f;
105+
106+
/**
107+
* Decode table to use.
108+
*/
109+
private final byte[] decodeTable;
110+
111+
/**
112+
* Encode table to use.
113+
*/
114+
private final byte[] encodeTable;
115+
116+
/**
117+
* Creates a Base16 codec used for decoding and encoding.
118+
*/
119+
public Base16() {
120+
this(false);
121+
}
122+
123+
/**
124+
* Creates a Base16 codec used for decoding and encoding.
125+
*
126+
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
127+
*/
128+
public Base16(final boolean lowerCase) {
129+
this(lowerCase, DECODING_POLICY_DEFAULT);
130+
}
131+
132+
/**
133+
* Creates a Base16 codec used for decoding and encoding.
134+
*
135+
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
136+
* @param decodingPolicy Decoding policy.
137+
*/
138+
public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
139+
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0,
140+
PAD_DEFAULT, decodingPolicy);
141+
if (lowerCase) {
142+
this.encodeTable = LOWER_CASE_ENCODE_TABLE;
143+
this.decodeTable = LOWER_CASE_DECODE_TABLE;
144+
} else {
145+
this.encodeTable = UPPER_CASE_ENCODE_TABLE;
146+
this.decodeTable = UPPER_CASE_DECODE_TABLE;
147+
}
148+
}
149+
150+
@Override
151+
void decode(final byte[] data, int offset, final int length, final Context context) {
152+
if (context.eof || length < 0) {
153+
context.eof = true;
154+
if (context.ibitWorkArea != 0) {
155+
validateTrailingCharacter();
156+
}
157+
return;
158+
}
159+
160+
final int dataLen = Math.min(data.length - offset, length);
161+
final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
162+
163+
// small optimisation to short-cut the rest of this method when it is fed byte-by-byte
164+
if (availableChars == 1 && availableChars == dataLen) {
165+
context.ibitWorkArea = decodeOctet(data[offset]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
166+
return;
167+
}
168+
169+
// we must have an even number of chars to decode
170+
final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
171+
172+
final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
173+
174+
int result;
175+
int i = 0;
176+
if (dataLen < availableChars) {
177+
// we have 1/2 byte from previous invocation to decode
178+
result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
179+
result |= decodeOctet(data[offset++]);
180+
i = 2;
181+
182+
buffer[context.pos++] = (byte)result;
183+
184+
// reset to empty-value for next invocation!
185+
context.ibitWorkArea = 0;
186+
}
187+
188+
while (i < charsToProcess) {
189+
result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
190+
result |= decodeOctet(data[offset++]);
191+
i += 2;
192+
buffer[context.pos++] = (byte)result;
193+
}
194+
195+
// we have one char of a hex-pair left over
196+
if (i < dataLen) {
197+
context.ibitWorkArea = decodeOctet(data[i]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
198+
}
199+
}
200+
201+
private int decodeOctet(final byte octet) {
202+
int decoded = -1;
203+
if ((octet & 0xff) < decodeTable.length) {
204+
decoded = decodeTable[octet];
205+
}
206+
207+
if (decoded == -1) {
208+
throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet);
209+
}
210+
211+
return decoded;
212+
}
213+
214+
@Override
215+
void encode(final byte[] data, final int offset, final int length, final Context context) {
216+
if (context.eof) {
217+
return;
218+
}
219+
220+
if (length < 0) {
221+
context.eof = true;
222+
return;
223+
}
224+
225+
final int size = length * BYTES_PER_ENCODED_BLOCK;
226+
if (size < 0) {
227+
throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
228+
}
229+
230+
final byte[] buffer = ensureBufferSize(size, context);
231+
232+
final int end = offset + length;
233+
for (int i = offset; i < end; i++) {
234+
final int value = data[i];
235+
final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
236+
final int low = value & MASK_4BITS;
237+
buffer[context.pos++] = encodeTable[high];
238+
buffer[context.pos++] = encodeTable[low];
239+
}
240+
}
241+
242+
/**
243+
* Returns whether or not the {@code octet} is in the Base16 alphabet.
244+
*
245+
* @param octet The value to test.
246+
*
247+
* @return {@code true} if the value is defined in the the Base16 alphabet {@code false} otherwise.
248+
*/
249+
@Override
250+
public boolean isInAlphabet(final byte octet) {
251+
return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
252+
}
253+
254+
/**
255+
* Validates whether decoding allows an entire final trailing character that cannot be
256+
* used for a complete byte.
257+
*
258+
* @throws IllegalArgumentException if strict decoding is enabled
259+
*/
260+
private void validateTrailingCharacter() {
261+
if (isStrictDecoding()) {
262+
throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet" +
263+
"character but not a possible encoding. " +
264+
"Decoding requires at least two characters to create one byte.");
265+
}
266+
}
267+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.codec.binary;
19+
20+
import org.apache.commons.codec.CodecPolicy;
21+
22+
import java.io.InputStream;
23+
24+
/**
25+
* Provides Base16 encoding and decoding in a streaming fashion (unlimited size).
26+
* <p>
27+
* The default behavior of the Base16InputStream is to DECODE, whereas the default behavior of the
28+
* {@link Base16OutputStream} is to ENCODE, but this behavior can be overridden by using a different constructor.
29+
* </p>
30+
*
31+
* @since 1.15
32+
*/
33+
public class Base16InputStream extends BaseNCodecInputStream {
34+
35+
/**
36+
* Creates a Base16InputStream such that all data read is Base16-decoded from the original provided InputStream.
37+
*
38+
* @param in InputStream to wrap.
39+
*/
40+
public Base16InputStream(final InputStream in) {
41+
this(in, false);
42+
}
43+
44+
/**
45+
* Creates a Base16InputStream such that all data read is either Base16-encoded or Base16-decoded from the original
46+
* provided InputStream.
47+
*
48+
* @param in InputStream to wrap.
49+
* @param doEncode true if we should encode all data read from us, false if we should decode.
50+
*/
51+
public Base16InputStream(final InputStream in, final boolean doEncode) {
52+
this(in, doEncode, false);
53+
}
54+
55+
/**
56+
* Creates a Base16InputStream such that all data read is either Base16-encoded or Base16-decoded from the original
57+
* provided InputStream.
58+
*
59+
* @param in InputStream to wrap.
60+
* @param doEncode true if we should encode all data read from us, false if we should decode.
61+
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
62+
*/
63+
public Base16InputStream(final InputStream in, final boolean doEncode,
64+
final boolean lowerCase) {
65+
this(in, doEncode, lowerCase, CodecPolicy.LENIENT);
66+
}
67+
68+
/**
69+
* Creates a Base16InputStream such that all data read is either Base16-encoded or Base16-decoded from the original
70+
* provided InputStream.
71+
*
72+
* @param in InputStream to wrap.
73+
* @param doEncode true if we should encode all data read from us, false if we should decode.
74+
* @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
75+
* @param decodingPolicy Decoding policy.
76+
*/
77+
public Base16InputStream(final InputStream in, final boolean doEncode,
78+
final boolean lowerCase, final CodecPolicy decodingPolicy) {
79+
super(in, new Base16(lowerCase, decodingPolicy), doEncode);
80+
}
81+
}

0 commit comments

Comments
 (0)