apache
diff --git a/‎pom.xml‎
Lines changed: 7 additions & 0 deletions b/‎pom.xml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/changes/changes.xml‎
Lines changed: 1 addition & 0 deletions b/‎src/changes/changes.xml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/main/java/org/apache/commons/codec/binary/Base16.java‎
Lines changed: 267 additions & 0 deletions b/‎src/main/java/org/apache/commons/codec/binary/Base16.java‎
Lines changed: 267 additions & 0 deletions
diff --git a/‎src/main/java/org/apache/commons/codec/binary/Base16InputStream.java‎
Lines changed: 81 additions & 0 deletions b/‎src/main/java/org/apache/commons/codec/binary/Base16InputStream.java‎
Lines changed: 81 additions & 0 deletions
@@ -207,6 +207,13 @@ limitations under the License.
         <role>Submitted Match Rating Approach (MRA) phonetic encoder and tests [CODEC-161]</role>
       </roles>
     </contributor>
+    <contributor>
+      <name>Adam Retter</name>
+      <organization>Evolved Binary</organization>
+      <roles>
+        <role>Base16 Input and Output Streams</role>
+      </roles>
+    </contributor>
   </contributors>
   <!-- Codec only has test dependencies ATM -->
   <dependencies>
 
@@ -47,6 +47,7 @@ The <action> type attribute can be add,update,fix,remove.
       <action issue="CODEC-280" dev="aherbert" type="update">Base32/Base64/BCodec: Added strict decoding property to control handling of trailing bits. Default lenient mode discards them without error. Strict mode raise an exception.</action>
       <action issue="CODEC-289" dev="aherbert" type="update">Base32/Base64 Input/OutputStream: Added strict decoding property to control handling of trailing bits. Default lenient mode discards them without error. Strict mode raise an exception.</action>
       <action                   dev="ggregory" type="update" due-to="Gary Gregory">Update tests from JUnit 4.12 to 4.13.</action>
+      <action issue="CODEC-290" dev="aherbert" due-to="Adam Retter" type="add">Base16Codec and Base16Input/OutputStream</action>
     </release>
 
     <release version="1.14" date="2019-12-30" description="Feature and fix release.">
 
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.binary;
+
+import org.apache.commons.codec.CodecPolicy;
+
+/**
+ * Provides Base16 encoding and decoding.
+ *
+ * <p>
+ * This class is thread-safe.
+ * </p>
+ * <p>
+ * This implementation strictly follows RFC 4648, and as such unlike
+ * the {@link Base32} and {@link Base64} implementations,
+ * it does not ignore invalid alphabet characters or whitespace,
+ * neither does it offer chunking or padding characters.
+ * </p>
+ * <p>
+ * The only additional feature above those specified in RFC 4648
+ * is support for working with a lower-case alphabet in addition
+ * to the default upper-case alphabet.
+ * </p>
+ *
+ * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
+ *
+ * @since 1.15
+ */
+public class Base16 extends BaseNCodec {
+
+    /**
+     * BASE16 characters are 4 bits in length.
+     * They are formed by taking an 8-bit group,
+     * which is converted into two BASE16 characters.
+     */
+    private static final int BITS_PER_ENCODED_BYTE = 4;
+
+    /** Number of encoded characters that make up one encoded block (one hex pair). */
+    private static final int BYTES_PER_ENCODED_BLOCK = 2;
+
+    /** Number of unencoded bytes represented by one encoded block. */
+    private static final int BYTES_PER_UNENCODED_BLOCK = 1;
+
+    /**
+     * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
+     * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
+     * alphabet but fall within the bounds of the array are translated to -1.
+     */
+    private static final byte[] UPPER_CASE_DECODE_TABLE = {
+            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
+             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
+            -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
+    };
+
+    /**
+     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
+     * equivalents as specified in Table 5 of RFC 4648.
+     */
+    private static final byte[] UPPER_CASE_ENCODE_TABLE = {
+            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+            'A', 'B', 'C', 'D', 'E', 'F'
+    };
+
+    /**
+     * This array is a lookup table that translates Unicode characters drawn from a lower-case "Base16 Alphabet"
+     * into their 4-bit positive integer equivalents. Characters that are not in the Base16
+     * alphabet but fall within the bounds of the array are translated to -1.
+     */
+    private static final byte[] LOWER_CASE_DECODE_TABLE = {
+            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
+             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
+            -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
+    };
+
+    /**
+     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
+     * lower-case equivalents.
+     */
+    private static final byte[] LOWER_CASE_ENCODE_TABLE = {
+            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+            'a', 'b', 'c', 'd', 'e', 'f'
+    };
+
+    /** Mask used to extract 4 bits, used when decoding character. */
+    private static final int MASK_4BITS = 0x0f;
+
+    /**
+     * Decode table to use.
+     */
+    private final byte[] decodeTable;
+
+    /**
+     * Encode table to use.
+     */
+    private final byte[] encodeTable;
+
+    /**
+     * Creates a Base16 codec used for decoding and encoding, using the upper-case alphabet.
+     */
+    public Base16() {
+        this(false);
+    }
+
+    /**
+     * Creates a Base16 codec used for decoding and encoding, using {@code DECODING_POLICY_DEFAULT}
+     * as the decoding policy.
+     *
+     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
+     */
+    public Base16(final boolean lowerCase) {
+        this(lowerCase, DECODING_POLICY_DEFAULT);
+    }
+
+    /**
+     * Creates a Base16 codec used for decoding and encoding.
+     *
+     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
+     * @param decodingPolicy Decoding policy.
+     */
+    public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
+        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0,
+                PAD_DEFAULT, decodingPolicy);
+        // the alphabet case selects a matching encode/decode table pair
+        if (lowerCase) {
+            this.encodeTable = LOWER_CASE_ENCODE_TABLE;
+            this.decodeTable = LOWER_CASE_DECODE_TABLE;
+        } else {
+            this.encodeTable = UPPER_CASE_ENCODE_TABLE;
+            this.decodeTable = UPPER_CASE_DECODE_TABLE;
+        }
+    }
+
+    /**
+     * Decodes Base16 characters from {@code data} and appends the resulting bytes to the context buffer.
+     * <p>
+     * Two encoded characters form one decoded byte. When an invocation ends on an unpaired character, its
+     * decoded 4-bit value is kept in {@code context.ibitWorkArea} (stored as value + 1, so that 0 means
+     * "nothing pending") and is combined with the first character of the next invocation. A negative
+     * {@code length} marks end-of-input; a character still pending at that point is passed to
+     * {@code validateTrailingCharacter()}.
+     * </p>
+     *
+     * @param data    array holding the encoded characters.
+     * @param offset  index in {@code data} of the first character to decode.
+     * @param length  number of characters to decode, or a negative value to signal end-of-input.
+     * @param context state carried between invocations.
+     * @throws IllegalArgumentException if a character is not in the active alphabet, or if a trailing
+     *         character is pending at end-of-input while strict decoding is enabled.
+     */
+    @Override
+    void decode(final byte[] data, int offset, final int length, final Context context) {
+        if (context.eof || length < 0) {
+            context.eof = true;
+            if (context.ibitWorkArea != 0) {
+                validateTrailingCharacter();
+            }
+            return;
+        }
+
+        final int dataLen = Math.min(data.length - offset, length);
+        final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
+
+        // small optimisation to short-cut the rest of this method when it is fed byte-by-byte
+        if (availableChars == 1 && availableChars == dataLen) {
+            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
+            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
+            return;
+        }
+
+        // we must have an even number of chars to decode
+        final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
+
+        // exclusive end of the readable range; all reads below are bounded by it so that a
+        // non-zero starting offset and a carried-over half byte are both accounted for
+        final int end = offset + dataLen;
+
+        final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
+
+        int result;
+        // only combine when there is at least one readable character to pair the pending half byte with
+        if (context.ibitWorkArea != 0 && offset < end) {
+            // we have 1/2 byte from previous invocation to decode
+            result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
+            result |= decodeOctet(data[offset++]);
+
+            buffer[context.pos++] = (byte) result;
+
+            // reset to empty-value for next invocation!
+            context.ibitWorkArea = 0;
+        }
+
+        // decode complete pairs; a pair needs two readable characters, so stop one short of end
+        final int lastPairStart = end - 1;
+        while (offset < lastPairStart) {
+            result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
+            result |= decodeOctet(data[offset++]);
+            buffer[context.pos++] = (byte) result;
+        }
+
+        // we have one char of a hex-pair left over
+        if (offset < end) {
+            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
+            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
+        }
+    }
+
+    /**
+     * Translates one encoded character into its 4-bit value using the active decode table.
+     *
+     * @param octet the encoded character.
+     * @return the value in the range 0-15.
+     * @throws IllegalArgumentException if {@code octet} is not in the active alphabet.
+     */
+    private int decodeOctet(final byte octet) {
+        int decoded = -1;
+        // the unsigned comparison rejects negative bytes; both tables are shorter than 128 entries,
+        // so any octet passing the check is non-negative and safe to use as an index
+        if ((octet & 0xff) < decodeTable.length) {
+            decoded = decodeTable[octet];
+        }
+
+        if (decoded == -1) {
+            throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet);
+        }
+
+        return decoded;
+    }
+
+    /**
+     * Encodes bytes from {@code data} as Base16 characters and appends them to the context buffer.
+     * Each input byte produces two characters: high 4 bits first, then low 4 bits.
+     * A negative {@code length} marks end-of-input and produces no output.
+     *
+     * @param data    array holding the bytes to encode.
+     * @param offset  index in {@code data} of the first byte to encode.
+     * @param length  number of bytes to encode, or a negative value to signal end-of-input.
+     * @param context state carried between invocations.
+     * @throws IllegalArgumentException if the encoded size ({@code length * 2}) overflows an {@code int}.
+     */
+    @Override
+    void encode(final byte[] data, final int offset, final int length, final Context context) {
+        if (context.eof) {
+            return;
+        }
+
+        if (length < 0) {
+            context.eof = true;
+            return;
+        }
+
+        final int size = length * BYTES_PER_ENCODED_BLOCK;
+        // a negative product means the multiplication overflowed
+        if (size < 0) {
+            throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
+        }
+
+        final byte[] buffer = ensureBufferSize(size, context);
+
+        final int end = offset + length;
+        for (int i = offset; i < end; i++) {
+            final int value = data[i];
+            // masking after the shift discards the sign-extension bits of negative bytes
+            final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
+            final int low = value & MASK_4BITS;
+            buffer[context.pos++] = encodeTable[high];
+            buffer[context.pos++] = encodeTable[low];
+        }
+    }
+
+    /**
+     * Returns whether or not the {@code octet} is in the Base16 alphabet.
+     *
+     * @param octet The value to test.
+     *
+     * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
+     */
+    @Override
+    public boolean isInAlphabet(final byte octet) {
+        return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
+    }
+
+    /**
+     * Validates whether decoding allows an entire final trailing character that cannot be
+     * used for a complete byte.
+     *
+     * @throws IllegalArgumentException if strict decoding is enabled
+     */
+    private void validateTrailingCharacter() {
+        if (isStrictDecoding()) {
+            throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet " +
+                    "character but not a possible encoding. " +
+                    "Decoding requires at least two characters to create one byte.");
+        }
+    }
+}
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.binary;
+
+import org.apache.commons.codec.CodecPolicy;
+
+import java.io.InputStream;
+
+/**
+ * Provides Base16 encoding and decoding in a streaming fashion (unlimited size).
+ * <p>
+ * The default behavior of the Base16InputStream is to DECODE, whereas the default behavior of the
+ * {@link Base16OutputStream} is to ENCODE, but this behavior can be overridden by using a different constructor.
+ * </p>
+ *
+ * @since 1.15
+ */
+public class Base16InputStream extends BaseNCodecInputStream {
+
+    /**
+     * Wraps {@code in} so that everything read through this stream is Base16-decoded,
+     * using the upper-case alphabet and the lenient decoding policy.
+     *
+     * @param in InputStream to wrap.
+     */
+    public Base16InputStream(final InputStream in) {
+        this(in, false, false, CodecPolicy.LENIENT);
+    }
+
+    /**
+     * Wraps {@code in} so that everything read through this stream is either Base16-encoded or
+     * Base16-decoded, using the upper-case alphabet and the lenient decoding policy.
+     *
+     * @param in InputStream to wrap.
+     * @param doEncode true if we should encode all data read from us, false if we should decode.
+     */
+    public Base16InputStream(final InputStream in, final boolean doEncode) {
+        this(in, doEncode, false, CodecPolicy.LENIENT);
+    }
+
+    /**
+     * Wraps {@code in} so that everything read through this stream is either Base16-encoded or
+     * Base16-decoded with the chosen alphabet case, using the lenient decoding policy.
+     *
+     * @param in InputStream to wrap.
+     * @param doEncode true if we should encode all data read from us, false if we should decode.
+     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
+     */
+    public Base16InputStream(final InputStream in, final boolean doEncode,
+            final boolean lowerCase) {
+        this(in, doEncode, lowerCase, CodecPolicy.LENIENT);
+    }
+
+    /**
+     * Wraps {@code in} so that everything read through this stream is either Base16-encoded or
+     * Base16-decoded by a {@link Base16} codec built from the given alphabet case and decoding policy.
+     *
+     * @param in InputStream to wrap.
+     * @param doEncode true if we should encode all data read from us, false if we should decode.
+     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
+     * @param decodingPolicy Decoding policy.
+     */
+    public Base16InputStream(final InputStream in, final boolean doEncode,
+            final boolean lowerCase, final CodecPolicy decodingPolicy) {
+        super(in, new Base16(lowerCase, decodingPolicy), doEncode);
+    }
+}