Skip to content

Commit 3c21223

Browse files
aherbertgarydgregory
authored andcommitted
[CODEC-280] Added strict decoding property to BaseNCodec. (#35)
* [CODEC-280] Added strict decoding property to BaseNCodec. The default is lenient encoding. Any trailing bits that cannot be interpreted as part of the encoding are discarded. Combinations of trailing characters that cannot be created by a valid encoding are partially interpreted. If set to strict encoding the codec will raise an exception when trailing bits are present that are not part of a valid encoding. Added tests to show that in lenient mode re-encoding will create a different byte array. In strict mode the re-encoding should create the same byte array as the original (with appropriate pad characters on the input). * [CODEC-280] Added strict decoding property to BCodec. * [CODEC-280] Removed unused import @ignore from BCodecTest
1 parent f5a61f0 commit 3c21223

8 files changed

Lines changed: 220 additions & 35 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ The <action> type attribute can be add,update,fix,remove.
4444

4545
<release version="1.15" date="YYYY-MM-DD" description="Feature and fix release.">
4646
<action issue="CODEC-264" dev="aherbert" due-to="Andy Seaborne" type="fix">MurmurHash3: Ensure hash128 maintains the sign extension bug.</action>
47+
<action issue="CODEC-280" dev="aherbert" type="update">Base32/Base64/BCodec: Added strict decoding property to control handling of trailing bits. Default lenient mode discards them without error. Strict mode raise an exception.</action>
4748
</release>
4849

4950
<release version="1.14" date="2019-12-30" description="Feature and fix release.">

src/main/java/org/apache/commons/codec/binary/Base32.java

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,6 @@ public class Base32 extends BaseNCodec {
114114
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
115115
};
116116

117-
/** Mask used to extract 7 bits, used when decoding final trailing character. */
118-
private static final long MASK_7BITS = 0x7fL;
119-
/** Mask used to extract 6 bits, used when decoding final trailing character. */
120-
private static final long MASK_6BITS = 0x3fL;
121117
/** Mask used to extract 5 bits, used when encoding Base32 bytes */
122118
private static final int MASK_5BITS = 0x1f;
123119
/** Mask used to extract 4 bits, used when decoding final trailing character. */
@@ -387,17 +383,26 @@ void decode(final byte[] input, int inPos, final int inAvail, final Context cont
387383
// Two forms of EOF as far as Base32 decoder is concerned: actual
388384
// EOF (-1) and first time '=' character is encountered in stream.
389385
// This approach makes the '=' padding characters completely optional.
390-
if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do
386+
if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
391387
final byte[] buffer = ensureBufferSize(decodeSize, context);
392388

393-
// we ignore partial bytes, i.e. only multiples of 8 count
389+
// We ignore partial bytes, i.e. only multiples of 8 count.
390+
// Any combination not part of a valid encoding is either partially decoded
391+
// or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
392+
// It is not possible to encode with 1, 3, 6 trailing characters.
393+
// For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
394+
// See the encode(byte[]) method EOF section.
394395
switch (context.modulus) {
396+
// case 0 : // impossible, as excluded above
397+
case 1 : // 5 bits - either ignore entirely, or raise an exception
398+
validateTrailingCharacters();
395399
case 2 : // 10 bits, drop 2 and output one byte
396400
validateCharacter(MASK_2BITS, context);
397401
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS);
398402
break;
399-
case 3 : // 15 bits, drop 7 and output 1 byte
400-
validateCharacter(MASK_7BITS, context);
403+
case 3 : // 15 bits, drop 7 and output 1 byte, or raise an exception
404+
validateTrailingCharacters();
405+
// Not possible from a valid encoding but decode anyway
401406
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS);
402407
break;
403408
case 4 : // 20 bits = 2*8 + 4
@@ -406,21 +411,22 @@ void decode(final byte[] input, int inPos, final int inAvail, final Context cont
406411
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
407412
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
408413
break;
409-
case 5 : // 25bits = 3*8 + 1
414+
case 5 : // 25 bits = 3*8 + 1
410415
validateCharacter(MASK_1BITS, context);
411416
context.lbitWorkArea = context.lbitWorkArea >> 1;
412417
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
413418
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
414419
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
415420
break;
416-
case 6 : // 30bits = 3*8 + 6
417-
validateCharacter(MASK_6BITS, context);
421+
case 6 : // 30 bits = 3*8 + 6, or raise an exception
422+
validateTrailingCharacters();
423+
// Not possible from a valid encoding but decode anyway
418424
context.lbitWorkArea = context.lbitWorkArea >> 6;
419425
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS);
420426
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS);
421427
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS);
422428
break;
423-
case 7 : // 35 = 4*8 +3
429+
case 7 : // 35 bits = 4*8 +3
424430
validateCharacter(MASK_3BITS, context);
425431
context.lbitWorkArea = context.lbitWorkArea >> 3;
426432
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS);
@@ -559,6 +565,20 @@ public boolean isInAlphabet(final byte octet) {
559565
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
560566
}
561567

568+
/**
569+
* Validates whether decoding allows final trailing characters that cannot be
570+
* created during encoding.
571+
*
572+
* @throws IllegalArgumentException if strict decoding is enabled
573+
*/
574+
private void validateTrailingCharacters() {
575+
if (isStrictDecoding()) {
576+
throw new IllegalArgumentException(
577+
"Strict decoding: Last encoded character(s) (before the paddings if any) are valid base 32 alphabet but not a possible encoding. " +
578+
"Decoding requries either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
579+
}
580+
}
581+
562582
/**
563583
* Validates whether decoding the final trailing character is possible in the context
564584
* of the set of possible base 32 values.
@@ -571,12 +591,12 @@ public boolean isInAlphabet(final byte octet) {
571591
*
572592
* @throws IllegalArgumentException if the bits being checked contain any non-zero value
573593
*/
574-
private static void validateCharacter(final long emptyBitsMask, final Context context) {
594+
private void validateCharacter(final long emptyBitsMask, final Context context) {
575595
// Use the long bit work area
576-
if ((context.lbitWorkArea & emptyBitsMask) != 0) {
596+
if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
577597
throw new IllegalArgumentException(
578-
"Last encoded character (before the paddings if any) is a valid base 32 alphabet but not a possible value. " +
579-
"Expected the discarded bits to be zero.");
598+
"Strict decoding: Last encoded character (before the paddings if any) is a valid base 32 alphabet but not a possible encoding. " +
599+
"Expected the discarded bits from the character to be zero.");
580600
}
581601
}
582602
}

src/main/java/org/apache/commons/codec/binary/Base64.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -470,8 +470,8 @@ void decode(final byte[] in, int inPos, final int inAvail, final Context context
470470
// Output all whole multiples of 8 bits and ignore the rest
471471
switch (context.modulus) {
472472
// case 0 : // impossible, as excluded above
473-
case 1 : // 6 bits - ignore entirely
474-
// TODO not currently tested; perhaps it is impossible?
473+
case 1 : // 6 bits - either ignore entirely, or raise an exception
474+
validateTrailingCharacter();
475475
break;
476476
case 2 : // 12 bits = 8 + 4
477477
validateCharacter(MASK_4BITS, context);
@@ -786,6 +786,20 @@ protected boolean isInAlphabet(final byte octet) {
786786
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
787787
}
788788

789+
/**
790+
* Validates whether decoding allows an entire final trailing character that cannot be
791+
* used for a complete byte.
792+
*
793+
* @throws IllegalArgumentException if strict decoding is enabled
794+
*/
795+
private void validateTrailingCharacter() {
796+
if (isStrictDecoding()) {
797+
throw new IllegalArgumentException(
798+
"Strict decoding: Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible encoding. " +
799+
"Decoding requries at least two trailing 6-bit characters to create bytes.");
800+
}
801+
}
802+
789803
/**
790804
* Validates whether decoding the final trailing character is possible in the context
791805
* of the set of possible base 64 values.
@@ -798,11 +812,11 @@ protected boolean isInAlphabet(final byte octet) {
798812
*
799813
* @throws IllegalArgumentException if the bits being checked contain any non-zero value
800814
*/
801-
private static void validateCharacter(final int emptyBitsMask, final Context context) {
802-
if ((context.ibitWorkArea & emptyBitsMask) != 0) {
815+
private void validateCharacter(final int emptyBitsMask, final Context context) {
816+
if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
803817
throw new IllegalArgumentException(
804-
"Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible value. " +
805-
"Expected the discarded bits to be zero.");
818+
"Strict decoding: Last encoded character (before the paddings if any) is a valid base 64 alphabet but not a possible encoding. " +
819+
"Expected the discarded bits from the character to be zero.");
806820
}
807821
}
808822
}

src/main/java/org/apache/commons/codec/binary/BaseNCodec.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.commons.codec.BinaryEncoder;
2424
import org.apache.commons.codec.DecoderException;
2525
import org.apache.commons.codec.EncoderException;
26+
import org.apache.commons.codec.binary.BaseNCodec.Context;
2627

2728
/**
2829
* Abstract superclass for Base-N encoders and decoders.
@@ -190,6 +191,12 @@ public String toString() {
190191
*/
191192
private final int chunkSeparatorLength;
192193

194+
/**
195+
* If true then decoding should throw an exception for impossible combinations of bits at the
196+
* end of the byte input. The default is to decode as much of them as possible.
197+
*/
198+
private boolean strictDecoding;
199+
193200
/**
194201
* Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
195202
* If {@code chunkSeparatorLength} is zero, then chunking is disabled.
@@ -223,6 +230,47 @@ protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
223230
this.pad = pad;
224231
}
225232

233+
/**
234+
* Sets the decoding behaviour when the input bytes contain leftover trailing bits that
235+
* cannot be created by a valid encoding. These can be bits that are unused from the final
236+
* character or entire characters. The default mode is lenient decoding. Set this to
237+
* {@code true} to enable strict decoding.
238+
* <ul>
239+
* <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible.
240+
* The remainder are discarded.
241+
* <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits
242+
* are not part of a valid encoding. Any unused bits from the final character must
243+
* be zero. Impossible counts of entire final characters are not allowed.
244+
* </ul>
245+
*
246+
* <p>When strict decoding is enabled it is expected that the decoded bytes will be re-encoded
247+
* to a byte array that matches the original, i.e. no changes occur on the final
248+
* character. This requires that the input bytes use the same padding and alphabet
249+
* as the encoder.
250+
*
251+
* @param strictDecoding Set to true to enable strict decoding; otherwise use lenient decoding.
252+
* @see #encode(byte[])
253+
* @since 1.15
254+
*/
255+
public void setStrictDecoding(boolean strictDecoding) {
256+
this.strictDecoding = strictDecoding;
257+
}
258+
259+
/**
260+
* Returns true if decoding behaviour is strict. Decoding will raise an
261+
* {@link IllegalArgumentException} if trailing bits are not part of a valid encoding.
262+
*
263+
* <p>The default is false for lenient encoding. Decoding will compose trailing bits
264+
* into 8-bit bytes and discard the remainder.
265+
*
266+
* @return true if using strict decoding
267+
* @see #setStrictDecoding(boolean)
268+
* @since 1.15
269+
*/
270+
public boolean isStrictDecoding() {
271+
return strictDecoding;
272+
}
273+
226274
/**
227275
* Returns true if this object has buffered data for reading.
228276
*

src/main/java/org/apache/commons/codec/net/BCodec.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ public class BCodec extends RFC1522Codec implements StringEncoder, StringDecoder
4848
*/
4949
private final Charset charset;
5050

51+
/**
52+
* If true then decoding should throw an exception for impossible combinations of bits at the
53+
* end of the byte input. The default is to decode as much of them as possible.
54+
*/
55+
private boolean strictDecoding;
56+
5157
/**
5258
* Default constructor.
5359
*/
@@ -82,6 +88,40 @@ public BCodec(final String charsetName) {
8288
this(Charset.forName(charsetName));
8389
}
8490

91+
/**
92+
* Sets the decoding behaviour when the input bytes contain leftover trailing bits that
93+
* cannot be created by a valid Base64 encoding. This setting is transferred to the instance
94+
* of {@link Base64} used to perform decoding.
95+
*
96+
* <p>The default is false for lenient encoding. Decoding will compose trailing bits
97+
* into 8-bit bytes and discard the remainder.
98+
*
99+
* <p>Set to true to enable strict decoding. Decoding will raise a
100+
* {@link DecoderException} if trailing bits are not part of a valid Base64 encoding.
101+
*
102+
* @param strictDecoding Set to true to enable strict decoding; otherwise use lenient decoding.
103+
* @see org.apache.commons.codec.binary.BaseNCodec#setStrictDecoding(boolean) BaseNCodec.setStrictDecoding(boolean)
104+
* @since 1.15
105+
*/
106+
public void setStrictDecoding(boolean strictDecoding) {
107+
this.strictDecoding = strictDecoding;
108+
}
109+
110+
/**
111+
* Returns true if decoding behaviour is strict. Decoding will raise a
112+
* {@link DecoderException} if trailing bits are not part of a valid Base64 encoding.
113+
*
114+
* <p>The default is false for lenient encoding. Decoding will compose trailing bits
115+
* into 8-bit bytes and discard the remainder.
116+
*
117+
* @return true if using strict decoding
118+
* @see #setStrictDecoding(boolean)
119+
* @since 1.15
120+
*/
121+
public boolean isStrictDecoding() {
122+
return strictDecoding;
123+
}
124+
85125
@Override
86126
protected String getEncoding() {
87127
return "B";
@@ -100,7 +140,9 @@ protected byte[] doDecoding(final byte[] bytes) {
100140
if (bytes == null) {
101141
return null;
102142
}
103-
return Base64.decodeBase64(bytes);
143+
final Base64 codec = new Base64();
144+
codec.setStrictDecoding(strictDecoding);
145+
return codec.decode(bytes);
104146
}
105147

106148
/**

src/test/java/org/apache/commons/codec/binary/Base32Test.java

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919
package org.apache.commons.codec.binary;
2020

2121
import static org.junit.Assert.assertEquals;
22+
import static org.junit.Assert.assertFalse;
2223
import static org.junit.Assert.assertArrayEquals;
2324
import static org.junit.Assert.assertNotNull;
25+
import static org.junit.Assert.assertTrue;
2426
import static org.junit.Assert.fail;
2527

2628
import java.nio.charset.Charset;
@@ -299,6 +301,7 @@ public void testBase32HexImpossibleSamples() {
299301
}
300302

301303
private void testImpossibleCases(final Base32 codec, final String[] impossible_cases) {
304+
codec.setStrictDecoding(true);
302305
for (final String impossible : impossible_cases) {
303306
try {
304307
codec.decode(impossible);
@@ -309,6 +312,11 @@ private void testImpossibleCases(final Base32 codec, final String[] impossible_c
309312
}
310313
}
311314

315+
@Test
316+
public void testBase32DecodingOfTrailing5Bits() {
317+
assertBase32DecodingOfTrailingBits(5);
318+
}
319+
312320
@Test
313321
public void testBase32DecodingOfTrailing10Bits() {
314322
assertBase32DecodingOfTrailingBits(10);
@@ -349,31 +357,49 @@ public void testBase32DecodingOfTrailing35Bits() {
349357
*/
350358
private static void assertBase32DecodingOfTrailingBits(final int nbits) {
351359
final Base32 codec = new Base32();
352-
// Create the encoded bytes. The first characters must be valid so fill with 'zero'.
353-
final byte[] encoded = new byte[nbits / 5];
354-
Arrays.fill(encoded, ENCODE_TABLE[0]);
360+
// Requires strict decoding
361+
codec.setStrictDecoding(true);
362+
assertTrue(codec.isStrictDecoding());
363+
// A lenient decoder should not re-encode to the same bytes
364+
final Base32 defaultCodec = new Base32();
365+
assertFalse(defaultCodec.isStrictDecoding());
366+
367+
// Create the encoded bytes. The first characters must be valid so fill with 'zero'
368+
// then pad to the block size.
369+
final int length = nbits / 5;
370+
final byte[] encoded = new byte[8];
371+
Arrays.fill(encoded, 0, length, ENCODE_TABLE[0]);
372+
Arrays.fill(encoded, length, encoded.length, (byte) '=');
355373
// Compute how many bits would be discarded from 8-bit bytes
356374
final int discard = nbits % 8;
357375
final int emptyBitsMask = (1 << discard) - 1;
376+
// Special case when an impossible number of trailing characters
377+
final boolean invalid = length == 1 || length == 3 || length == 6;
358378
// Enumerate all 32 possible final characters in the last position
359-
final int last = encoded.length - 1;
379+
final int last = length - 1;
360380
for (int i = 0; i < 32; i++) {
361381
encoded[last] = ENCODE_TABLE[i];
362382
// If the lower bits are set we expect an exception. This is not a valid
363383
// final character.
364-
if ((i & emptyBitsMask) != 0) {
384+
if (invalid || (i & emptyBitsMask) != 0) {
365385
try {
366386
codec.decode(encoded);
367387
fail("Final base-32 digit should not be allowed");
368388
} catch (final IllegalArgumentException ex) {
369389
// expected
370390
}
391+
// The default lenient mode should decode this
392+
final byte[] decoded = defaultCodec.decode(encoded);
393+
// Re-encoding should not match the original array as it was invalid
394+
assertFalse(Arrays.equals(encoded, defaultCodec.encode(decoded)));
371395
} else {
372396
// Otherwise this should decode
373397
final byte[] decoded = codec.decode(encoded);
374398
// Compute the bits that were encoded. This should match the final decoded byte.
375399
final int bitsEncoded = i >> discard;
376400
assertEquals("Invalid decoding of last character", bitsEncoded, decoded[decoded.length - 1]);
401+
// Re-encoding should match the original array (requires the same padding character)
402+
assertArrayEquals(encoded, codec.encode(decoded));
377403
}
378404
}
379405
}

0 commit comments

Comments
 (0)