Skip to content

Commit bac0b9e

Browse files
Patch for CODEC-239 - Allow "look-alike" characters when decoding Base32
Allows "look-alike" characters like 0 (digit zero) and 1 (digit one) as aliases for O (uppercase oh) and I (uppercase eye) respectively when decoding.
1 parent 3b9a0a2 commit bac0b9e

File tree

2 files changed

+153
-3
lines changed

2 files changed

+153
-3
lines changed

src/main/java/org/apache/commons/codec/binary/Base32.java

+129-2
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,25 @@ public class Base32 extends BaseNCodec {
7676
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 70 - 7a p-z/**/
7777
};
7878

79+
/**
80+
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified
81+
* in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the Base32
82+
* alphabet but fall within the bounds of the array are translated to -1.
83+
* In addition, the look-alike digits 0 (zero) and 1 (one) are translated to the same values as O (14) and I (8).
* Lowercase letters are translated to the same values as their uppercase forms.
84+
*/
85+
private static final byte[] DECODE_TABLE_WITH_LOOKALIKES = {
86+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
87+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
88+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
89+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
90+
14, 8, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 0-7 (0 and 1 are look-alike aliases for O and I)
91+
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
92+
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a P-Z
93+
-1, -1, -1, -1, -1, // 5b - 5f [-_
94+
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 60 - 6f `-o
95+
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 70 - 7a p-z
96+
};
97+
7998
/**
8099
* This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet"
81100
* equivalents as specified in Table 3 of RFC 4648.
@@ -104,6 +123,24 @@ public class Base32 extends BaseNCodec {
104123
25, 26, 27, 28, 29, 30, 31 // 70-76 p-v
105124
};
106125

126+
/**
127+
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as
128+
* specified in Table 4 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the
129+
* Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
* Note that 0, 1, I and O each have their own distinct value in this table, so despite its name it defines no
* look-alike aliases. Lowercase letters are translated to the same values as their uppercase forms.
130+
*/
131+
private static final byte[] HEX_DECODE_TABLE_WITH_LOOKALIKES = {
132+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
133+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
134+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
135+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
136+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
137+
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
138+
25, 26, 27, 28, 29, 30, 31, // 50-56 P-V
139+
-1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f W-_
140+
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f `-o
141+
25, 26, 27, 28, 29, 30, 31 // 70-76 p-v
142+
};
143+
107144
/**
108145
* This array is a lookup table that translates 5-bit positive integer index values into their
109146
* "Base32 Hex Alphabet" equivalents as specified in Table 4 of RFC 4648.
@@ -186,6 +223,20 @@ public Base32(final boolean useHex) {
186223
this(0, null, useHex, PAD_DEFAULT);
187224
}
188225

226+
/**
227+
* Creates a Base32 codec used for decoding and encoding.
228+
* <p>
229+
* When encoding the line length is 0 (no chunking).
230+
* </p>
231+
* @param useHex if {@code true} then use Base32 Hex alphabet
232+
* @param allowLookalikes
233+
* if {@code true}, then allow look-alike characters like {@code 0} -&gt; {@code O}
234+
* and {@code 1} -&gt; {@code I} when decoding, otherwise ignore look-alike characters
235+
*/
236+
public Base32(final boolean useHex, final boolean allowLookalikes) {
237+
this(0, null, useHex, allowLookalikes, PAD_DEFAULT);
238+
}
239+
189240
/**
190241
* Creates a Base32 codec used for decoding and encoding.
191242
* <p>
@@ -198,6 +249,22 @@ public Base32(final boolean useHex, final byte pad) {
198249
this(0, null, useHex, pad);
199250
}
200251

252+
253+
/**
254+
* Creates a Base32 codec used for decoding and encoding.
255+
* <p>
256+
* When encoding the line length is 0 (no chunking).
257+
* </p>
258+
* @param useHex if {@code true} then use Base32 Hex alphabet
259+
* @param allowLookalikes
260+
* if {@code true}, then allow look-alike characters like {@code 0} -&gt; {@code O}
261+
* and {@code 1} -&gt; {@code I} when decoding, otherwise ignore look-alike characters
262+
* @param pad byte used as padding byte.
263+
*/
264+
public Base32(final boolean useHex, final boolean allowLookalikes, final byte pad) {
265+
this(0, null, useHex, allowLookalikes, pad);
266+
}
267+
201268
/**
202269
* Creates a Base32 codec used for decoding and encoding.
203270
* <p>
@@ -260,6 +327,31 @@ public Base32(final int lineLength, final byte[] lineSeparator, final boolean us
260327
this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
261328
}
262329

330+
/**
331+
* Creates a Base32 / Base32 Hex codec used for decoding and encoding.
332+
* <p>
333+
* When encoding the line length and line separator are given in the constructor.
334+
* </p>
335+
* <p>
336+
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
337+
* </p>
338+
*
339+
* @param lineLength
340+
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
341+
* 8). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
342+
* decoding.
343+
* @param lineSeparator
344+
* Each line of encoded data will end with this sequence of bytes.
345+
* @param useHex
346+
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
* @param allowLookalikes
* if {@code true}, then allow look-alike characters like {@code 0} -&gt; {@code O}
* and {@code 1} -&gt; {@code I} when decoding, otherwise ignore look-alike characters
347+
* @throws IllegalArgumentException
348+
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the
349+
* lineLength &gt; 0 and lineSeparator is null.
350+
*/
351+
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final boolean allowLookalikes) {
352+
this(lineLength, lineSeparator, useHex, allowLookalikes, PAD_DEFAULT);
353+
}
354+
263355
/**
264356
* Creates a Base32 / Base32 Hex codec used for decoding and encoding.
265357
* <p>
@@ -283,14 +375,49 @@ public Base32(final int lineLength, final byte[] lineSeparator, final boolean us
283375
* lineLength &gt; 0 and lineSeparator is null.
284376
*/
285377
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte pad) {
378+
// Delegates with allowLookalikes = false, so this overload decodes with the tables that have no look-alike aliases.
this(lineLength, lineSeparator, useHex, false, pad);
379+
}
380+
381+
/**
382+
* Creates a Base32 / Base32 Hex codec used for decoding and encoding.
383+
* <p>
384+
* When encoding the line length and line separator are given in the constructor.
385+
* </p>
386+
* <p>
387+
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
388+
* </p>
389+
*
390+
* @param lineLength
391+
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
392+
* 8). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
393+
* decoding.
394+
* @param lineSeparator
395+
* Each line of encoded data will end with this sequence of bytes.
396+
* @param useHex
397+
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
398+
* @param allowLookalikes
399+
* if {@code true}, then allow look-alike characters like {@code 0} -&gt; {@code O}
400+
* and {@code 1} -&gt; {@code I} when decoding, otherwise ignore look-alike characters.
401+
* @param pad byte used as padding byte.
402+
* @throws IllegalArgumentException
403+
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the
404+
* lineLength &gt; 0 and lineSeparator is null.
405+
*/
406+
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final boolean allowLookalikes, final byte pad) {
286407
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength,
287408
lineSeparator == null ? 0 : lineSeparator.length, pad);
288409
if (useHex) {
289410
this.encodeTable = HEX_ENCODE_TABLE;
290-
this.decodeTable = HEX_DECODE_TABLE;
411+
if(allowLookalikes)
412+
this.decodeTable = HEX_DECODE_TABLE_WITH_LOOKALIKES;
413+
else
414+
this.decodeTable = HEX_DECODE_TABLE;
291415
} else {
292416
this.encodeTable = ENCODE_TABLE;
293-
this.decodeTable = DECODE_TABLE;
417+
if(allowLookalikes)
418+
this.decodeTable = DECODE_TABLE_WITH_LOOKALIKES;
419+
else
420+
this.decodeTable = DECODE_TABLE;
294421
}
295422
if (lineLength > 0) {
296423
if (lineSeparator == null) {

src/test/java/org/apache/commons/codec/binary/Base32Test.java

+24-1
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818

1919
package org.apache.commons.codec.binary;
2020

21-
import static org.junit.Assert.assertEquals;
2221
import static org.junit.Assert.assertArrayEquals;
22+
import static org.junit.Assert.assertEquals;
2323
import static org.junit.Assert.assertNotNull;
2424
import static org.junit.Assert.fail;
2525

@@ -120,6 +120,14 @@ public class Base32Test {
120120
{"foobar" ,"MZXW6YTBOI%%%%%%"},
121121
};
122122

123+
/**
 * Look-alike fixtures: in each pair, element 0 is an encoded string containing the digits 0 and 1, and
 * element 1 is the same string written with the letters O and I. testBase32Lookalikes expects both to
 * decode to the same bytes.
 */
private static final String[][] BASE32_LOOKALIKES = {
124+
{ "O0oOI1iI", "OOOOIIII" }
125+
};
126+
127+
/**
 * Look-alike fixtures used by testBase32HexLookalikes; element 0 and element 1 of each pair are expected to
 * decode to the same bytes.
 * NOTE(review): this pair is identical to the one in BASE32_LOOKALIKES. In the Base32 Hex alphabet 0, 1, I and O
 * are distinct symbols, so these two strings would presumably not decode equally with a hex codec - confirm the
 * intended fixture.
 */
private static final String[][] BASE32_HEX_LOOKALIKES = {
128+
{ "O0oOI1iI", "OOOOIIII" }
129+
};
130+
123131
@Test
124132
public void testBase64AtBufferStart() {
125133
testBase64InBuffer(0, 100);
@@ -297,4 +305,19 @@ private void testImpossibleCases(final Base32 codec, final String[] impossible_c
297305
}
298306
}
299307
}
308+
/**
 * Checks that a standard-alphabet codec with look-alikes enabled decodes each string in BASE32_LOOKALIKES
 * to the same bytes as its counterpart written with the letters O and I.
 */
@Test
309+
public void testBase32Lookalikes() throws Exception {
310+
// useHex = false, allowLookalikes = true
final Base32 codec = new Base32(false, true);
311+
for(String[] element : BASE32_LOOKALIKES)
312+
assertArrayEquals(codec.decode(element[0]),
313+
codec.decode(element[1]));
314+
}
315+
316+
/**
 * Checks that both strings of every BASE32_HEX_LOOKALIKES pair decode to the same bytes.
 * NOTE(review): the codec below is created with useHex = false, so this test takes the same standard-alphabet
 * path as testBase32Lookalikes and never uses the Base32 Hex decode tables - confirm whether
 * new Base32(true, true) (with a hex-appropriate fixture) was intended.
 */
@Test
317+
public void testBase32HexLookalikes() throws Exception {
318+
// useHex = false, allowLookalikes = true
final Base32 codec = new Base32(false, true);
319+
for(String[] element : BASE32_HEX_LOOKALIKES)
320+
assertArrayEquals(codec.decode(element[0]),
321+
codec.decode(element[1]));
322+
}
300323
}

0 commit comments

Comments
 (0)