Skip to content

Commit 0ed8bd7

Browse files
committed
Improve javadoc, naming, magic constants
1 parent b71233b commit 0ed8bd7

2 files changed

Lines changed: 106 additions & 53 deletions

File tree

src/main/java/org/apache/commons/codec/digest/Blake3.java

Lines changed: 89 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,47 @@
2424
* {@linkplain #initKeyedHash(byte[]) keyed hash function} (MAC, PRF), and a
2525
* {@linkplain #initKeyDerivationFunction(byte[]) key derivation function} (KDF). Blake3 has a 128-bit security level
2626
* and a default output length of 256 bits (32 bytes) which can be extended up to 2<sup>64</sup> bytes.
27+
* <h2>Hashing</h2>
28+
* <p>Hash mode calculates the same output hash given the same input bytes and can be used as both a message digest and
29+
* an extensible output function.</p>
30+
* <pre>{@code
31+
* Blake3 hasher = Blake3.initHash();
32+
* hasher.update("Hello, world!".getBytes(StandardCharsets.UTF_8));
33+
* byte[] hash = new byte[32];
34+
* hasher.doFinalize(hash);
35+
* }</pre>
36+
* <h2>Keyed Hashing</h2>
37+
* <p>Keyed hashes take a 32-byte secret key and calculate a message authentication code on some input bytes. These
38+
* also work as pseudo-random functions (PRFs) with extensible output similar to the extensible hash output. Note that
39+
* Blake3 keyed hashes have the same performance as plain hashes; the key is used in initialization in place of a
40+
* standard initialization vector used for plain hashing.</p>
41+
* <pre>{@code
42+
* SecureRandom random = new SecureRandom(); // or SecureRandom.getInstanceStrong() in Java 8+
43+
* byte[] key = new byte[32];
44+
* random.nextBytes(key);
45+
* Blake3 hasher = Blake3.initKeyedHash(key);
46+
* hasher.update("Hello, Alice!".getBytes(StandardCharsets.UTF_8));
47+
* byte[] mac = new byte[32];
48+
* hasher.doFinalize(mac);
49+
* }</pre>
50+
* <h2>Key Derivation</h2>
51+
* <p>A specific hash mode for deriving session keys and other derived keys in a unique key derivation context
52+
* identified by some sequence of bytes. These context strings should be unique but do not need to be kept secret.
53+
* Additional input data is hashed for key material which can be finalized to derive subkeys.</p>
54+
* <pre>{@code
55+
* String context = "org.apache.commons.codec.digest.Blake3Example";
56+
* byte[] sharedSecret = ...;
57+
* byte[] senderId = ...;
58+
* byte[] recipientId = ...;
59+
* Blake3 kdf = Blake3.initKeyDerivationFunction(context.getBytes(StandardCharsets.UTF_8));
60+
* kdf.update(sharedSecret);
61+
* kdf.update(senderId);
62+
* kdf.update(recipientId);
63+
* byte[] txKey = new byte[32];
64+
* byte[] rxKey = new byte[32];
65+
* kdf.doFinalize(txKey);
66+
* kdf.doFinalize(rxKey);
67+
* }</pre>
2768
* <p>
2869
* Adapted from the ISC-licensed O(1) Cryptography library by Matt Sicker and ported from the reference public domain
2970
* implementation by Jack O'Connor.
@@ -37,10 +78,14 @@ public final class Blake3 {
3778
private static final int INT_BYTES = Integer.SIZE / Byte.SIZE;
3879

3980
private static final int BLOCK_LEN = 64;
81+
private static final int BLOCK_INTS = BLOCK_LEN / INT_BYTES;
4082
private static final int KEY_LEN = 32;
83+
private static final int KEY_INTS = KEY_LEN / INT_BYTES;
4184
private static final int OUT_LEN = 32;
4285
private static final int CHUNK_LEN = 1024;
86+
private static final int CHAINING_VALUE_INTS = 8;
4387

88+
// standard hash key used for plain hashes; same initialization vector as Blake2s
4489
private static final int[] IV =
4590
{ 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
4691

@@ -67,59 +112,59 @@ public void reset() {
67112
}
68113

69114
/**
70-
* Absorbs the provided bytes into this instance's state.
115+
* Updates this hash state using the provided bytes.
71116
*
72-
* @param in source array to absorb data from
117+
* @param in source array to update data from
73118
*/
74-
public void absorb(final byte[] in) {
119+
public void update(final byte[] in) {
75120
Objects.requireNonNull(in);
76-
absorb(in, 0, in.length);
121+
update(in, 0, in.length);
77122
}
78123

79124
/**
80-
* Absorbs the provided bytes at an offset into this instance's state.
125+
* Updates this hash state using the provided bytes at an offset.
81126
*
82-
* @param in source array to absorb data from
83-
* @param offset where in the array to begin absorbing bytes
84-
* @param length number of bytes to absorb in
127+
* @param in source array to update data from
128+
* @param offset where in the array to begin reading bytes
129+
* @param length number of bytes to update
85130
*/
86-
public void absorb(final byte[] in, final int offset, final int length) {
131+
public void update(final byte[] in, final int offset, final int length) {
87132
Objects.requireNonNull(in);
88133
engineState.inputData(in, offset, length);
89134
}
90135

91136
/**
92-
* Squeezes hash output data that depends on the sequence of absorbed bytes preceding this invocation and any previously
93-
* squeezed bytes.
137+
* Finalizes hash output data that depends on the sequence of updated bytes preceding this invocation and any
138+
* previously finalized bytes. Note that this can finalize up to 2<sup>64</sup> bytes per instance.
94139
*
95-
* @param out destination array to squeeze bytes into
140+
* @param out destination array to finalize bytes into
96141
*/
97-
public void squeeze(final byte[] out) {
98-
squeeze(out, 0, out.length);
142+
public void doFinalize(final byte[] out) {
143+
doFinalize(out, 0, out.length);
99144
}
100145

101146
/**
102-
* Squeezes an arbitrary number of bytes into the provided output array that depends on the sequence of previously
103-
* absorbed and squeezed bytes.
147+
* Finalizes an arbitrary number of bytes into the provided output array that depends on the sequence of previously
148+
* updated and finalized bytes. Note that this can finalize up to 2<sup>64</sup> bytes per instance.
104149
*
105-
* @param out destination array to squeeze bytes into
150+
* @param out destination array to finalize bytes into
106151
* @param offset where in the array to begin writing bytes to
107-
* @param length number of bytes to squeeze out
152+
* @param length number of bytes to finalize
108153
*/
109-
public void squeeze(final byte[] out, final int offset, final int length) {
154+
public void doFinalize(final byte[] out, final int offset, final int length) {
110155
Objects.requireNonNull(out);
111156
engineState.outputHash(out, offset, length);
112157
}
113158

114159
/**
115160
* Finalizes and returns an arbitrary number of bytes dependent on the sequence of previously updated and finalized bytes.
116161
*
117-
* @param nrBytes number of bytes to squeeze
118-
* @return requested number of squeezed bytes
162+
* @param nrBytes number of bytes to finalize
163+
* @return requested number of finalized bytes
119164
*/
120-
public byte[] squeeze(final int nrBytes) {
165+
public byte[] doFinalize(final int nrBytes) {
121166
final byte[] hash = new byte[nrBytes];
122-
squeeze(hash);
167+
doFinalize(hash);
123168
return hash;
124169
}
125170

@@ -144,7 +189,7 @@ public static Blake3 initKeyedHash(final byte[] key) {
144189
if (key.length != KEY_LEN) {
145190
throw new IllegalArgumentException("Blake3 keys must be 32 bytes");
146191
}
147-
return new Blake3(unpackInts(key, 8), KEYED_HASH);
192+
return new Blake3(unpackInts(key, KEY_INTS), KEYED_HASH);
148193
}
149194

150195
/**
@@ -161,7 +206,7 @@ public static Blake3 initKeyDerivationFunction(final byte[] kdfContext) {
161206
kdf.inputData(kdfContext, 0, kdfContext.length);
162207
final byte[] key = new byte[KEY_LEN];
163208
kdf.outputHash(key, 0, key.length);
164-
return new Blake3(unpackInts(key, 8), DERIVE_KEY_MATERIAL);
209+
return new Blake3(unpackInts(key, KEY_INTS), DERIVE_KEY_MATERIAL);
165210
}
166211

167212
/**
@@ -172,8 +217,8 @@ public static Blake3 initKeyDerivationFunction(final byte[] kdfContext) {
172217
*/
173218
public static byte[] hash(final byte[] data) {
174219
final Blake3 blake3 = Blake3.initHash();
175-
blake3.absorb(data);
176-
return blake3.squeeze(OUT_LEN);
220+
blake3.update(data);
221+
return blake3.doFinalize(OUT_LEN);
177222
}
178223

179224
/**
@@ -185,8 +230,8 @@ public static byte[] hash(final byte[] data) {
185230
*/
186231
public static byte[] keyedHash(final byte[] key, final byte[] data) {
187232
final Blake3 blake3 = Blake3.initKeyedHash(key);
188-
blake3.absorb(data);
189-
return blake3.squeeze(OUT_LEN);
233+
blake3.update(data);
234+
return blake3.doFinalize(OUT_LEN);
190235
}
191236

192237
private static void packInt(final int value, final byte[] dst, final int off, final int len) {
@@ -248,7 +293,7 @@ private static void round(final int[] state, final int[] msg, final byte[] sched
248293
private static int[] compress(
249294
final int[] chainingValue, final int[] blockWords, final int blockLength, final long counter,
250295
final int flags) {
251-
final int[] state = Arrays.copyOf(chainingValue, 16);
296+
final int[] state = Arrays.copyOf(chainingValue, BLOCK_INTS);
252297
System.arraycopy(IV, 0, state, 8, 4);
253298
state[12] = (int) counter;
254299
state[13] = (int) (counter >> Integer.SIZE);
@@ -258,7 +303,7 @@ private static int[] compress(
258303
final byte[] schedule = MSG_SCHEDULE[i];
259304
round(state, blockWords, schedule);
260305
}
261-
for (int i = 0; i < 8; i++) {
306+
for (int i = 0; i < state.length / 2; i++) {
262307
state[i] ^= state[i + 8];
263308
state[i + 8] ^= chainingValue[i];
264309
}
@@ -267,8 +312,8 @@ private static int[] compress(
267312

268313
private static Output parentOutput(
269314
final int[] leftChildCV, final int[] rightChildCV, final int[] key, final int flags) {
270-
final int[] blockWords = Arrays.copyOf(leftChildCV, 16);
271-
System.arraycopy(rightChildCV, 0, blockWords, 8, 8);
315+
final int[] blockWords = Arrays.copyOf(leftChildCV, BLOCK_INTS);
316+
System.arraycopy(rightChildCV, 0, blockWords, 8, CHAINING_VALUE_INTS);
272317
return new Output(key.clone(), blockWords, 0, BLOCK_LEN, flags | PARENT);
273318
}
274319

@@ -278,8 +323,8 @@ private static int[] parentChainingValue(
278323
}
279324

280325
/**
281-
* Represents the state just prior to either producing an eight word chaining value or any number of output bytes when the
282-
* ROOT flag is set.
326+
* Represents the state just prior to either producing an eight word chaining value or any number of output bytes
327+
* when the ROOT flag is set.
283328
*/
284329
private static class Output {
285330
private final int[] inputChainingValue;
@@ -299,7 +344,8 @@ private static class Output {
299344
}
300345

301346
int[] chainingValue() {
302-
return Arrays.copyOf(compress(inputChainingValue, blockWords, blockLength, counter, flags), 8);
347+
return Arrays
348+
.copyOf(compress(inputChainingValue, blockWords, blockLength, counter, flags), CHAINING_VALUE_INTS);
303349
}
304350

305351
void rootOutputBytes(final byte[] out, int offset, int length) {
@@ -348,9 +394,10 @@ void update(final byte[] input, int offset, int length) {
348394
if (blockLength == BLOCK_LEN) {
349395
// If the block buffer is full, compress it and clear it. More
350396
// input is coming, so this compression is not CHUNK_END.
351-
final int[] blockWords = unpackInts(block, 16);
397+
final int[] blockWords = unpackInts(block, BLOCK_INTS);
352398
chainingValue = Arrays.copyOf(
353-
compress(chainingValue, blockWords, BLOCK_LEN, chunkCounter, flags | startFlag()), 8);
399+
compress(chainingValue, blockWords, BLOCK_LEN, chunkCounter, flags | startFlag()),
400+
CHAINING_VALUE_INTS);
354401
blocksCompressed++;
355402
blockLength = 0;
356403
Arrays.fill(block, (byte) 0);
@@ -366,7 +413,7 @@ void update(final byte[] input, int offset, int length) {
366413
}
367414

368415
Output output() {
369-
final int[] blockWords = unpackInts(block, 16);
416+
final int[] blockWords = unpackInts(block, BLOCK_INTS);
370417
final int outputFlags = flags | startFlag() | CHUNK_END;
371418
return new Output(chainingValue, blockWords, chunkCounter, blockLength, outputFlags);
372419
}
@@ -376,6 +423,9 @@ private static class EngineState {
376423
private final int[] key;
377424
private final int flags;
378425
// Space for 54 subtree chaining values: 2^54 * CHUNK_LEN = 2^64
426+
// No more than 54 entries can ever be added to this stack (after updating 2^64 bytes and not finalizing any)
427+
// so we preallocate the stack here. This can be smaller in environments where the data limit is expected to
428+
// be much lower.
379429
private final int[][] cvStack = new int[54][];
380430
private int stackLen;
381431
private ChunkState state;

src/test/java/org/apache/commons/codec/digest/Blake3TestVectorsTest.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,15 @@
2727

2828
import static org.junit.Assert.assertArrayEquals;
2929

30-
// Each test is an input length and three outputs, one for each of the hash, keyed_hash, and derive_key modes.
31-
// The input in each case is filled with a repeating sequence of 251 bytes: 0, 1, 2, ..., 249, 250, 0, 1, ..., and so on.
32-
// The key used with keyed_hash is the 32-byte ASCII string "whats the Elvish word for friend", also given in the `key` field
33-
// below. The context string used with derive_key is the ASCII string "BLAKE3 2019-12-27 16:29:52 test vectors context", also
34-
// given in the `context_string` field below. Outputs are encoded as hexadecimal. Each case is an extended output, and
35-
// implementations should also check that the first 32 bytes match their default-length output.
30+
/**
31+
* Tests the standard test vectors provided by the reference Blake3 implementation. Each test uses as input data the
32+
* cyclic byte sequence 0, 1, 2, ..., 249, 250, 0, 1, ..., up to a specified length. For each test, the hash of this
33+
* message up to a specific length is calculated. Then the same hash is calculated in keyed mode using the UTF-8
34+
* encoding of the string "whats the Elvish word for friend". Finally, the same hash is calculated in key derivation
35+
* mode using the KDF context string with the UTF-8 encoding of "BLAKE3 2019-12-27 16:29:52 test vectors context".
36+
* For each of these hashes, both the extended hash output and the truncated 32-byte hash outputs are validated against
37+
* these known answer tests (KATs).
38+
*/
3639
@RunWith(Parameterized.class)
3740
public class Blake3TestVectorsTest {
3841
private static final byte[] KEY = "whats the Elvish word for friend".getBytes(StandardCharsets.UTF_8);
@@ -277,8 +280,8 @@ public Blake3TestVectorsTest(int inputLength, String hash, String keyedHash, Str
277280

278281
@Test
279282
public void hashArbitraryOutputLength() {
280-
hasher.absorb(input);
281-
byte[] actual = hasher.squeeze(hash.length);
283+
hasher.update(input);
284+
byte[] actual = hasher.doFinalize(hash.length);
282285
assertArrayEquals(hash, actual);
283286
}
284287

@@ -290,8 +293,8 @@ public void hashTruncatedOutput() {
290293

291294
@Test
292295
public void keyedHashArbitraryOutputLength() {
293-
keyedHasher.absorb(input);
294-
byte[] actual = keyedHasher.squeeze(keyedHash.length);
296+
keyedHasher.update(input);
297+
byte[] actual = keyedHasher.doFinalize(keyedHash.length);
295298
assertArrayEquals(keyedHash, actual);
296299
}
297300

@@ -303,12 +306,12 @@ public void keyedHashTruncatedOutput() {
303306

304307
@Test
305308
public void keyDerivation() {
306-
kdfHasher.absorb(input);
307-
byte[] actual = kdfHasher.squeeze(deriveKey.length);
309+
kdfHasher.update(input);
310+
byte[] actual = kdfHasher.doFinalize(deriveKey.length);
308311
assertArrayEquals(deriveKey, actual);
309312
kdfHasher.reset();
310-
kdfHasher.absorb(input);
311-
byte[] truncated = kdfHasher.squeeze(32);
313+
kdfHasher.update(input);
314+
byte[] truncated = kdfHasher.doFinalize(32);
312315
assertArrayEquals(Arrays.copyOf(deriveKey, 32), truncated);
313316
}
314317
}

0 commit comments

Comments
 (0)