Skip to content

Commit f40005a

Browse files
committed
[CODEC-276] Reliance on default encoding in MurmurHash2 and MurmurHash3.
1 parent ef6023c commit f40005a

4 files changed

Lines changed: 35 additions & 14 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ The <action> type attribute can be add,update,fix,remove.
5555
<action issue="CODEC-273" dev="ggregory" type="add" due-to="Gary Gregory">Add Path APIs to org.apache.commons.codec.digest.DigestUtils similar to File APIs.</action>
5656
<action issue="CODEC-274" dev="ggregory" type="add" due-to="Gary Gregory">Add SHA-512/224 and SHA-512/256 to DigestUtils for Java 9 and up.</action>
5757
<action issue="CODEC-275" dev="ggregory" type="add" due-to="Claude Warren">Add missing note in javadoc when sign extension error is present #34.</action>
58+
<action issue="CODEC-276" dev="ggregory" type="fix" due-to="Gary Gregory">Reliance on default encoding in MurmurHash2 and MurmurHash3.</action>
5859
</release>
5960

6061
<release version="1.13" date="2019-07-20" description="Feature and fix release.">

src/main/java/org/apache/commons/codec/digest/MurmurHash2.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717

1818
package org.apache.commons.codec.digest;
1919

20+
import java.nio.charset.Charset;
21+
import java.nio.charset.StandardCharsets;
22+
2023
/**
2124
* Implementation of the MurmurHash2 32-bit and 64-bit hash functions.
2225
*
@@ -48,6 +51,13 @@
4851
*/
4952
public final class MurmurHash2 {
5053

54+
/**
55+
* Default Charset used to convert strings into bytes.
56+
*
57+
* Consider private; package private for tests only.
58+
*/
59+
static final Charset GET_BYTES_CHARSET = StandardCharsets.UTF_8;
60+
5161
// Constants for 32-bit variant
5262
private static final int M32 = 0x5bd1e995;
5363
private static final int R32 = 24;
@@ -132,7 +142,7 @@ public static int hash32(final byte[] data, final int length) {
132142
*
133143
* <pre>
134144
* int seed = 0x9747b28c;
135-
* byte[] bytes = data.getBytes();
145+
* byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
136146
* int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
137147
* </pre>
138148
*
@@ -141,7 +151,7 @@ public static int hash32(final byte[] data, final int length) {
141151
* @see #hash32(byte[], int, int)
142152
*/
143153
public static int hash32(final String text) {
144-
final byte[] bytes = text.getBytes();
154+
final byte[] bytes = text.getBytes(GET_BYTES_CHARSET);
145155
return hash32(bytes, bytes.length);
146156
}
147157

@@ -152,7 +162,7 @@ public static int hash32(final String text) {
152162
*
153163
* <pre>
154164
* int seed = 0x9747b28c;
155-
* byte[] bytes = text.substring(from, from + length).getBytes();
165+
* byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
156166
* int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
157167
* </pre>
158168
*
@@ -243,7 +253,7 @@ public static long hash64(final byte[] data, final int length) {
243253
*
244254
* <pre>
245255
* int seed = 0xe17a1465;
246-
* byte[] bytes = data.getBytes();
256+
* byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
247257
* long hash = MurmurHash2.hash64(bytes, bytes.length, seed);
248258
* </pre>
249259
*
@@ -252,7 +262,7 @@ public static long hash64(final byte[] data, final int length) {
252262
* @see #hash64(byte[], int, int)
253263
*/
254264
public static long hash64(final String text) {
255-
final byte[] bytes = text.getBytes();
265+
final byte[] bytes = text.getBytes(GET_BYTES_CHARSET);
256266
return hash64(bytes, bytes.length);
257267
}
258268

@@ -263,7 +273,7 @@ public static long hash64(final String text) {
263273
*
264274
* <pre>
265275
* int seed = 0xe17a1465;
266-
* byte[] bytes = text.substring(from, from + length).getBytes();
276+
* byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
267277
* long hash = MurmurHash2.hash64(bytes, bytes.length, seed);
268278
* </pre>
269279
*

src/main/java/org/apache/commons/codec/digest/MurmurHash3.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717

1818
package org.apache.commons.codec.digest;
1919

20+
import java.nio.charset.Charset;
21+
import java.nio.charset.StandardCharsets;
22+
2023
/**
2124
* Implementation of the MurmurHash3 32-bit and 128-bit hash functions.
2225
*
@@ -55,6 +58,13 @@
5558
*/
5659
public final class MurmurHash3 {
5760

61+
/**
62+
* Default Charset used to convert strings into bytes.
63+
*
64+
* Consider private; package private for tests only.
65+
*/
66+
static final Charset GET_BYTES_CHARSET = StandardCharsets.UTF_8;
67+
5868
/**
5969
* A random number to use for a hash code.
6070
*
@@ -233,7 +243,7 @@ public static int hash32(final byte[] data) {
233243
* <pre>
234244
* int offset = 0;
235245
* int seed = 104729;
236-
* byte[] bytes = data.getBytes();
246+
* byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
237247
* int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
238248
* </pre>
239249
*
@@ -249,7 +259,7 @@ public static int hash32(final byte[] data) {
249259
*/
250260
@Deprecated
251261
public static int hash32(final String data) {
252-
final byte[] bytes = data.getBytes();
262+
final byte[] bytes = data.getBytes(GET_BYTES_CHARSET);
253263
return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
254264
}
255265

@@ -751,7 +761,7 @@ public static long[] hash128x64(final byte[] data) {
751761
* <pre>
752762
* int offset = 0;
753763
* int seed = 104729;
754-
* byte[] bytes = data.getBytes();
764+
* byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
755765
* long[] hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
756766
* </pre>
757767
*
@@ -766,7 +776,7 @@ public static long[] hash128x64(final byte[] data) {
766776
*/
767777
@Deprecated
768778
public static long[] hash128(final String data) {
769-
final byte[] bytes = data.getBytes();
779+
final byte[] bytes = data.getBytes(GET_BYTES_CHARSET);
770780
return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
771781
}
772782

src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ public void testHash32String() {
355355
pos += Character.toChars(codePoint, chars, pos);
356356
}
357357
final String text = String.copyValueOf(chars, 0, pos);
358-
final byte[] bytes = text.getBytes();
358+
final byte[] bytes = text.getBytes(MurmurHash3.GET_BYTES_CHARSET);
359359
final int h1 = MurmurHash3.hash32(bytes, 0, bytes.length, seed);
360360
final int h2 = MurmurHash3.hash32(text);
361361
Assert.assertEquals(h1, h2);
@@ -455,7 +455,7 @@ public void testHash32x86WithTrailingNegativeSignedBytes() {
455455
*/
456456
@Test
457457
public void testHash64() {
458-
final byte[] origin = TEST_HASH64.getBytes();
458+
final byte[] origin = TEST_HASH64.getBytes(MurmurHash3.GET_BYTES_CHARSET);
459459
final long hash = MurmurHash3.hash64(origin);
460460
Assert.assertEquals(5785358552565094607L, hash);
461461
}
@@ -466,7 +466,7 @@ public void testHash64() {
466466
*/
467467
@Test
468468
public void testHash64WithOffsetAndLength() {
469-
final byte[] origin = TEST_HASH64.getBytes();
469+
final byte[] origin = TEST_HASH64.getBytes(MurmurHash3.GET_BYTES_CHARSET);
470470
final byte[] originOffset = new byte[origin.length + 150];
471471
Arrays.fill(originOffset, (byte) 123);
472472
System.arraycopy(origin, 0, originOffset, 150, origin.length);
@@ -627,7 +627,7 @@ public void testHash128String() {
627627
pos += Character.toChars(codePoint, chars, pos);
628628
}
629629
final String text = String.copyValueOf(chars, 0, pos);
630-
final byte[] bytes = text.getBytes();
630+
final byte[] bytes = text.getBytes(MurmurHash3.GET_BYTES_CHARSET);
631631
final long[] h1 = MurmurHash3.hash128(bytes, 0, bytes.length, seed);
632632
final long[] h2 = MurmurHash3.hash128(text);
633633
Assert.assertArrayEquals(h1, h2);

0 commit comments

Comments
 (0)