Skip to content

Commit 2bcdfd8

Browse files
melloware authored and garydgregory committed
CODEC-236: MurmurHash2 and MurmurHash3 implementations. (#22)
* CODEC-236: MurmurHash2 and MurmurHash3 implementations. * CODEC-236: Removed author tag. * CODEC-236: Added javadoc and increased unit test coverage.
1 parent 45a195e commit 2bcdfd8

5 files changed

Lines changed: 1214 additions & 2 deletions

File tree

src/changes/changes.xml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ The <action> type attribute can be add,update,fix,remove.
4646
<release version="1.13" date="YYYY-MM-DD" description="TBD">
4747
<action issue="CODEC-255" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic handles x incorrectly</action>
4848
<action issue="CODEC-254" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic does not treat the letter H correctly</action>
49-
<action issue="CODEC-257" dev="ggregory" type="update">Update from Java 7 to Java 8</action>
50-
<action issue="CODEC-134" dev="tmousaw-ptc" type="fix">Reject any decode request for a value that is impossible to encode to for Base32/Base64 rather than blindly decoding.</action>
49+
<action issue="CODEC-134" dev="tmousaw-ptc" type="fix">Reject any decode request for a value that is impossible to encode to for Base32/Base64 rather than blindly decoding.</action>
50+
<action issue="CODEC-236" dev="melloware" due-to="Viliam Holub" type="add">MurmurHash2 for 32-bit or 64-bit value.</action>
51+
<action issue="CODEC-236" dev="melloware" due-to="Austin Appleby" type="add">MurmurHash3 for 32-bit or 128-bit value.</action>
5152
</release>
5253

5354
<release version="1.12" date="2019-02-04" description="Feature and fix release.">
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.codec.digest;
19+
20+
/**
 * Implementation of the MurmurHash2 32-bit and 64-bit hash functions.
 *
 * <p>MurmurHash is a non-cryptographic hash function suitable for general
 * hash-based lookup. The name comes from two basic operations, multiply (MU)
 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions,
 * it is not specifically designed to be difficult to reverse by an adversary,
 * making it unsuitable for cryptographic purposes.</p>
 *
 * <p>This is a re-implementation of the original C code plus some additional
 * features (default seeds and {@code String} convenience overloads).</p>
 *
 * <p>Public domain.</p>
 *
 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
 * @since 1.13
 */
public final class MurmurHash2 {

    /** Seed used by the 32-bit overloads that do not take an explicit seed. */
    private static final int DEFAULT_SEED_32 = 0x9747b28c;

    /** Seed used by the 64-bit overloads that do not take an explicit seed. */
    private static final int DEFAULT_SEED_64 = 0xe17a1465;

    // 'M' and 'R' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.
    private static final int M32 = 0x5bd1e995;
    private static final int R32 = 24;
    private static final long M64 = 0xc6a4a7935bd1e995L;
    private static final int R64 = 47;

    // all methods static; private constructor.
    private MurmurHash2() {
    }

    /**
     * Generates a 32-bit hash from the first {@code length} bytes of the given array,
     * using the given seed.
     *
     * @param data byte array to hash
     * @param length number of leading bytes of {@code data} to hash
     * @param seed initial seed value
     * @return 32-bit hash of the given bytes
     * @throws NullPointerException if {@code data} is {@code null} and {@code length} is positive
     * @throws ArrayIndexOutOfBoundsException if {@code length} exceeds {@code data.length}
     */
    public static int hash32(final byte[] data, final int length, final int seed) {
        // Initialize the hash from the seed and the input length.
        int h = seed ^ length;

        // Body: consume the input four bytes at a time, little-endian.
        final int blocks = length / 4;
        for (int i = 0; i < blocks; i++) {
            final int offset = i * 4;
            int k = (data[offset] & 0xff)
                    | ((data[offset + 1] & 0xff) << 8)
                    | ((data[offset + 2] & 0xff) << 16)
                    | ((data[offset + 3] & 0xff) << 24);
            k *= M32;
            k ^= k >>> R32;
            k *= M32;
            h *= M32;
            h ^= k;
        }

        // Tail: fold in the 1-3 trailing bytes (highest index first), then mix once.
        final int remaining = length % 4;
        if (remaining > 0) {
            final int tail = length & ~3;
            for (int j = remaining - 1; j >= 0; j--) {
                h ^= (data[tail + j] & 0xff) << (8 * j);
            }
            h *= M32;
        }

        // Final avalanche.
        h ^= h >>> 13;
        h *= M32;
        h ^= h >>> 15;

        return h;
    }

    /**
     * Generates a 32-bit hash from the first {@code length} bytes of the given array,
     * using the default seed {@code 0x9747b28c}.
     *
     * @param data byte array to hash
     * @param length number of leading bytes of {@code data} to hash
     * @return 32-bit hash of the given bytes
     */
    public static int hash32(final byte[] data, final int length) {
        return hash32(data, length, DEFAULT_SEED_32);
    }

    /**
     * Generates a 32-bit hash from a string, using the default seed.
     *
     * <p>The string is encoded as UTF-8 before hashing so that the result does not
     * depend on the platform default charset.</p>
     *
     * @param text string to hash
     * @return 32-bit hash of the given string
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static int hash32(final String text) {
        final byte[] bytes = getBytesUtf8(text);
        return hash32(bytes, bytes.length);
    }

    /**
     * Generates a 32-bit hash from a substring, using the default seed.
     *
     * <p>The substring is encoded as UTF-8 before hashing.</p>
     *
     * @param text string to hash
     * @param from starting index (inclusive, in {@code char} units)
     * @param length length of the substring to hash (in {@code char} units)
     * @return 32-bit hash of the given substring
     * @throws NullPointerException if {@code text} is {@code null}
     * @throws StringIndexOutOfBoundsException if the requested range is not within {@code text}
     */
    public static int hash32(final String text, final int from, final int length) {
        return hash32(text.substring(from, from + length));
    }

    /**
     * Generates a 64-bit hash from the first {@code length} bytes of the given array,
     * using the given seed.
     *
     * @param data byte array to hash
     * @param length number of leading bytes of {@code data} to hash
     * @param seed initial seed value (treated as an unsigned 32-bit quantity)
     * @return 64-bit hash of the given bytes
     * @throws NullPointerException if {@code data} is {@code null} and {@code length} is positive
     * @throws ArrayIndexOutOfBoundsException if {@code length} exceeds {@code data.length}
     */
    public static long hash64(final byte[] data, final int length, final int seed) {
        // Mask the seed so a negative int is not sign-extended into the upper 32 bits.
        long h = (seed & 0xffffffffL) ^ (length * M64);

        // Body: consume the input eight bytes at a time, little-endian.
        final int blocks = length / 8;
        for (int i = 0; i < blocks; i++) {
            final int offset = i * 8;
            long k = (data[offset] & 0xffL)
                    | ((data[offset + 1] & 0xffL) << 8)
                    | ((data[offset + 2] & 0xffL) << 16)
                    | ((data[offset + 3] & 0xffL) << 24)
                    | ((data[offset + 4] & 0xffL) << 32)
                    | ((data[offset + 5] & 0xffL) << 40)
                    | ((data[offset + 6] & 0xffL) << 48)
                    | ((data[offset + 7] & 0xffL) << 56);

            k *= M64;
            k ^= k >>> R64;
            k *= M64;

            h ^= k;
            h *= M64;
        }

        // Tail: fold in the 1-7 trailing bytes (highest index first), then mix once.
        final int remaining = length % 8;
        if (remaining > 0) {
            final int tail = length & ~7;
            for (int j = remaining - 1; j >= 0; j--) {
                h ^= (data[tail + j] & 0xffL) << (8 * j);
            }
            h *= M64;
        }

        // Final avalanche.
        h ^= h >>> R64;
        h *= M64;
        h ^= h >>> R64;

        return h;
    }

    /**
     * Generates a 64-bit hash from the first {@code length} bytes of the given array,
     * using the default seed {@code 0xe17a1465}.
     *
     * @param data byte array to hash
     * @param length number of leading bytes of {@code data} to hash
     * @return 64-bit hash of the given bytes
     */
    public static long hash64(final byte[] data, final int length) {
        return hash64(data, length, DEFAULT_SEED_64);
    }

    /**
     * Generates a 64-bit hash from a string, using the default seed.
     *
     * <p>The string is encoded as UTF-8 before hashing so that the result does not
     * depend on the platform default charset.</p>
     *
     * @param text string to hash
     * @return 64-bit hash of the given string
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static long hash64(final String text) {
        final byte[] bytes = getBytesUtf8(text);
        return hash64(bytes, bytes.length);
    }

    /**
     * Generates a 64-bit hash from a substring, using the default seed.
     *
     * <p>The substring is encoded as UTF-8 before hashing.</p>
     *
     * @param text string to hash
     * @param from starting index (inclusive, in {@code char} units)
     * @param length length of the substring to hash (in {@code char} units)
     * @return 64-bit hash of the given substring
     * @throws NullPointerException if {@code text} is {@code null}
     * @throws StringIndexOutOfBoundsException if the requested range is not within {@code text}
     */
    public static long hash64(final String text, final int from, final int length) {
        return hash64(text.substring(from, from + length));
    }

    /** Encodes the text as UTF-8; fully qualified so no file-level import is required. */
    private static byte[] getBytesUtf8(final String text) {
        return text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }
}

0 commit comments

Comments
 (0)