Skip to content

Commit f5a61f0

Browse files
committed
[CODEC-264]: Ensure hash128 maintains the sign extension bug.
The hash128(...) method was calling the public hash128x86(...) method with an int seed when it should have called the private hash128x86(...) with a long seed. Thus it did not have the sign extension error. The internal method has been renamed to avoid a name clash with the public API. The old hash128() behaviour is now restored as the seed is passed to the hash method with implicit long conversion.
1 parent 2405989 commit f5a61f0

3 files changed

Lines changed: 49 additions & 5 deletions

File tree

src/changes/changes.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ The <action> type attribute can be add,update,fix,remove.
4242
</properties>
4343
<body>
4444

45+
<release version="1.15" date="YYYY-MM-DD" description="Feature and fix release.">
46+
<action issue="CODEC-264" dev="aherbert" due-to="Andy Seaborne" type="fix">MurmurHash3: Ensure hash128 maintains the sign extension bug.</action>
47+
</release>
48+
4549
<release version="1.14" date="2019-12-30" description="Feature and fix release.">
4650
<action issue="CODEC-261" dev="aherbert" type="fix">Hex: Allow encoding read-only ByteBuffer.</action>
4751
<action issue="CODEC-259" dev="aherbert" type="fix">Hex: Only use an available ByteBuffer backing array if the length equals the remaining byte count.</action>

src/main/java/org/apache/commons/codec/digest/MurmurHash3.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -800,9 +800,12 @@ public static long[] hash128(final String data) {
800800
@Deprecated
801801
public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
802802
// ************
803-
// Note: This fails to apply masking using 0xffffffffL to the seed.
803+
// Note: This deliberately fails to apply masking using 0xffffffffL to the seed
804+
// to maintain behavioural compatibility with the original version.
805+
// The implicit conversion to a long will extend a negative sign
806+
// bit through the upper 32-bits of the long seed. These should be zero.
804807
// ************
805-
return hash128x64(data, offset, length, seed);
808+
return hash128x64Internal(data, offset, length, seed);
806809
}
807810

808811
/**
@@ -820,7 +823,7 @@ public static long[] hash128(final byte[] data, final int offset, final int leng
820823
*/
821824
public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
822825
// Use an unsigned 32-bit integer as the seed
823-
return hash128x64(data, offset, length, seed & 0xffffffffL);
826+
return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
824827
}
825828

826829
/**
@@ -835,7 +838,7 @@ public static long[] hash128x64(final byte[] data, final int offset, final int l
835838
* @param seed The initial seed value
836839
* @return The 128-bit hash (2 longs)
837840
*/
838-
private static long[] hash128x64(final byte[] data, final int offset, final int length, final long seed) {
841+
private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
839842
long h1 = seed;
840843
long h2 = seed;
841844
final int nblocks = length >> 4;

src/test/java/org/apache/commons/codec/digest/MurmurHash3Test.java

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ public void testHash128() {
559559
{1237530251176898868L, 6144786892208594932L}, {2347717913548230384L, -7461066668225718223L},
560560
{-7963311463560798404L, 8435801462986138227L}, {-7493166089060196513L, 8163503673197886404L},
561561
{6807249306539951962L, -1438886581269648819L}, {6752656991043418179L, 6334147827922066123L},
562-
{-4534351735605790331L, -4530801663887858236L}, {-7886946241830957955L, -6261339648449285315L}, };
562+
{-4534351735605790331L, -4530801663887858236L}, {-7886946241830957955L, -6261339648449285315L},};
563563
for (int i = 0; i < answers.length; i++) {
564564
final byte[] bytes = Arrays.copyOf(RANDOM_BYTES, i);
565565
Assert.assertArrayEquals(answers[i], MurmurHash3.hash128(bytes));
@@ -605,6 +605,43 @@ public void testHash128WithOffsetLengthAndSeed() {
605605
}
606606
}
607607

608+
/**
609+
* Test the {@link MurmurHash3#hash128(byte[], int, int, int)} algorithm.
610+
*
611+
* <p>Explicit test for a negative seed. The original implementation has a sign extension error
612+
* for negative seeds. This test is here to maintain behavioural compatibility of the
613+
* broken deprecated method.
614+
*/
615+
@Test
616+
public void testHash128WithOffsetLengthAndNegativeSeed() {
617+
// Seed can be negative
618+
final int seed = -42;
619+
final int offset = 13;
620+
621+
// Test with all sizes up to 31 bytes. This ensures a full round of 16-bytes plus up to
622+
// 15 bytes remaining.
623+
final long[][] answers = {{5954234972212089025L, 3342108296337967352L},
624+
{8501094764898402923L, 7873951092908129427L}, {-3334322685492296196L, -2842715922956549478L},
625+
{-2177918982459519644L, -1612349961980368636L}, {4172870320608886992L, -4177375712254136503L},
626+
{7546965006884307324L, -5222114032564054641L}, {-2885083166621537267L, -2069868899915344482L},
627+
{-2397098497873118851L, 4990578036999888806L}, {-706479374719025018L, 7531201699171849870L},
628+
{6487943141157228609L, 3576221902299447884L}, {6671331646806999453L, -3428049860825046360L},
629+
{-8700221138601237020L, -2748450904559980545L}, {-9028762509863648063L, 6130259301750313402L},
630+
{729958512305702590L, -36367317333638988L}, {-3803232241584178983L, -4257744207892929651L},
631+
{5734013720237474696L, -760784490666078990L}, {-6097477411153026656L, 625288777006549065L},
632+
{1320365359996757504L, -2251971390373072541L}, {5551441703887653022L, -3516892619809375941L},
633+
{698875391638415033L, 3456972931370496131L}, {5874956830271303805L, -6074126509360777023L},
634+
{-7030758398537734781L, -3174643657101295554L}, {6835789852786226556L, 7245353136839389595L},
635+
{-7755767580598793204L, -6680491060945077989L}, {-3099789923710523185L, -2751080516924952518L},
636+
{-7772046549951435453L, 5263206145535830491L}, {7458715941971015543L, 5470582752171544854L},
637+
{-7753394773760064468L, -2330157750295630617L}, {-5899278942232791979L, 6235686401271389982L},
638+
{4881732293467626532L, 2617335658565007304L}, {-5722863941703478257L, -5424475653939430258L},
639+
{-3703319768293496315L, -2124426428486426443L},};
640+
for (int i = 0; i < answers.length; i++) {
641+
Assert.assertArrayEquals("Length: " + i, answers[i], MurmurHash3.hash128(RANDOM_BYTES, offset, i, seed));
642+
}
643+
}
644+
608645
/**
609646
* Test the {@link MurmurHash3#hash128(String)} algorithm. This only tests it can return
610647
* the same value as {@link MurmurHash3#hash128(byte[], int, int, int)} if the string

0 commit comments

Comments
 (0)