diff --git a/pom.xml b/pom.xml
index cd58814c9c..825f66bfed 100644
--- a/pom.xml
+++ b/pom.xml
@@ -476,6 +476,12 @@
${commons.junit.version}
test
+
+ org.junit.jupiter
+ junit-jupiter-params
+ ${commons.junit.version}
+ test
+
org.junit.vintage
junit-vintage-engine
@@ -488,12 +494,6 @@
-
- org.junit.jupiter
- junit-jupiter-params
- ${commons.junit.version}
- test
-
org.hamcrest
hamcrest
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
deleted file mode 100644
index 18e1fee029..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.LongBinaryOperator;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-
-/**
- * An abstract Bloom filter providing default implementations for most Bloom filter
- * functions. Specific implementations are encouraged to override the methods that can be
- * more efficiently implemented.
- *
- * This abstract class provides additional functionality not declared in the interface.
- * Specifically:
- *
- * - {@link #isFull()}
- *
- *
- * @since 4.5
- */
-public abstract class AbstractBloomFilter implements BloomFilter {
-
- /**
- * The shape used by this BloomFilter
- */
- private final Shape shape;
-
- /**
- * Construct a Bloom filter with the specified shape.
- *
- * @param shape The shape.
- */
- protected AbstractBloomFilter(final Shape shape) {
- this.shape = shape;
- }
-
- @Override
- public int andCardinality(final BloomFilter other) {
- verifyShape(other);
- final long[] mine = getBits();
- final long[] theirs = other.getBits();
- final int limit = Integer.min(mine.length, theirs.length);
- int count = 0;
- for (int i = 0; i < limit; i++) {
- count += Long.bitCount(mine[i] & theirs[i]);
- }
- return count;
- }
-
- @Override
- public int cardinality() {
- int count = 0;
- for (final long bits : getBits()) {
- count += Long.bitCount(bits);
- }
- return count;
- }
-
- @Override
- public boolean contains(final BloomFilter other) {
- verifyShape(other);
- return other.cardinality() == andCardinality(other);
- }
-
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- final long[] buff = getBits();
-
- final OfInt iter = hasher.iterator(shape);
- while (iter.hasNext()) {
- final int idx = iter.nextInt();
- BloomFilterIndexer.checkPositive(idx);
- final int buffIdx = BloomFilterIndexer.getLongIndex(idx);
- final long buffOffset = BloomFilterIndexer.getLongBit(idx);
- if ((buff[buffIdx] & buffOffset) == 0) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public final Shape getShape() {
- return shape;
- }
-
- /**
- * Determines if the bloom filter is "full". Full is defined as having no unset
- * bits.
- *
- * @return true if the filter is full.
- */
- public final boolean isFull() {
- return cardinality() == getShape().getNumberOfBits();
- }
-
- @Override
- public int orCardinality(final BloomFilter other) {
- // Logical OR
- return opCardinality(other, (a, b) -> a | b);
- }
-
- /**
- * Verifies that the hasher has the same name as the shape.
- *
- * @param hasher the Hasher to check
- */
- protected void verifyHasher(final Hasher hasher) {
- // It is assumed that the filter and hasher have been constructed using the
- // same hash function. Use the signature for a fast check the hash function is equal.
- // Collisions will occur at a rate of 1 in 2^64.
- if (shape.getHashFunctionIdentity().getSignature() != hasher.getHashFunctionIdentity().getSignature()) {
- throw new IllegalArgumentException(
- String.format("Hasher (%s) is not the hasher for shape (%s)",
- HashFunctionIdentity.asCommonString(hasher.getHashFunctionIdentity()),
- shape.toString()));
- }
- }
-
- /**
- * Verify the other Bloom filter has the same shape as this Bloom filter.
- *
- * @param other the other filter to check.
- * @throws IllegalArgumentException if the shapes are not the same.
- */
- protected void verifyShape(final BloomFilter other) {
- verifyShape(other.getShape());
- }
-
- /**
- * Verify the specified shape has the same shape as this Bloom filter.
- *
- * @param shape the other shape to check.
- * @throws IllegalArgumentException if the shapes are not the same.
- */
- protected void verifyShape(final Shape shape) {
- if (!this.shape.equals(shape)) {
- throw new IllegalArgumentException(String.format("Shape %s is not the same as %s", shape, this.shape));
- }
- }
-
- @Override
- public int xorCardinality(final BloomFilter other) {
- // Logical XOR
- return opCardinality(other, (a, b) -> a ^ b);
- }
-
- /**
- * Perform the operation on the matched longs from this filter and the other filter
- * and count the cardinality.
- *
- * The remaining unmatched longs from the larger filter are always counted. This
- * method is suitable for OR and XOR cardinality.
- *
- * @param other the other Bloom filter.
- * @param operation the operation (e.g. OR, XOR)
- * @return the cardinality
- */
- private int opCardinality(final BloomFilter other, final LongBinaryOperator operation) {
- verifyShape(other);
- final long[] mine = getBits();
- final long[] theirs = other.getBits();
- final long[] small;
- final long[] big;
- if (mine.length > theirs.length) {
- big = mine;
- small = theirs;
- } else {
- small = mine;
- big = theirs;
- }
- int count = 0;
- for (int i = 0; i < small.length; i++) {
- count += Long.bitCount(operation.applyAsLong(small[i], big[i]));
- }
- for (int i = small.length; i < big.length; i++) {
- count += Long.bitCount(big[i]);
- }
- return count;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
index 0722b92576..b6f120ce2c 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
@@ -16,18 +16,13 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import java.util.BitSet;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.IntConsumer;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
+import java.util.Objects;
+import java.util.function.IntPredicate;
+import java.util.function.LongPredicate;
+import java.util.stream.IntStream;
/**
- * A counting Bloom filter using an array to track counts for each enabled bit
+ * A counting Bloom filter using an int array to track counts for each enabled bit
* index.
*
*
Any operation that results in negative counts or integer overflow of
@@ -35,13 +30,13 @@
* The operation is completed in full, no exception is raised and the state is
* set to invalid. This allows the counts for the filter immediately prior to the
* operation that created the invalid state to be recovered. See the documentation
- * in {@link #isValid()} for details.
+ * in {@link #isValid()} for details.
*
* All the operations in the filter assume the counts are currently valid,
- * for example cardinality or contains operations. Behaviour of an invalid
+ * for example {@code cardinality} or {@code contains} operations. Behavior of an invalid
* filter is undefined. It will no longer function identically to a standard
* Bloom filter that is the merge of all the Bloom filters that have been added
- * to and not later subtracted from the counting Bloom filter.
+ * to and not later subtracted from the counting Bloom filter.
*
* The maximum supported number of items that can be stored in the filter is
* limited by the maximum array size combined with the {@link Shape}. For
@@ -53,7 +48,12 @@
* @see Shape
* @since 4.5
*/
-public class ArrayCountingBloomFilter extends AbstractBloomFilter implements CountingBloomFilter {
+public final class ArrayCountingBloomFilter implements CountingBloomFilter {
+
+ /**
+ * The shape of this Bloom filter.
+ */
+ private final Shape shape;
/**
* The count of each bit index in the filter.
@@ -61,20 +61,20 @@ public class ArrayCountingBloomFilter extends AbstractBloomFilter implements Cou
private final int[] counts;
/**
- * The state flag. This is a bitwise OR of the entire history of all updated
+ * The state flag. This is a bitwise {@code OR} of the entire history of all updated
* counts. If negative then a negative count or integer overflow has occurred on
* one or more counts in the history of the filter and the state is invalid.
*
*
Maintenance of this state flag is branch-free for improved performance. It
* eliminates a conditional check for a negative count during remove/subtract
* operations and a conditional check for integer overflow during merge/add
- * operations.
+ * operations.
*
* Note: Integer overflow is unlikely in realistic usage scenarios. A count
* that overflows indicates that the number of items in the filter exceeds the
* maximum possible size (number of bits) of any Bloom filter constrained by
* integer indices. At this point the filter is most likely full (all bits are
- * non-zero) and thus useless.
+ * non-zero) and thus useless.
*
* Negative counts are a concern if the filter is used incorrectly by
* removing an item that was never added. It is expected that a user of a
@@ -82,174 +82,108 @@ public class ArrayCountingBloomFilter extends AbstractBloomFilter implements Cou
* Enabling an explicit recovery path for negative or overflow counts is a major
* performance burden not deemed necessary for the unlikely scenarios when an
* invalid state is created. Maintenance of the state flag is a concession to
- * flag improper use that should not have a major performance impact.
+ * flag improper use that should not have a major performance impact.
*/
private int state;
- /**
- * An iterator of all indexes with non-zero counts.
- *
- * In the event that the filter state is invalid any index with a negative count
- * will also be produced by the iterator.
- */
- private class IndexIterator implements PrimitiveIterator.OfInt {
- /** The next non-zero index (or counts.length). */
- private int next;
-
- /**
- * Create an instance.
- */
- IndexIterator() {
- advance();
- }
-
- /**
- * Advance to the next non-zero index.
- */
- void advance() {
- while (next < counts.length && counts[next] == 0) {
- next++;
- }
- }
-
- @Override
- public boolean hasNext() {
- return next < counts.length;
- }
-
- @Override
- public int nextInt() {
- if (hasNext()) {
- final int result = next++;
- advance();
- return result;
- }
- // Currently unreachable as the iterator is only used by
- // the StaticHasher which iterates correctly.
- throw new NoSuchElementException();
- }
- }
-
/**
* Constructs an empty counting Bloom filter with the specified shape.
*
* @param shape the shape of the filter
+ *
*/
public ArrayCountingBloomFilter(final Shape shape) {
- super(shape);
+ Objects.requireNonNull(shape, "shape");
+ this.shape = shape;
counts = new int[shape.getNumberOfBits()];
}
- @Override
- public int cardinality() {
- int size = 0;
- for (final int c : counts) {
- if (c != 0) {
- size++;
- }
- }
- return size;
+ private ArrayCountingBloomFilter(ArrayCountingBloomFilter source) {
+ this.shape = source.shape;
+ this.state = source.state;
+ this.counts = source.counts.clone();
}
@Override
- public boolean contains(final BloomFilter other) {
- // The AbstractBloomFilter implementation converts both filters to long[] bits.
- // This would involve checking all indexes in this filter against zero.
- // Ideally we use an iterator of bit indexes to allow fail-fast on the
- // first bit index that is zero.
- if (other instanceof ArrayCountingBloomFilter) {
- verifyShape(other);
- return contains(((ArrayCountingBloomFilter) other).iterator());
- }
-
- // Note:
- // This currently creates a StaticHasher which stores all the indexes.
- // It would greatly benefit from direct generation of the index iterator
- // avoiding the intermediate storage.
- return contains(other.getHasher());
+ public ArrayCountingBloomFilter copy() {
+ return new ArrayCountingBloomFilter(this);
}
@Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- return contains(hasher.iterator(getShape()));
- }
-
- /**
- * Return true if this filter is has non-zero counts for each index in the iterator.
- *
- * @param iter the iterator
- * @return true if this filter contains all the indexes
- */
- private boolean contains(final OfInt iter) {
- while (iter.hasNext()) {
- if (counts[iter.nextInt()] == 0) {
- return false;
- }
- }
+ public boolean isSparse() {
return true;
}
@Override
- public long[] getBits() {
- final BitSet bs = new BitSet();
- for (int i = 0; i < counts.length; i++) {
- if (counts[i] != 0) {
- bs.set(i);
- }
- }
- return bs.toLongArray();
+ public int cardinality() {
+ return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count();
}
@Override
- public StaticHasher getHasher() {
- return new StaticHasher(iterator(), getShape());
+ public CountingBloomFilter merge(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ CountingBloomFilter filter = copy();
+ filter.add(BitCountProducer.from(other));
+ return filter;
}
- /**
- * Returns an iterator over the enabled indexes in this filter.
- * Any index with a non-zero count is considered enabled.
- * The iterator returns indexes in their natural order.
- *
- * @return an iterator over the enabled indexes
- */
- private PrimitiveIterator.OfInt iterator() {
- return new IndexIterator();
+ @Override
+ public CountingBloomFilter merge(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ ArrayCountingBloomFilter filter = copy();
+ try {
+ filter.add(BitCountProducer.from(hasher.uniqueIndices(shape)));
+ } catch (IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException(
+ String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
+ }
+ return filter;
}
@Override
- public boolean merge(final BloomFilter other) {
- applyAsBloomFilter(other, this::increment);
- return isValid();
+ public boolean mergeInPlace(final BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ try {
+ return add(BitCountProducer.from(other));
+ } catch (IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException( e );
+ }
}
@Override
- public boolean merge(final Hasher hasher) {
- applyAsHasher(hasher, this::increment);
- return isValid();
+ public boolean mergeInPlace(final Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ try {
+ return add(BitCountProducer.from(hasher.uniqueIndices(shape)));
+ } catch (IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException(
+ String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
+ }
}
@Override
public boolean remove(final BloomFilter other) {
- applyAsBloomFilter(other, this::decrement);
- return isValid();
+ Objects.requireNonNull(other, "other");
+ return subtract(BitCountProducer.from(other));
}
@Override
public boolean remove(final Hasher hasher) {
- applyAsHasher(hasher, this::decrement);
- return isValid();
+ Objects.requireNonNull(hasher, "hasher");
+ return subtract(BitCountProducer.from(hasher.uniqueIndices(shape)));
}
@Override
- public boolean add(final CountingBloomFilter other) {
- applyAsCountingBloomFilter(other, this::add);
+ public boolean add(final BitCountProducer other) {
+ Objects.requireNonNull(other, "other");
+ other.forEachCount(this::add);
return isValid();
}
@Override
- public boolean subtract(final CountingBloomFilter other) {
- applyAsCountingBloomFilter(other, this::subtract);
+ public boolean subtract(final BitCountProducer other) {
+ Objects.requireNonNull(other, "other");
+ other.forEachCount(this::subtract);
return isValid();
}
@@ -258,14 +192,14 @@ public boolean subtract(final CountingBloomFilter other) {
*
*
Implementation note
*
- *
The state transition to invalid is permanent.
+ *
The state transition to invalid is permanent.
*
* This implementation does not correct negative counts to zero or integer
* overflow counts to {@link Integer#MAX_VALUE}. Thus the operation that
* generated invalid counts can be reversed by using the complement of the
* original operation with the same Bloom filter. This will restore the counts
* to the state prior to the invalid operation. Counts can then be extracted
- * using {@link #forEachCount(BitCountConsumer)}.
+ * using {@link #forEachCount(BitCountConsumer)}.
*/
@Override
public boolean isValid() {
@@ -273,69 +207,31 @@ public boolean isValid() {
}
@Override
- public void forEachCount(final BitCountConsumer action) {
+ public boolean forEachCount(final BitCountProducer.BitCountConsumer consumer) {
+ Objects.requireNonNull(consumer, "consumer");
for (int i = 0; i < counts.length; i++) {
- if (counts[i] != 0) {
- action.accept(i, counts[i]);
+ if (counts[i] != 0 && !consumer.test(i, counts[i])) {
+ return false;
}
}
+ return true;
}
- /**
- * Apply the action for each index in the Bloom filter.
- */
- private void applyAsBloomFilter(final BloomFilter other, final IntConsumer action) {
- verifyShape(other);
- if (other instanceof ArrayCountingBloomFilter) {
- // Only use the presence of non-zero and not the counts
- final int[] counts2 = ((ArrayCountingBloomFilter) other).counts;
- for (int i = 0; i < counts2.length; i++) {
- if (counts2[i] != 0) {
- action.accept(i);
- }
+ @Override
+ public boolean forEachIndex(IntPredicate consumer) {
+ Objects.requireNonNull(consumer, "consumer");
+ for (int i = 0; i < counts.length; i++) {
+ if (counts[i] != 0 && !consumer.test(i)) {
+ return false;
}
- } else {
- BitSet.valueOf(other.getBits()).stream().forEach(action);
}
+ return true;
}
- /**
- * Apply the action for each index in the hasher.
- */
- private void applyAsHasher(final Hasher hasher, final IntConsumer action) {
- verifyHasher(hasher);
- // We do not naturally handle duplicates so filter them.
- IndexFilters.distinctIndexes(hasher, getShape(), action);
- }
-
- /**
- * Apply the action for each index in the Bloom filter.
- */
- private void applyAsCountingBloomFilter(final CountingBloomFilter other, final BitCountConsumer action) {
- verifyShape(other);
- other.forEachCount(action);
- }
-
- /**
- * Increment to the count for the bit index.
- *
- * @param idx the index
- */
- private void increment(final int idx) {
- final int updated = counts[idx] + 1;
- state |= updated;
- counts[idx] = updated;
- }
-
- /**
- * Decrement from the count for the bit index.
- *
- * @param idx the index
- */
- private void decrement(final int idx) {
- final int updated = counts[idx] - 1;
- state |= updated;
- counts[idx] = updated;
+ @Override
+ public boolean forEachBitMap(LongPredicate consumer) {
+ Objects.requireNonNull(consumer, "consumer");
+ return BitMapProducer.fromIndexProducer(this, shape.getNumberOfBits()).forEachBitMap(consumer);
}
/**
@@ -343,11 +239,13 @@ private void decrement(final int idx) {
*
* @param idx the index
* @param addend the amount to add
+ * @return {@code true} always.
*/
- private void add(final int idx, final int addend) {
+ private boolean add(final int idx, final int addend) {
final int updated = counts[idx] + addend;
state |= updated;
counts[idx] = updated;
+ return true;
}
/**
@@ -355,10 +253,32 @@ private void add(final int idx, final int addend) {
*
* @param idx the index
* @param subtrahend the amount to subtract
+ * @return {@code true} always.
*/
- private void subtract(final int idx, final int subtrahend) {
+ private boolean subtract(final int idx, final int subtrahend) {
final int updated = counts[idx] - subtrahend;
state |= updated;
counts[idx] = updated;
+ return true;
+ }
+
+ @Override
+ public Shape getShape() {
+ return shape;
+ }
+
+ @Override
+ public boolean contains(IndexProducer indexProducer) {
+ return indexProducer.forEachIndex(idx -> this.counts[idx] != 0);
+ }
+
+ @Override
+ public boolean contains(BitMapProducer bitMapProducer) {
+ return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
+ }
+
+ @Override
+ public int[] asIndexArray() {
+ return IntStream.range(0, counts.length).filter(i -> counts[i] > 0).toArray();
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java
new file mode 100644
index 0000000000..aea07b36e7
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.function.IntPredicate;
+
+/**
+ * Produces bit counts for counting type Bloom filters.
+ *
+ * @since 4.5
+ */
+@FunctionalInterface
+public interface BitCountProducer extends IndexProducer {
+
+ /**
+ * Performs the given action for each {@code <index, count>} pair where the count is non-zero.
+ * Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
+ * index-count pair, if the consumer returns {@code false} the execution is stopped, {@code false}
+ * is returned, and no further pairs are processed.
+ *
+ * Must only process each index once, and must process indexes in order.
+ *
+ * @param consumer the action to be performed for each non-zero bit count
+ * @return {@code true} if all count pairs return true from consumer, {@code false} otherwise.
+ * @throws NullPointerException if the specified action is null
+ */
+ boolean forEachCount(BitCountConsumer consumer);
+
+ @Override
+ default boolean forEachIndex(IntPredicate predicate) {
+ return forEachCount((i, v) -> predicate.test(i));
+ }
+
+ /**
+ * Creates a BitCountProducer from an IndexProducer. The resulting
+ * producer will count each enabled bit once.
+ * @param idx An index producer.
+ * @return A BitCountProducer with the same indices as the IndexProducer.
+ */
+ static BitCountProducer from(IndexProducer idx) {
+ return new BitCountProducer() {
+ @Override
+ public boolean forEachCount(BitCountConsumer consumer) {
+ return idx.forEachIndex(i -> consumer.test(i, 1));
+ }
+ };
+ }
+
+ /**
+ * Represents an operation that accepts an {@code <index, count>} pair representing
+ * the count for a bit index in a Bit Count Producer Bloom filter and returns {@code true}
+ * if processing should continue, {@code false} otherwise.
+ *
+ * Note: This is a functional interface as a specialization of
+ * {@link java.util.function.BiPredicate} for {@code int}.
+ */
+ @FunctionalInterface
+ interface BitCountConsumer {
+ /**
+ * Performs this operation on the given {@code <index, count>} pair.
+ *
+ * @param index the bit index.
+ * @param count the count at the specified bit index.
+ * @return {@code true} if processing should continue, {@code false} if processing should stop.
+ */
+ boolean test(int index, int count);
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMap.java
similarity index 53%
rename from src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java
rename to src/main/java/org/apache/commons/collections4/bloomfilter/BitMap.java
index fe9b1161a9..f6f744ef78 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMap.java
@@ -17,25 +17,57 @@
package org.apache.commons.collections4.bloomfilter;
/**
- * Contains functions to convert {@code int} indices into Bloom filter bit positions.
+ * Contains functions to convert {@code int} indices into Bloom filter bit positions and vice versa.
+ *
+ * The functions view an array of longs as a collection of bit maps each containing 64 bits. The bits are arranged
+ * in memory as a little-endian long value. This matches the requirements of the BitMapProducer interface.
+ *
+ * @since 4.5
*/
-public final class BloomFilterIndexer {
+public class BitMap {
/** A bit shift to apply to an integer to divided by 64 (2^6). */
private static final int DIVIDE_BY_64 = 6;
/** Do not instantiate. */
- private BloomFilterIndexer() {}
+ private BitMap() {
+ }
+
+ /**
+ * Calculates the number of bit maps (longs) required for the numberOfBits parameter.
+ *
+ * If the input is negative the behavior is not defined.
+ *
+ * @param numberOfBits the number of bits to store in the array of bit maps.
+ * @return the number of bit maps necessary.
+ */
+ public static int numberOfBitMaps(int numberOfBits) {
+ return ((numberOfBits - 1) >> DIVIDE_BY_64) + 1;
+ }
+
+ /**
+ * Checks if the specified index bit is enabled in the array of bit maps.
+ *
+ * If the bit specified by bitIndex is not enabled in the bit maps, false is returned.
+ *
+ * @param bitMaps The array of bit maps.
+ * @param bitIndex the index of the bit to locate.
+ * @return {@code true} if the bit is enabled, {@code false} otherwise.
+ * @throws IndexOutOfBoundsException if bitIndex specifies a bit not in the range being tracked.
+ */
+ public static boolean contains(long[] bitMaps, int bitIndex) {
+ return (bitMaps[getLongIndex(bitIndex)] & getLongBit(bitIndex)) != 0;
+ }
/**
- * Check the index is positive.
+ * Sets the bit in the bit maps.
+ * Does not perform range checking.
*
- * @param bitIndex the bit index
- * @throws IndexOutOfBoundsException if the index is not positive
+ * @param bitMaps The array of bit maps.
+ * @param bitIndex the index of the bit to set.
+ * @throws IndexOutOfBoundsException if bitIndex specifies a bit not in the range being tracked.
*/
- public static void checkPositive(final int bitIndex) {
- if (bitIndex < 0) {
- throw new IndexOutOfBoundsException("Negative bitIndex: " + bitIndex);
- }
+ public static void set(long[] bitMaps, int bitIndex) {
+ bitMaps[getLongIndex(bitIndex)] |= getLongBit(bitIndex);
}
/**
@@ -43,20 +75,21 @@ public static void checkPositive(final int bitIndex) {
* to store bits starting at index 0.
*
* The index is assumed to be positive. For a positive index the result will match
- * {@code bitIndex / 64}.
+ * {@code bitIndex / 64}.
*
- * The divide is performed using bit shifts. If the input is negative the behavior
- * is not defined.
+ *
The divide is performed using bit shifts. If the input is negative the behavior
+ * is not defined.
*
* @param bitIndex the bit index (assumed to be positive)
- * @return the filter index
- * @see #checkPositive(int)
+ * @return the index of the bit map in an array of bit maps.
*/
public static int getLongIndex(final int bitIndex) {
- // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is positive.
+ // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is
+ // positive.
// We do not explicitly check for a negative here. Instead we use a
// a signed shift. Any negative index will produce a negative value
- // by sign-extension and if used as an index into an array it will throw an exception.
+ // by sign-extension and if used as an index into an array it will throw an
+ // exception.
return bitIndex >> DIVIDE_BY_64;
}
@@ -66,13 +99,12 @@ public static int getLongIndex(final int bitIndex) {
* 1 bit set.
*
* The index is assumed to be positive. For a positive index the result will match
- * {@code 1L << (bitIndex % 64)}.
+ * {@code 1L << (bitIndex % 64)}.
*
- * If the input is negative the behavior is not defined.
+ *
If the input is negative the behavior is not defined.
*
* @param bitIndex the bit index (assumed to be positive)
* @return the filter bit
- * @see #checkPositive(int)
*/
public static long getLongBit(final int bitIndex) {
// Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitMapProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMapProducer.java
new file mode 100644
index 0000000000..84561eba55
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMapProducer.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.LongPredicate;
+
+/**
+ * Produces bit map longs for a Bloom filter.
+ *
+ * Each bit map is a little-endian long value representing a block of bits in a filter.
+ *
+ * The returned array will have length {@code ceil(m / 64)} where {@code m} is the
+ * number of bits in the filter and {@code ceil} is the ceiling function.
+ * Bits 0-63 are in the first long. A value of 1 at a bit position indicates the bit
+ * index is enabled.
+ *
+ * The default implementations of the {@code forEachBitMapPair()} and {@code asBitMapArray()} methods
+ * are slow and should be reimplemented in the implementing classes where possible.
+ *
+ * @since 4.5
+ */
+@FunctionalInterface
+public interface BitMapProducer {
+
+ /**
+ * Each bit map is passed to the predicate in order. The predicate is applied to each
+ * bit map value, if the predicate returns {@code false} the execution is stopped, {@code false}
+ * is returned, and no further bit maps are processed.
+ *
+ * If the producer is empty this method will return true.
+ *
+ * Any exceptions thrown by the predicate are relayed to the caller.
+ *
+ * @param predicate the function to execute
+ * @return {@code true} if all bit maps returned {@code true}, {@code false} otherwise.
+ * @throws NullPointerException if the specified predicate is null
+ */
+ boolean forEachBitMap(LongPredicate predicate);
+
+ /**
+ * Applies the {@code func} to each bit map pair in order. Will apply all of the bit maps from the other
+ * BitMapProducer to this producer. If this producer does not have as many bit maps it will provide 0 (zero)
+ * for all excess calls to the LongBiPredicate.
+ *
+ * The default implementation of this method uses {@code asBitMapArray()}. It is recommended that implementations
+ * of BitMapProducer that have local arrays reimplement this method.
+ *
+ * @param other The other BitMapProducer that provides the y values in the (x,y) pair.
+ * @param func The function to apply.
+ * @return {@code true} if {@code func} returned {@code true} for every bit map pair, {@code false} otherwise.
+ */
+ default boolean forEachBitMapPair(BitMapProducer other, LongBiPredicate func) {
+ CountingLongPredicate p = new CountingLongPredicate(asBitMapArray(), func);
+ return other.forEachBitMap(p) && p.forEachRemaining();
+ }
+
+ /**
+ * Return a copy of the BitMapProducer data as a bit map array.
+ *
+ * The default implementation of this method is slow. It is recommended
+ * that implementing classes reimplement this method.
+ *
+ * @return An array of bit map data.
+ */
+ default long[] asBitMapArray() {
+ class Bits {
+ private long[] data = new long[16];
+ private int size;
+
+ boolean add(long bits) {
+ if (size == data.length) {
+ // This will throw an out-of-memory error if there are too many bits.
+ // Since bits are addressed using 32-bit signed integer indices
+ // the maximum length should be ~2^31 / 2^6 = ~2^25.
+ // Any more is a broken implementation.
+ data = Arrays.copyOf(data, size * 2);
+ }
+ data[size++] = bits;
+ return true;
+ }
+
+ long[] toArray() {
+ // Edge case to avoid a large array copy
+ return size == data.length ? data : Arrays.copyOf(data, size);
+ }
+ }
+ Bits bits = new Bits();
+ forEachBitMap(bits::add);
+ return bits.toArray();
+ }
+
+ /**
+ * Creates a BitMapProducer from an array of {@code long} bit maps.
+ * @param bitMaps the bit maps to return.
+ * @return a BitMapProducer.
+ */
+ static BitMapProducer fromBitMapArray(long... bitMaps) {
+ return new BitMapProducer() {
+ @Override
+ public boolean forEachBitMap(LongPredicate predicate) {
+ for (long word : bitMaps) {
+ if (!predicate.test(word)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public long[] asBitMapArray() {
+ return Arrays.copyOf(bitMaps, bitMaps.length);
+ }
+
+ @Override
+ public boolean forEachBitMapPair(BitMapProducer other, LongBiPredicate func) {
+ CountingLongPredicate p = new CountingLongPredicate(bitMaps, func);
+ return other.forEachBitMap(p) && p.forEachRemaining();
+ }
+ };
+ }
+
+ /**
+ * Creates a BitMapProducer from an IndexProducer.
+ * @param producer the IndexProducer that specifies the indexes of the bits to enable.
+ * @param numberOfBits the number of bits in the Bloom filter.
+ * @return A BitMapProducer that produces the bit maps equivalent of the Indices from the producer.
+ */
+ static BitMapProducer fromIndexProducer(IndexProducer producer, int numberOfBits) {
+ Objects.requireNonNull(producer, "producer");
+ Objects.requireNonNull(numberOfBits, "numberOfBits");
+
+ long[] result = new long[BitMap.numberOfBitMaps(numberOfBits)];
+ producer.forEachIndex(i -> {
+ BitMap.set(result, i);
+ return true;
+ });
+ return fromBitMapArray(result);
+ }
+
+ /**
+ * A long predicate that applies the test func to each member of the {@code ary} in sequence for each call to {@code test()}.
+ * If the {@code ary} is exhausted, the subsequent calls to {@code test} are executed with a zero value.
+ * If the calls to {@code test} do not exhaust the {@code ary} the {@code forEachRemaining} method can be called to
+ * execute the {@code test} with a zero value for each remaining {@code idx} value.
+ *
+ */
+ class CountingLongPredicate implements LongPredicate {
+ int idx = 0;
+ final long[] ary;
+ final LongBiPredicate func;
+
+ CountingLongPredicate(long[] ary, LongBiPredicate func) {
+ this.ary = ary;
+ this.func = func;
+ }
+
+ @Override
+ public boolean test(long other) {
+ return func.test(idx == ary.length ? 0 : ary[idx++], other);
+ }
+
+ boolean forEachRemaining() {
+ while (idx != ary.length && func.test(ary[idx], 0)) {
+ idx++;
+ }
+ return idx == ary.length;
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java
deleted file mode 100644
index de55cbe93d..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.BitSet;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.IntConsumer;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
-
-/**
- * A bloom filter using a Java BitSet to track enabled bits. This is a standard
- * implementation and should work well for most Bloom filters.
- * @since 4.5
- */
-public class BitSetBloomFilter extends AbstractBloomFilter {
-
- /**
- * The bitSet that defines this BloomFilter.
- */
- private final BitSet bitSet;
-
- /**
- * Constructs an empty BitSetBloomFilter.
- *
- * @param shape the desired shape of the filter.
- */
- public BitSetBloomFilter(final Shape shape) {
- super(shape);
- this.bitSet = new BitSet();
- }
-
- @Override
- public int andCardinality(final BloomFilter other) {
- if (other instanceof BitSetBloomFilter) {
- verifyShape(other);
- final BitSet result = (BitSet) bitSet.clone();
- result.and(((BitSetBloomFilter) other).bitSet);
- return result.cardinality();
- }
- return super.andCardinality(other);
- }
-
- @Override
- public int cardinality() {
- return bitSet.cardinality();
- }
-
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- final OfInt iter = hasher.iterator(getShape());
- while (iter.hasNext()) {
- if (!bitSet.get(iter.nextInt())) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public long[] getBits() {
- return bitSet.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(bitSet.stream().iterator(), getShape());
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- verifyShape(other);
- if (other instanceof BitSetBloomFilter) {
- bitSet.or(((BitSetBloomFilter) other).bitSet);
- } else {
- bitSet.or(BitSet.valueOf(other.getBits()));
- }
- return true;
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- verifyHasher(hasher);
- hasher.iterator(getShape()).forEachRemaining((IntConsumer) bitSet::set);
- return true;
- }
-
- @Override
- public int orCardinality(final BloomFilter other) {
- if (other instanceof BitSetBloomFilter) {
- verifyShape(other);
- final BitSet result = (BitSet) bitSet.clone();
- result.or(((BitSetBloomFilter) other).bitSet);
- return result.cardinality();
- }
- return super.orCardinality(other);
- }
-
- @Override
- public int xorCardinality(final BloomFilter other) {
- if (other instanceof BitSetBloomFilter) {
- verifyShape(other);
- final BitSet result = (BitSet) bitSet.clone();
- result.xor(((BitSetBloomFilter) other).bitSet);
- return result.cardinality();
- }
- return super.xorCardinality(other);
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
index af43ddd51e..9a4e6324fa 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
@@ -16,138 +16,237 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
+import java.util.Objects;
/**
* The interface that describes a Bloom filter.
+ *
+ * See implementation notes for BitMapProducer and IndexProducer.
+ *
+ * @see BitMapProducer
+ * @see IndexProducer
* @since 4.5
*/
-public interface BloomFilter {
-
- // Query Operations
+public interface BloomFilter extends IndexProducer, BitMapProducer {
/**
- * Gets the shape of this filter.
- *
- * @return the shape of this filter
+ * Creates a new instance of the BloomFilter with the same properties as the current one.
+ * @return a copy of this BloomFilter
*/
- Shape getShape();
+ BloomFilter copy();
+
+ // Query Operations
/**
- * Gets an array of little-endian long values representing the bits of this filter.
+ * This method is used to determine the best method for matching.
*
- * The returned array will have length {@code ceil(m / 64)} where {@code m} is the
- * number of bits in the filter and {@code ceil} is the ceiling function.
- * Bits 0-63 are in the first long. A value of 1 at a bit position indicates the bit
- * index is enabled.
+ * <p>For `sparse` implementations
+ * the {@code forEachIndex(IntPredicate predicate)} method is more efficient. For non `sparse` implementations
+ * the {@code forEachBitMap(LongPredicate predicate)} is more efficient. Implementers should determine if it is easier
+ * for the implementation to produce indexes or bit map blocks.
*
- * @return the {@code long[]} representation of this filter
+ * @return {@code true} if the implementation is sparse {@code false} otherwise.
+ * @see BitMap
*/
- long[] getBits();
+ boolean isSparse();
/**
- * Creates a StaticHasher that contains the indexes of the bits that are on in this
- * filter.
- *
- * @return a StaticHasher for that produces this Bloom filter
+ * Gets the shape that was used when the filter was built.
+ * @return The shape the filter was built with.
*/
- StaticHasher getHasher();
+ Shape getShape();
/**
- * Returns {@code true} if this filter contains the specified filter. Specifically this
+ * Returns {@code true} if this filter contains the specified filter.
+ *
+ * Specifically this
* returns {@code true} if this filter is enabled for all bits that are enabled in the
* {@code other} filter. Using the bit representations this is
- * effectively {@code (this AND other) == other}.
+ * effectively {@code (this AND other) == other}.
*
* @param other the other Bloom filter
- * @return true if this filter is enabled for all enabled bits in the other filter
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
+ * @return true if all enabled bits in the other filter are enabled in this filter.
*/
- boolean contains(BloomFilter other);
+ default boolean contains(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return isSparse() ? contains((IndexProducer) other) : contains((BitMapProducer) other);
+ }
/**
- * Returns {@code true} if this filter contains the specified decomposed Bloom filter.
- * Specifically this returns {@code true} if this filter is enabled for all bit indexes
- * identified by the {@code hasher}. Using the bit representations this is
- * effectively {@code (this AND hasher) == hasher}.
+ * Returns {@code true} if this filter contains the bits specified in the hasher.
+ *
+ * Specifically this returns {@code true} if this filter is enabled for all bit indexes
+ * identified by the {@code hasher}. Using the bit map representations this is
+ * effectively {@code (this AND hasher) == hasher}.
*
* @param hasher the hasher to provide the indexes
* @return true if this filter is enabled for all bits specified by the hasher
- * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of
- * this filter
*/
- boolean contains(Hasher hasher);
+ default boolean contains(Hasher hasher) {
+ Objects.requireNonNull(hasher, "Hasher");
+ Shape shape = getShape();
+ return contains(hasher.indices(shape));
+ }
- // Modification Operations
+ /**
+ * Returns {@code true} if this filter contains the indices specified by the IndexProducer.
+ *
+ * Specifically this returns {@code true} if this filter is enabled for all bit indexes
+ * identified by the {@code IndexProducer}.
+ *
+ * @param indexProducer the IndexProducer to provide the indexes
+ * @return {@code true} if this filter is enabled for all bits specified by the IndexProducer
+ */
+ boolean contains(IndexProducer indexProducer);
/**
- * Merges the specified Bloom filter into this Bloom filter. Specifically all bit indexes
- * that are enabled in the {@code other} filter will be enabled in this filter.
+ * Returns {@code true} if this filter contains the bits specified in the bit maps produced by the
+ * bitMapProducer.
*
- * Note: This method should return {@code true} even if no additional bit indexes were
- * enabled. A {@code false} result indicates that this filter is not ensured to contain
- * the {@code other} Bloom filter.
+ * @param bitMapProducer the {@code BitMapProducer} to provide the bit maps.
+ * @return {@code true} if this filter is enabled for all bits specified by the bit maps
+ */
+ default boolean contains(BitMapProducer bitMapProducer) {
+ return forEachBitMapPair(bitMapProducer, (x, y) -> (x & y) == y);
+ }
+
+ // update operations
+
+ /**
+ * Merges the specified Bloom filter with this Bloom filter creating a new Bloom filter.
+ *
+ * <p>Specifically all bit indexes that are enabled in the {@code other} and in {@code this} filter will be
+ * enabled in the resulting filter.
*
* @param other the other Bloom filter
+ * @return The new Bloom filter.
+ */
+ default BloomFilter merge(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ BloomFilter result = copy();
+ result.mergeInPlace(other);
+ return result;
+ }
+
+ /**
+ * Merges the specified Hasher with this Bloom filter and returns a new Bloom filter.
+ *
+ * Specifically all bit indexes that are identified by the {@code hasher} and in {@code this} Bloom filter
+ * will be enabled in the resulting filter.
+ *
+ * @param hasher the hasher to provide the indices
+ * @return the new Bloom filter.
+ */
+ default BloomFilter merge(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ BloomFilter result = copy();
+ result.mergeInPlace(hasher);
+ return result;
+ }
+
+ /**
+ * Merges the specified Bloom filter into this Bloom filter.
+ *
+ * Specifically all
+ * bit indexes that are identified by the {@code other} will be enabled in this filter.
+ *
+ * Note: This method should return {@code true} even if no additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may not contain
+ * the {@code other} Bloom filter. This state may occur in complex Bloom filter implementations like
+ * counting Bloom filters.
+ *
+ * @param other The bloom filter to merge into this one.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
*/
- boolean merge(BloomFilter other);
+ boolean mergeInPlace(BloomFilter other);
/**
- * Merges the specified decomposed Bloom filter into this Bloom filter. Specifically all
+ * Merges the specified hasher into this Bloom filter. Specifically all
* bit indexes that are identified by the {@code hasher} will be enabled in this filter.
*
- * Note: This method should return {@code true} even if no additional bit indexes were
- * enabled. A {@code false} result indicates that this filter is not ensured to contain
- * the specified decomposed Bloom filter.
+ * <p>Note: This method should return {@code true} even if no additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may not contain
+ * the {@code hasher}. This state may occur in complex Bloom filter implementations like
+ * counting Bloom filters.
*
- * @param hasher the hasher to provide the indexes
+ * @param hasher The hasher to merge.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of
- * this filter
*/
- boolean merge(Hasher hasher);
+ default boolean mergeInPlace(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ Shape shape = getShape();
+ // create the bloomfilter that is most likely to merge quickly with this one
+ BloomFilter result = isSparse() ? new SparseBloomFilter(shape, hasher) : new SimpleBloomFilter(shape, hasher);
+ return mergeInPlace(result);
+ }
// Counting Operations
+ /**
+ * Determines if the bloom filter is "full".
+ *
+ * Full is defined as having no unset bits.
+ *
+ * @return {@code true} if the filter is full, {@code false} otherwise.
+ */
+ default boolean isFull() {
+ return cardinality() == getShape().getNumberOfBits();
+ }
+
/**
* Gets the cardinality (number of enabled bits) of this Bloom filter.
*
- * This is also known as the Hamming value.
+ * This is also known as the Hamming value or Hamming number.
*
* @return the cardinality of this filter
*/
int cardinality();
/**
- * Performs a logical "AND" with the other Bloom filter and returns the cardinality
- * (number of enabled bits) of the result.
+ * Estimates the number of items in the Bloom filter.
*
- * @param other the other Bloom filter
- * @return the cardinality of the result of {@code (this AND other)}
+ * By default this is the rounding of the {@code Shape.estimateN(cardinality)} calculation for the
+ * shape and cardinality of this filter.
+ *
+ * This produces an estimate roughly equivalent to the number of Hashers that have been merged into the filter.
+ *
+ * @return an estimate of the number of items in the bloom filter.
+ * @see Shape#estimateN(int)
*/
- int andCardinality(BloomFilter other);
+ default int estimateN() {
+ return (int) Math.round(getShape().estimateN(cardinality()));
+ }
/**
- * Performs a logical "OR" with the other Bloom filter and returns the cardinality
- * (number of enabled bits) of the result.
+ * Estimates the number of items in the union of this Bloom filter with the other bloom filter.
*
- * @param other the other Bloom filter
- * @return the cardinality of the result of {@code (this OR other)}
+ * By default this is the {@code estimateN()} of the merging of this filter with the {@code other} filter.
+ *
+ * This produces an estimate roughly equivalent to the number of unique Hashers that have been merged into either
+ * of the filters.
+ *
+ * @param other The other Bloom filter
+ * @return an estimate of the number of items in the union.
+ * @see #estimateN()
*/
- int orCardinality(BloomFilter other);
+ default int estimateUnion(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return this.merge(other).estimateN();
+ }
/**
- * Performs a logical "XOR" with the other Bloom filter and returns the cardinality
- * (number of enabled bits) of the result.
+ * Estimates the number of items in the intersection of this Bloom filter with the other bloom filter.
*
- * @param other the other Bloom filter
- * @return the cardinality of the result of {@code (this XOR other)}
+ * By default this is the {@code estimateN() + other.estimateN() - estimateUnion(other)}
+ *
+ * This produces an estimate roughly equivalent to the number of unique Hashers that have been merged into both
+ * of the filters.
+ *
+ * @param other The other Bloom filter
+ * @return an estimate of the number of items in the intersection.
*/
- int xorCardinality(BloomFilter other);
+ default int estimateIntersection(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return estimateN() + other.estimateN() - estimateUnion(other);
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
index 0c414ebe93..49655351f5 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
@@ -16,8 +16,6 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-
/**
* The interface that describes a Bloom filter that associates a count with each
* bit index to allow reversal of merge operations with remove operations.
@@ -27,7 +25,7 @@
* to and not later subtracted from the counting Bloom filter. The functional
* state of a CountingBloomFilter at the start and end of a series of merge and
* subsequent remove operations of the same Bloom filters, irrespective of
- * remove order, is expected to be the same.
+ * remove order, is expected to be the same.
*
* Removal of a filter that has not previously been merged results in an
* invalid state where the counts no longer represent a sum of merged Bloom
@@ -36,166 +34,148 @@
* undetected. The CountingBloomFilter maintains a state flag that is used as a
* warning that an operation was performed that resulted in invalid counts and
* thus an invalid state. For example this may occur if a count for an index was
- * set to negative following a remove operation.
+ * set to negative following a remove operation.
*
* Implementations should document the expected state of the filter after an
* operation that generates invalid counts, and any potential recovery options.
* An implementation may support a reversal of the operation to restore the
* state to that prior to the operation. In the event that invalid counts are
* adjusted to a valid range then it should be documented if there has been
- * irreversible information loss.
+ * irreversible information loss.
*
* Implementations may choose to throw an exception during an operation that
* generates invalid counts. Implementations should document the expected state
* of the filter after such an operation. For example are the counts not updated,
- * partially updated or updated entirely before the exception is raised.
+ * partially updated or updated entirely before the exception is raised.
*
* @since 4.5
*/
-public interface CountingBloomFilter extends BloomFilter {
-
- /**
- * Represents an operation that accepts an {@code } pair representing
- * the count for a bit index in a counting Bloom filter and returns no result.
- *
- * Note: This is a functional interface as a primitive type specialization of
- * {@link java.util.function.BiConsumer} for {@code int}.
- */
- @FunctionalInterface
- interface BitCountConsumer {
- /**
- * Performs this operation on the given {@code } pair.
- *
- * @param index the bit index
- * @param count the count at the specified bit index
- */
- void accept(int index, int count);
- }
+public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
// Query Operations
/**
- * Returns true if the internal state is valid. This flag is a warning that an addition or
+ * Returns {@code true} if the internal state is valid.
+ *
+ * This flag is a warning that an addition or
* subtraction of counts from this filter resulted in an invalid count for one or more
* indexes. For example this may occur if a count for an index was
* set to negative following a subtraction operation, or overflows an {@code int} following an
- * addition operation.
+ * addition operation.
*
* A counting Bloom filter that has an invalid state is no longer ensured to function
* identically to a standard Bloom filter instance that is the merge of all the Bloom filters
- * that have been added to and not later subtracted from this counting Bloom filter.
+ * that have been added to and not later subtracted from this counting Bloom filter.
*
* Note: The change to an invalid state may or may not be reversible. Implementations
* are expected to document their policy on recovery from an addition or removal operation
- * that generated an invalid state.
+ * that generated an invalid state.
*
- * @return true if the state is valid
+ * @return {@code true} if the state is valid
*/
boolean isValid();
- /**
- * Performs the given action for each {@code } pair where the count is non-zero.
- * Any exceptions thrown by the action are relayed to the caller.
- *
- * @param action the action to be performed for each non-zero bit count
- * @throws NullPointerException if the specified action is null
- */
- void forEachCount(BitCountConsumer action);
-
// Modification Operations
/**
- * Merges the specified Bloom filter into this Bloom filter. Specifically all counts for
- * indexes that are enabled in the {@code other} filter will be incremented by 1.
+ * Removes the specified Bloom filter from this Bloom filter.
+ *
+ * Specifically: all counts for the indexes identified by the {@code other} filter will be decremented by 1.
*
- * Note: If the other filter is a counting Bloom filter the index counts are ignored; only
- * the enabled indexes are used.
+ * <p>Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
+ * IndexProducer.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>This method will return {@code true} if the filter is valid after the operation.
*
- * @param other {@inheritDoc}
- * @return true if the merge was successful and the state is valid
- * @throws IllegalArgumentException {@inheritDoc}
+ * @param other the other Bloom filter
+ * @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
+ * @see #subtract(BitCountProducer)
*/
- @Override
- boolean merge(BloomFilter other);
+ boolean remove(BloomFilter other);
/**
- * Merges the specified decomposed Bloom filter into this Bloom filter. Specifically all
- * counts for the distinct indexes that are identified by the {@code hasher} will
- * be incremented by 1. If the {@code hasher} contains duplicate bit indexes these are ignored.
+ * Removes the specified hasher from this Bloom filter.
+ *
+ * Specifically all counts for the indices produced by the {@code hasher} will be
+ * decremented by 1.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented
+ * from the counts separately.
*
- * @param hasher {@inheritDoc}
- * @return true if the merge was successful and the state is valid
- * @throws IllegalArgumentException {@inheritDoc}
+ * This method will return {@code true} if the filter is valid after the operation.
+ *
+ * @param hasher the hasher to provide the indexes
+ * @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
+ * @see #subtract(BitCountProducer)
*/
- @Override
- boolean merge(Hasher hasher);
+ boolean remove(Hasher hasher);
/**
- * Removes the specified Bloom filter from this Bloom filter. Specifically
- * all counts for the indexes identified by the {@code other} filter will be decremented by 1.
+ * Adds the specified BitCountProducer to this Bloom filter.
*
- * Note: If the other filter is a counting Bloom filter the index counts are ignored; only
- * the enabled indexes are used.
+ * <p>Specifically
+ * all counts for the indexes identified by the {@code other} will be incremented
+ * by their corresponding values in the {@code other}.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>This method will return {@code true} if the filter is valid after the operation.
*
- * @param other the other Bloom filter
- * @return true if the removal was successful and the state is valid
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
+ * @param other the BitCountProducer to add.
+ * @return {@code true} if the addition was successful and the state is valid
* @see #isValid()
- * @see #subtract(CountingBloomFilter)
+ * @see #subtract(BitCountProducer)
*/
- boolean remove(BloomFilter other);
+ boolean add(BitCountProducer other);
/**
- * Removes the specified decomposed Bloom filter from this Bloom filter. Specifically
- * all counts for the distinct indexes identified by the {@code hasher} will be
- * decremented by 1. If the {@code hasher} contains duplicate bit indexes these are ignored.
+ * Subtracts the specified BitCountProducer from this Bloom filter.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>Specifically
+ * all counts for the indexes identified by the {@code other} will be decremented
+ * by their corresponding values in the {@code other}.
*
- * @param hasher the hasher to provide the indexes
- * @return true if the removal was successful and the state is valid
- * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of
- * this filter
+ * This method will return true if the filter is valid after the operation.
+ *
+ * @param other the BitCountProducer to subtract.
+ * @return {@code true} if the subtraction was successful and the state is valid
* @see #isValid()
+ * @see #add(BitCountProducer)
*/
- boolean remove(Hasher hasher);
+ boolean subtract(BitCountProducer other);
/**
- * Adds the specified counting Bloom filter to this Bloom filter. Specifically
- * all counts for the indexes identified by the {@code other} filter will be incremented
- * by their corresponding counts in the {@code other} filter.
+ * Merges the specified Bloom filter into this Bloom filter to produce a new CountingBloomFilter.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>Specifically the new Bloom filter will contain all the counts of this filter and in addition
+ * all bit indexes that are enabled in the {@code other} filter will be incremented
+ * by one in the new filter.
*
- * @param other the other counting Bloom filter
- * @return true if the addition was successful and the state is valid
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
- * @see #isValid()
+ * Note: the validity of the resulting filter is not guaranteed. When in doubt {@code isValid()}
+ * should be called on the new filter.
+ *
+ * @param other the other Bloom filter
+ * @return A new CountingBloomFilter instance.
*/
- boolean add(CountingBloomFilter other);
+ @Override
+ CountingBloomFilter merge(BloomFilter other);
/**
- * Adds the specified counting Bloom filter to this Bloom filter. Specifically
- * all counts for the indexes identified by the {@code other} filter will be decremented
- * by their corresponding counts in the {@code other} filter.
+ * Merges the specified hasher with this Bloom filter to create a new CountingBloomFilter.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>Specifically the new Bloom filter will contain all the counts of this filter and in addition
+ * all bit indexes specified by the {@code hasher} will be incremented
+ * by one in the new filter.
*
- * @param other the other counting Bloom filter
- * @return true if the subtraction was successful and the state is valid
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
- * @see #isValid()
+ * For HasherCollections each enclosed Hasher will be considered a single item and increment
+ * the counts separately.
+ *
+ * Note: the validity of the resulting filter is not guaranteed. When in doubt {@code isValid()}
+ * should be called on the new filter.
+ *
+ * @param hasher the hasher to provide the indexes
+ * @return A new CountingBloomFilter instance.
*/
- boolean subtract(CountingBloomFilter other);
+ @Override
+ CountingBloomFilter merge(Hasher hasher);
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java
new file mode 100644
index 0000000000..3afd9fbe08
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.function.IntPredicate;
+
+/**
+ * A Hasher creates an IndexProducer based on the hash implementation and the
+ * provided Shape.
+ *
+ * @since 4.5
+ */
+public interface Hasher {
+
+ /**
+ * Creates an IndexProducer for this hasher based on the Shape.
+ *
+ * The {@code IndexProducer} will create indices within the range defined by the number of bits in
+ * the shape. The total number of indices will respect the number of hash functions per item
+ * defined by the shape. However the count of indices may not be a multiple of the number of
+ * hash functions if the implementation has removed duplicates.
+ *
+ * This IndexProducer must be deterministic in that it must return the same indices for the
+ * same Shape.
+ *
+ * No guarantee is made as to order of indices.
+ * Duplicate indices for a single item may be produced.
+ *
+ * @param shape the shape of the desired Bloom filter.
+ * @return an IndexProducer of the indices for the shape
+ */
+ IndexProducer indices(Shape shape);
+
+ /**
+ * Creates an IndexProducer of unique indices for this hasher based on the Shape.
+ *
+ * This is like the {@code indices(Shape)} method except that it adds the guarantee that no
+ * duplicate values will be returned.
+ *
+ * @param shape the shape of the desired Bloom filter.
+ * @return an IndexProducer of the unique indices for the shape
+ */
+ IndexProducer uniqueIndices(Shape shape);
+
+ /**
+ * A convenience class for Hasher implementations to filter out duplicate indices.
+ *
+ * If the index is negative the behavior is not defined.
+ *
+ * This is conceptually a unique filter implemented as a {@code IntPredicate}.
+ * @since 4.5
+ */
+ final class IndexFilter implements IntPredicate {
+ /** Answers {@code true} the first time a number is offered and {@code false} afterwards. */
+ private final IntPredicate tracker;
+ /** The number of bits in the shape; every accepted number must be below this value. */
+ private final int size;
+ /** The downstream predicate that receives each number the first time it is seen. */
+ private final IntPredicate consumer;
+
+ /**
+ * Creates an instance optimized for the specified shape.
+ * @param shape The shape that is being generated.
+ * @param consumer The consumer to accept the values.
+ * @return an IndexFilter optimized for the specified shape.
+ */
+ public static IndexFilter create(Shape shape, IntPredicate consumer) {
+ return new IndexFilter(shape, consumer);
+ }
+
+ /**
+ * Creates an instance optimized for the specified shape.
+ * @param shape The shape that is being generated.
+ * @param consumer The consumer to accept the values.
+ */
+ private IndexFilter(Shape shape, IntPredicate consumer) {
+ this.size = shape.getNumberOfBits();
+ this.consumer = consumer;
+ // Use whichever tracker needs less memory: one long per bit map of the shape,
+ // or one int per hash function.
+ final long bitMapBytes = (long) BitMap.numberOfBitMaps(size) * Long.BYTES;
+ final long arrayBytes = (long) shape.getNumberOfHashFunctions() * Integer.BYTES;
+ this.tracker = bitMapBytes < arrayBytes ? new BitMapTracker(shape) : new ArrayTracker(shape);
+ }
+
+ /**
+ * Test if the number should be processed by the {@code consumer}.
+ *
+ * If the number has not been seen before it is passed to the {@code consumer} and the result returned.
+ * If the number has been seen before the {@code consumer} is not called and {@code true} returned.
+ *
+ * If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.
+ *
+ * @param number the number to check.
+ * @return {@code true} if processing should continue, {@code false} otherwise.
+ */
+ @Override
+ public boolean test(int number) {
+ if (number >= size) {
+ throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
+ }
+ // The tracker is always consulted first; only first-time numbers reach the consumer.
+ return !tracker.test(number) || consumer.test(number);
+ }
+
+ /**
+ * A tracker that uses an array of integers to record which numbers have been seen.
+ * Suitable for Shapes that have few hash functions.
+ *
+ * The array holds one slot per hash function of the shape, so offering more distinct
+ * numbers than that overruns the array.
+ * @since 4.5
+ */
+ static class ArrayTracker implements IntPredicate {
+ /** The distinct numbers seen so far, stored in {@code [0, populated)}. */
+ private final int[] seen;
+ /** The count of slots of {@code seen} that are in use. */
+ private int populated;
+
+ /**
+ * Constructs the tracker based on the shape.
+ * @param shape the shape to build the tracker for.
+ */
+ ArrayTracker(Shape shape) {
+ seen = new int[shape.getNumberOfHashFunctions()];
+ }
+
+ /**
+ * Records the number and reports whether it is new.
+ * @param number the number to check; must not be negative.
+ * @return {@code true} if the number had not been seen before, {@code false} otherwise.
+ * @throws IndexOutOfBoundsException if the number is negative.
+ */
+ @Override
+ public boolean test(int number) {
+ if (number < 0) {
+ throw new IndexOutOfBoundsException("number may not be less than zero. " + number);
+ }
+ // linear scan: the array is expected to be short
+ for (int i = 0; i < populated; i++) {
+ if (seen[i] == number) {
+ return false;
+ }
+ }
+ seen[populated++] = number;
+ return true;
+ }
+ }
+
+ /**
+ * A tracker that uses an array of bit maps to record which numbers have been seen.
+ * @since 4.5
+ */
+ static class BitMapTracker implements IntPredicate {
+ /** One bit per index of the shape; a set bit means the index has been seen. */
+ private final long[] bits;
+
+ /**
+ * Constructs a bit map based tracker for the specified shape.
+ * @param shape The shape that is being generated.
+ */
+ BitMapTracker(Shape shape) {
+ bits = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
+ }
+
+ /**
+ * Records the number and reports whether it is new.
+ * @param number the number to check.
+ * @return {@code true} if the number had not been seen before, {@code false} otherwise.
+ */
+ @Override
+ public boolean test(int number) {
+ // read the previous state before marking the bit
+ final boolean unseen = !BitMap.contains(bits, number);
+ BitMap.set(bits, number);
+ return unseen;
+ }
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
deleted file mode 100644
index 1ae2b79d1e..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.Arrays;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.IntConsumer;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
-import org.apache.commons.collections4.iterators.EmptyIterator;
-import org.apache.commons.collections4.iterators.IteratorChain;
-
-/**
- * A Bloom filter built on a single hasher. This filter type should only be used for small
- * filters (few on bits). While this implementation correctly supports the merge() methods
- * it is recommended that if merges are expected that one of the other Bloom filter
- * implementations be used.
- * @since 4.5
- */
-public class HasherBloomFilter extends AbstractBloomFilter {
- /** The bit representation for an empty Bloom filter. */
- private static final long[] EMPTY = {};
-
- /**
- * The internal hasher representation.
- */
- private StaticHasher hasher;
-
- /**
- * Constructs a HasherBloomFilter from a hasher and a shape.
- *
- * @param hasher the hasher to use.
- * @param shape the shape of the Bloom filter.
- */
- public HasherBloomFilter(final Hasher hasher, final Shape shape) {
- super(shape);
- verifyHasher(hasher);
- if (hasher instanceof StaticHasher) {
- this.hasher = (StaticHasher) hasher;
- verifyShape(this.hasher.getShape());
- } else {
- this.hasher = new StaticHasher(hasher, shape);
- }
- }
-
- /**
- * Constructs an empty HasherBloomFilter from a shape.
- *
- * @param shape the shape of the Bloom filter.
- */
- public HasherBloomFilter(final Shape shape) {
- super(shape);
- this.hasher = new StaticHasher(EmptyIterator.emptyIterator(), shape);
- }
-
- @Override
- public int cardinality() {
- return hasher.size();
- }
-
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- final Set set = new TreeSet<>();
- hasher.iterator(getShape()).forEachRemaining((IntConsumer) set::add);
- final OfInt iter = this.hasher.iterator(getShape());
- while (iter.hasNext()) {
- final int idx = iter.nextInt();
- set.remove(idx);
- if (set.isEmpty()) {
- return true;
- }
- }
- return false;
- }
-
- @Override
- public long[] getBits() {
- if (hasher.isEmpty()) {
- return EMPTY;
- }
-
- // Note: This can be simplified if the StaticHasher exposed a getMaxIndex()
- // method. Since it maintains an ordered list of unique indices the maximum
- // is the last value in the iterator. Knowing this value would allow
- // exact allocation of the long[].
- // For now we assume that the long[] will have a positive length and at least
- // 1 bit set in the entire array.
-
- final int n = (int) Math.ceil(hasher.getShape().getNumberOfBits() * (1.0 / Long.SIZE));
- final long[] result = new long[n];
- final OfInt iter = hasher.iterator(hasher.getShape());
- iter.forEachRemaining((IntConsumer) idx -> {
- BloomFilterIndexer.checkPositive(idx);
- final int buffIdx = BloomFilterIndexer.getLongIndex(idx);
- final long buffOffset = BloomFilterIndexer.getLongBit(idx);
- result[buffIdx] |= buffOffset;
- });
-
- int limit = result.length;
-
- // Assume the array has a non-zero length and at least 1 bit set.
- // This is tested using assertions.
- assert limit > 0 : "Number of bits in Shape is 0";
- while (result[limit - 1] == 0) {
- limit--;
- // If the hasher was not empty it is not possible to return
- // an array of length zero.
- assert limit > 0 : "Hasher reported a non-zero size but has no indices";
- }
- if (limit < result.length) {
- return Arrays.copyOf(result, limit);
- }
- return result;
- }
-
- @Override
- public StaticHasher getHasher() {
- return hasher;
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- return merge(other.getHasher());
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- verifyHasher(hasher);
- final IteratorChain iter = new IteratorChain<>(this.hasher.iterator(getShape()),
- hasher.iterator(getShape()));
- this.hasher = new StaticHasher(iter, getShape());
- return true;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherCollection.java b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherCollection.java
new file mode 100644
index 0000000000..91aa43b1ef
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherCollection.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.IntPredicate;
+
+/**
+ * A collection of Hashers. Useful when the generation of a Bloom filter depends upon
+ * multiple items.
+ *
+ * Hashers for each item are added to the HasherCollection and then
+ * the collection is used wherever a Hasher can be used in the API.
+ *
+ * @since 4.5
+ */
+public class HasherCollection implements Hasher {
+
+ /**
+ * The list of hashers to be used to generate the indices.
+ */
+ private final List<Hasher> hashers;
+
+ /**
+ * Constructs an empty HasherCollection.
+ */
+ public HasherCollection() {
+ this.hashers = new ArrayList<>();
+ }
+
+ /**
+ * Constructs a HasherCollection from a collection of Hasher objects.
+ *
+ * @param hashers A collections of Hashers to build the indices with.
+ * @throws NullPointerException if {@code hashers} is null.
+ */
+ public HasherCollection(final Collection<Hasher> hashers) {
+ Objects.requireNonNull(hashers, "hashers");
+ // copy so that later changes to the argument do not affect this collection
+ this.hashers = new ArrayList<>(hashers);
+ }
+
+ /**
+ * Constructs a HasherCollection from an array of Hasher objects.
+ *
+ * @param hashers A list of Hashers to initialize the collection with.
+ */
+ public HasherCollection(Hasher... hashers) {
+ this(Arrays.asList(hashers));
+ }
+
+ /**
+ * Adds a hasher to the collection.
+ * @param hasher The hasher to add.
+ * @throws NullPointerException if {@code hasher} is null.
+ */
+ public void add(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ hashers.add(hasher);
+ }
+
+ /**
+ * Add all the Hashers in a collection to this HasherCollection.
+ * @param hashers The hashers to add.
+ * @throws NullPointerException if {@code hashers} is null.
+ */
+ public void add(Collection<Hasher> hashers) {
+ Objects.requireNonNull(hashers, "hashers");
+ this.hashers.addAll(hashers);
+ }
+
+ @Override
+ public IndexProducer indices(final Shape shape) {
+ Objects.requireNonNull(shape, "shape");
+ return new HasherCollectionIndexProducer(shape);
+ }
+
+ /**
+ * Creates an IndexProducer in which each enclosed Hasher contributes its unique indices.
+ * The same index may still be produced by more than one enclosed Hasher.
+ */
+ @Override
+ public IndexProducer uniqueIndices(final Shape shape) {
+ Objects.requireNonNull(shape, "shape");
+ return new HasherCollectionIndexProducer(shape) {
+ // Swapping the per-hasher producer keeps forEachIndex and asIndexArray in agreement.
+ @Override
+ IndexProducer producerFor(Hasher hasher) {
+ return hasher.uniqueIndices(shape);
+ }
+ };
+ }
+
+ /**
+ * Allow child classes access to the hashers.
+ * @return an unmodifiable view of the hashers
+ */
+ protected List<Hasher> getHashers() {
+ return Collections.unmodifiableList(hashers);
+ }
+
+ /**
+ * IndexProducer that visits the enclosed hashers in order and reports the indices of each
+ * one in turn. Indices that occur in more than one hasher are reported more than once.
+ */
+ class HasherCollectionIndexProducer implements IndexProducer {
+ private final Shape shape;
+
+ HasherCollectionIndexProducer(Shape shape) {
+ this.shape = shape;
+ }
+
+ /**
+ * Selects the IndexProducer used for one enclosed hasher.
+ * @param hasher the enclosed hasher.
+ * @return the producer whose indices are reported for the hasher.
+ */
+ IndexProducer producerFor(Hasher hasher) {
+ return hasher.indices(shape);
+ }
+
+ @Override
+ public boolean forEachIndex(IntPredicate consumer) {
+ for (Hasher hasher : hashers) {
+ // stop at the first hasher whose traversal was cut short by the consumer
+ if (!producerFor(hasher).forEachIndex(consumer)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public int[] asIndexArray() {
+ /*
+ * This method needs to return duplicate indices across hashers,
+ * so the per-hasher arrays are concatenated rather than merged.
+ */
+ final List<int[]> parts = new ArrayList<>(hashers.size());
+ int total = 0;
+ for (Hasher hasher : hashers) {
+ final int[] part = producerFor(hasher).asIndexArray();
+ parts.add(part);
+ total += part.length;
+ }
+ if (parts.size() == 1) {
+ // a single hasher: its array is returned as is, no copy needed
+ return parts.get(0);
+ }
+ // zero hashers fall through here and yield an empty array
+ final int[] result = new int[total];
+ int offset = 0;
+ for (int[] part : parts) {
+ System.arraycopy(part, 0, result, offset, part.length);
+ offset += part.length;
+ }
+ return result;
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilters.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilters.java
deleted file mode 100644
index e4adb4fc66..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilters.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-
-import java.util.Objects;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.function.Consumer;
-import java.util.function.IntConsumer;
-
-/**
- * Contains functions to filter indexes.
- */
-final class IndexFilters {
- /** Do not instantiate. */
- private IndexFilters() {
- }
-
- /**
- * Transfer all distinct indexes in the specified {@code hasher} generated for the
- * specified {@code shape} to the specified {@code consumer}. For example this
- * can be used to merge a {@link Hasher} representation of a Bloom filter into a
- * {@link BloomFilter} instance that does not naturally handle duplicate indexes.
- *
- * This method is functionally equivalent to:
- *
- *
- * final Set<Integer> distinct = new TreeSet<>();
- * hasher.iterator(shape).forEachRemaining((Consumer<Integer>) i -> {
- * if (distinct.add(i)) {
- * consumer.accept(i);
- * }
- * });
- *
- *
- * @param hasher the hasher
- * @param shape the shape
- * @param consumer the consumer to receive distinct indexes
- * @throws NullPointerException if the hasher, shape or action are null
- * @see Hasher#iterator(Shape)
- */
- static void distinctIndexes(final Hasher hasher, final Shape shape, final IntConsumer consumer) {
- Objects.requireNonNull(hasher, "hasher");
- Objects.requireNonNull(shape, "shape");
- Objects.requireNonNull(consumer, "consumer");
-
- // TODO
- // This function can be optimised based on the expected size
- // (number of indexes) of the hasher and the number of bits in the shape.
- //
- // A large size would benefit from a pre-allocated BitSet-type filter.
- // A very small size may be more efficient as a simple array of values
- // that have already been seen that is scanned for each new index.
- //
- // A default is to use a Set to filter distinct values. The choice of set
- // should be evaluated. A HashSet would be optimal if size is known.
- // A TreeSet has lower memory consumption and performance is not as
- // sensitive to knowing the size in advance.
-
- final Set distinct = new TreeSet<>();
- hasher.iterator(shape).forEachRemaining((Consumer) i -> {
- if (distinct.add(i)) {
- consumer.accept(i);
- }
- });
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
new file mode 100644
index 0000000000..ca6ac6e8cd
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.BitSet;
+import java.util.Objects;
+import java.util.function.IntPredicate;
+import java.util.function.LongPredicate;
+
+/**
+ * A source of Bloom filter indices.
+ *
+ * The default implementation of {@code asIndexArray} is slow. Implementers should reimplement the
+ * method where possible.
+ *
+ * @since 4.5
+ */
+@FunctionalInterface
+public interface IndexProducer {
+
+ /**
+ * Passes each index to the predicate. If the predicate returns {@code false} the traversal
+ * stops, {@code false} is returned, and no further indices are processed.
+ *
+ * Any exceptions thrown by the action are relayed to the caller.
+ *
+ * Indices ordering is not guaranteed
+ *
+ * @param predicate the action to be performed for each non-zero bit index.
+ * @return {@code true} if all indexes return true from consumer, {@code false} otherwise.
+ * @throws NullPointerException if the specified action is null
+ */
+ boolean forEachIndex(IntPredicate predicate);
+
+ /**
+ * Creates an IndexProducer from an array of integers. The values are reported in array
+ * order and the array is not copied.
+ * @param values the index values
+ * @return an IndexProducer that uses the values.
+ */
+ static IndexProducer fromIndexArray(final int... values) {
+ return predicate -> {
+ for (int value : values) {
+ if (!predicate.test(value)) {
+ return false;
+ }
+ }
+ return true;
+ };
+ }
+
+ /**
+ * Creates an IndexProducer from a {@code BitMapProducer}. Each bit map is treated as the
+ * next 64 indices, and every enabled bit is reported as its index.
+ * @param producer the {@code BitMapProducer}
+ * @return a new {@code IndexProducer}.
+ * @throws NullPointerException if {@code producer} is null.
+ */
+ static IndexProducer fromBitMapProducer(BitMapProducer producer) {
+ Objects.requireNonNull(producer, "producer");
+ return consumer -> {
+ // index of bit zero of the bit map currently being visited; reset for every traversal
+ final int[] wordStart = {0};
+ final LongPredicate wordVisitor = word -> {
+ // visit the enabled bits from lowest to highest, clearing each one once reported
+ for (long pending = word; pending != 0; pending &= pending - 1) {
+ if (!consumer.test(wordStart[0] + Long.numberOfTrailingZeros(pending))) {
+ return false;
+ }
+ }
+ wordStart[0] += Long.SIZE;
+ return true;
+ };
+ return producer.forEachBitMap(wordVisitor::test);
+ };
+ }
+
+ /**
+ * Return a copy of the IndexProducer data as an int array.
+ *
+ * This default implementation collects the indices in a {@code BitSet}, so each distinct
+ * index appears once in the result. It is slow; implementing classes are encouraged to
+ * reimplement this method.
+ *
+ * @return An int array of the data.
+ */
+ default int[] asIndexArray() {
+ final BitSet seen = new BitSet();
+ forEachIndex(index -> {
+ seen.set(index);
+ return true;
+ });
+ return seen.stream().toArray();
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/LongBiPredicate.java b/src/main/java/org/apache/commons/collections4/bloomfilter/LongBiPredicate.java
new file mode 100644
index 0000000000..9b45a09afb
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/LongBiPredicate.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+/**
+ * Represents a function that accepts two long-valued arguments and produces a boolean result.
+ * This is the long-consuming primitive specialization for {@code BiPredicate}.
+ *
+ * This is a functional interface whose functional method is {@code test(long,long)}.
+ *
+ * @since 4.5
+ */
+@FunctionalInterface
+public interface LongBiPredicate {
+
+ /**
+ * A function that takes two long arguments and returns a boolean.
+ * @param x the first long argument.
+ * @param y the second long argument.
+ * @return true or false.
+ */
+ boolean test(long x, long y);
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
index 48c43620ad..fa28559715 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
@@ -16,147 +16,176 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
+import java.util.function.LongBinaryOperator;
/**
- * Implementations of set operations on Bloom filters.
+ * Implementations of set operations on BitMapProducers.
*
+ * @since 4.5
*/
public final class SetOperations {
/**
- * Calculates the Cosine distance between two Bloom filters.
+ * Calculates the cardinality of the result of a LongBinaryOperator applied to each pair of
+ * bit maps visited by {@code BitMapProducer.forEachBitMapPair}.
+ * @param first the first BitMapProducer
+ * @param second the second BitMapProducer
+ * @param op a long binary operation on where x = {@code first} and y = {@code second} bitmap producers.
+ * @return the calculated cardinality.
+ */
+ private static int cardinality(BitMapProducer first, BitMapProducer second, LongBinaryOperator op) {
+ // single-element array: a counter the lambda below is allowed to update
+ final int[] total = {0};
+ first.forEachBitMapPair(second, (left, right) -> {
+ // combine the paired bit maps, then count the bits enabled in the result
+ total[0] += Long.bitCount(op.applyAsLong(left, right));
+ return true;
+ });
+ return total[0];
+ }
+
+ /**
+ * Counts the enabled bits in all the bit maps of a BitMapProducer. Every bit map created
+ * by the producer is visited.
+ * @param producer the Producer to calculate the cardinality for.
+ * @return the cardinality of the bit maps produced by the producer.
+ */
+ public static int cardinality(BitMapProducer producer) {
+ // single-element array: a counter the lambda below is allowed to update
+ final int[] total = {0};
+ producer.forEachBitMap(word -> {
+ total[0] += Long.bitCount(word);
+ return true;
+ });
+ return total[0];
+ }
+
+ /**
+ * Counts the bits enabled in the logical {@code AND} of the bit maps of two producers.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer
+ * @return the cardinality of the {@code AND} of the filters.
+ */
+ public static int andCardinality(final BitMapProducer first, final BitMapProducer second) {
+ final LongBinaryOperator and = (left, right) -> left & right;
+ return cardinality(first, second, and);
+ }
+
+ /**
+ * Counts the bits enabled in the logical {@code OR} of the bit maps of two producers.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer
+ * @return the cardinality of the {@code OR} of the filters.
+ */
+ public static int orCardinality(final BitMapProducer first, final BitMapProducer second) {
+ final LongBinaryOperator or = (left, right) -> left | right;
+ return cardinality(first, second, or);
+ }
+
+ /**
+ * Counts the bits enabled in the logical {@code XOR} of the bit maps of two producers.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer
+ * @return the cardinality of the {@code XOR} of the filters.
+ */
+ public static int xorCardinality(final BitMapProducer first, final BitMapProducer second) {
+ final LongBinaryOperator xor = (left, right) -> left ^ right;
+ return cardinality(first, second, xor);
+ }
+
+ /**
+ * Calculates the Cosine distance between two BitMapProducer.
*
* Cosine distance is defined as {@code 1 - Cosine similarity}
*
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer.
* @return the jaccard distance.
*/
- public static double cosineDistance(final BloomFilter first, final BloomFilter second) {
+ public static double cosineDistance(final BitMapProducer first, final BitMapProducer second) {
return 1.0 - cosineSimilarity(first, second);
}
/**
- * Calculates the Cosine similarity between two Bloom filters.
+ * Calculates the Cosine similarity between two BitMapProducers.
* Also known as Orchini similarity and the Tucker coefficient of congruence or
* Ochiai similarity.
*
- * If either filter is empty (no enabled bits) the result is 0 (zero)
+ * If either producer is empty the result is 0 (zero)
*
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer.
* @return the Cosine similarity.
*/
- public static double cosineSimilarity(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- final int numerator = first.andCardinality(second);
- return numerator == 0 ? 0 : numerator / (Math.sqrt(first.cardinality()) * Math.sqrt(second.cardinality()));
+ public static double cosineSimilarity(final BitMapProducer first, final BitMapProducer second) {
+ final int numerator = andCardinality(first, second);
+ // Each cardinality is an int, so widen to double BEFORE multiplying: an int product
+ // can overflow for large filters, the double product cannot. One sqrt then suffices.
+ return numerator == 0 ? 0 : numerator / Math.sqrt((double) cardinality(first) * cardinality(second));
}
/**
- * Estimates the number of items in the intersection of the sets represented by two
- * Bloom filters.
+ * Calculates the Cosine similarity between two Bloom filters.
+ * Also known as Orchini similarity and the Tucker coefficient of congruence or
+ * Ochiai similarity.
*
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
- * @return an estimate of the size of the intersection between the two filters.
- */
- public static long estimateIntersectionSize(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- // do subtraction early to avoid Long overflow.
- return estimateSize(first) - estimateUnionSize(first, second) + estimateSize(second);
- }
-
- /**
- * Estimates the number of items in the Bloom filter based on the shape and the number
- * of bits that are enabled.
+ * If either filter is empty (no enabled bits) the result is 0 (zero)
*
- * @param filter the Bloom filter to estimate size for.
- * @return an estimate of the number of items that were placed in the Bloom filter.
- */
- public static long estimateSize(final BloomFilter filter) {
- final Shape shape = filter.getShape();
- final double estimate = -(shape.getNumberOfBits() *
- Math.log(1.0 - filter.cardinality() * 1.0 / shape.getNumberOfBits())) /
- shape.getNumberOfHashFunctions();
- return Math.round(estimate);
- }
-
- /**
- * Estimates the number of items in the union of the sets represented by two
- * Bloom filters.
+ * This is a version of cosineSimilarity optimized for Bloom filters.
*
* @param first the first Bloom filter.
* @param second the second Bloom filter.
- * @return an estimate of the size of the union between the two filters.
+ * @return the Cosine similarity.
*/
- public static long estimateUnionSize(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- final Shape shape = first.getShape();
- final double estimate = -(shape.getNumberOfBits() *
- Math.log(1.0 - first.orCardinality(second) * 1.0 / shape.getNumberOfBits())) /
- shape.getNumberOfHashFunctions();
- return Math.round(estimate);
+ public static double cosineSimilarity(final BloomFilter first, final BloomFilter second) {
+ final int numerator = andCardinality(first, second);
+ // Each cardinality is an int, so widen to double BEFORE multiplying: an int product
+ // can overflow for large filters, the double product cannot. One sqrt then suffices.
+ return numerator == 0 ? 0 : numerator / Math.sqrt((double) first.cardinality() * second.cardinality());
}
/**
- * Calculates the Hamming distance between two Bloom filters.
+ * Calculates the Hamming distance between two BitMapProducers.
*
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer.
* @return the Hamming distance.
*/
- public static int hammingDistance(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- return first.xorCardinality(second);
+ public static int hammingDistance(final BitMapProducer first, final BitMapProducer second) {
+ // the distance is the count of bits enabled in the XOR of the two producers
+ final int differingBits = xorCardinality(first, second);
+ return differingBits;
}
/**
- * Calculates the Jaccard distance between two Bloom filters.
+ * Calculates the Jaccard distance between two BitMapProducer.
*
* Jaccard distance is defined as {@code 1 - Jaccard similarity}
*
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer.
* @return the Jaccard distance.
*/
- public static double jaccardDistance(final BloomFilter first, final BloomFilter second) {
+ public static double jaccardDistance(final BitMapProducer first, final BitMapProducer second) {
return 1.0 - jaccardSimilarity(first, second);
}
/**
- * Calculates the Jaccard similarity between two Bloom filters.
+ * Calculates the Jaccard similarity between two BitMapProducer.
*
* Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient
*
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
+ * @param first the first BitMapProducer.
+ * @param second the second BitMapProducer.
* @return the Jaccard similarity.
*/
- public static double jaccardSimilarity(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- final int orCard = first.orCardinality(second);
- // if the orCard is zero then the hamming distance will also be zero.
- return orCard == 0 ? 0 : hammingDistance(first, second) / (double) orCard;
- }
-
- /**
- * Verifies the Bloom filters have the same shape.
- *
- * @param first the first filter to check.
- * @param second the second filter to check.
- * @throws IllegalArgumentException if the shapes are not the same.
- */
- private static void verifyShape(final BloomFilter first, final BloomFilter second) {
- if (!first.getShape().equals(second.getShape())) {
- throw new IllegalArgumentException(String.format("Shape %s is not the same as %s",
- first.getShape(), second.getShape()));
- }
+ public static double jaccardSimilarity(final BitMapProducer first, final BitMapProducer second) {
+ // counts[0] accumulates the AND cardinality, counts[1] the OR cardinality, in one pass
+ final int[] counts = {0, 0};
+ first.forEachBitMapPair(second, (left, right) -> {
+ counts[0] += Long.bitCount(left & right);
+ counts[1] += Long.bitCount(left | right);
+ return true;
+ });
+ // an empty intersection yields zero and avoids dividing when the union count is also zero
+ if (counts[0] == 0) {
+ return 0;
+ }
+ return counts[0] / (double) counts[1];
}
/**
* Do not instantiate.
*/
- private SetOperations() {}
+ private SetOperations() {
+ // intentionally empty: the private constructor only exists to block instantiation
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java b/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java
similarity index 59%
rename from src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
rename to src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java
index a82586fe4e..40db56516a 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.commons.collections4.bloomfilter.hasher;
+package org.apache.commons.collections4.bloomfilter;
import java.util.Objects;
@@ -27,23 +27,23 @@
*
* Interrelatedness of values
*
- * - Number of Items ({@code n})
- * - {@code n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))}
- Probability of
- * False Positives ({@code p})
- {@code p = pow(1 - exp(-k / (m / n)), k)}
- Number
- * of Bits ({@code m})
- * - {@code m = ceil((n * ln(p)) / ln(1 / pow(2, ln(2))))}
- Number of
- * Functions ({@code k})
- {@code k = round((m / n) * ln(2))}
+ *
+ * - Number of Items ({@code n})
+ * - {@code n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))}
+ * - Probability of False Positives ({@code p})
+ * - {@code p = pow(1 - exp(-k / (m / n)), k)}
+ * - Number of Bits ({@code m})
+ * - {@code m = ceil((n * ln(p)) / ln(1 / pow(2, ln(2))))}
+ * - Number of Functions ({@code k})
+ * - {@code k = round((m / n) * ln(2))}
+ *
*
- * Comparisons
For purposes of equality checking and hashCode
- * calculations a {@code Shape} is defined by the hashing function identity, the number of
- * bits ({@code m}), and the number of functions ({@code k}).
- *
- * @see Bloom Filter calculator
+ * @see Bloom Filter calculator
* @see Bloom filter
* [Wikipedia]
* @since 4.5
*/
-public final class Shape {
+public final class Shape implements Comparable {
/**
* The natural logarithm of 2. Used in several calculations. Approximately 0.693147180559945.
@@ -58,9 +58,9 @@ public final class Shape {
private static final double DENOMINATOR = -LN_2 * LN_2;
/**
- * Number of items in the filter ({@code n}).
+ * Number of hash functions to create a filter ({@code k}).
*/
- private final int numberOfItems;
+ private final int numberOfHashFunctions;
/**
* Number of bits in the filter ({@code m}).
@@ -68,19 +68,137 @@ public final class Shape {
private final int numberOfBits;
/**
- * Number of hash functions ({@code k}).
+ * Constructs a filter configuration with the specified number of hashFunctions ({@code k}) and
+ * bits ({@code m}).
+ *
+ * @param numberOfHashFunctions Number of hash functions to use for each item placed in the filter.
+ * @param numberOfBits The number of bits in the filter
+ * @throws IllegalArgumentException if {@code numberOfHashFunctions < 1} or {@code numberOfBits < 1}
*/
- private final int numberOfHashFunctions;
+ private Shape(final int numberOfHashFunctions, final int numberOfBits) {
+     // validate k before m so an invalid hash-function count is reported first
+     final int checkedHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
+     final int checkedBits = checkNumberOfBits(numberOfBits);
+     this.numberOfHashFunctions = checkedHashFunctions;
+     this.numberOfBits = checkedBits;
+ }
+
+ @Override
+ public int compareTo(Shape other) {
+     // order by number of bits first; the number of hash functions breaks ties
+     final int byBits = Integer.compare(numberOfBits, other.numberOfBits);
+     if (byBits != 0) {
+         return byBits;
+     }
+     return Integer.compare(numberOfHashFunctions, other.numberOfHashFunctions);
+ }
+
+ @Override
+ public boolean equals(final Object o) {
+     // equality is defined through compareTo, so the two can never disagree
+     if (!(o instanceof Shape)) {
+         return false;
+     }
+     return compareTo((Shape) o) == 0;
+ }
+
+ @Override
+ public int hashCode() {
+     // same value as Objects.hash(numberOfBits, numberOfHashFunctions):
+     // start at 1, then result = 31 * result + element for each element
+     return 31 * (31 + numberOfBits) + numberOfHashFunctions;
+ }
+
+ /**
+  * Gets the number of bits in the Bloom filter.
+  * This is also known as {@code m}.
+  *
+  * @return the number of bits in the Bloom filter ({@code m}), as supplied to and
+  *         validated by the constructor.
+  */
+ public int getNumberOfBits() {
+     return numberOfBits;
+ }
+
+ /**
+  * Gets the number of hash functions used to construct the filter.
+  * This is also known as {@code k}.
+  *
+  * @return the number of hash functions used to construct the filter ({@code k}), as
+  *         supplied to and validated by the constructor.
+  */
+ public int getNumberOfHashFunctions() {
+     return numberOfHashFunctions;
+ }
+
+ /**
+  * Calculates the probability of false positives ({@code p}) for this shape's
+  * numberOfBits ({@code m}) and numberOfHashFunctions ({@code k}) once
+  * numberOfItems ({@code n}) items have been hashed into the filter.
+  * <pre>p = pow(1 - exp(-k / (m / n)), k)</pre>
+  *
+  * <p>This is the probability that a Bloom filter will return true for the presence of an item
+  * when it does not contain the item.</p>
+  *
+  * <p>The result assumes the filter holds exactly {@code numberOfItems} items. A filter holding
+  * fewer items has a lower actual probability, so when {@code numberOfItems} is the expected
+  * capacity this is the worst-case false positive probability for a filter that has not
+  * exceeded that capacity. Zero items always yields 0.</p>
+  *
+  * @param numberOfItems the number of items hashed into the Bloom filter.
+  * @return the probability of false positives.
+  * @throws IllegalArgumentException if {@code numberOfItems < 0}.
+  */
+ public double getProbability(int numberOfItems) {
+     if (numberOfItems < 0) {
+         throw new IllegalArgumentException("Number of items must be greater than or equal to 0: " + numberOfItems);
+     }
+     if (numberOfItems == 0) {
+         return 0;
+     }
+     // -expm1(x) equals 1 - exp(x); the exponent is -k * n / m, evaluated left to right in double
+     final double exponent = -1.0 * numberOfHashFunctions * numberOfItems / numberOfBits;
+     final double fillRatio = -Math.expm1(exponent);
+     return Math.pow(fillRatio, numberOfHashFunctions);
+ }
+
+ @Override
+ public String toString() {
+     // k (hash functions) is printed before m (bits)
+     final String pattern = "Shape[k=%s m=%s]";
+     return String.format(pattern, numberOfHashFunctions, numberOfBits);
+ }
+
+ /**
+  * Determines if a cardinality is sparse based on the shape.
+  * This method assumes that bit maps are 64 bits and indexes are 32 bits. If the memory
+  * necessary to store the cardinality as indexes is no more than the estimated memory for
+  * bit maps, the cardinality is determined to be {@code sparse}.
+  * @param cardinality the cardinality to check.
+  * @return true if the cardinality is sparse within the shape.
+  */
+ public boolean isSparse(int cardinality) {
+     /*
+      * A bit map is a long and an index is an int, so two indexes occupy the
+      * same space as one bit map. The cardinality (number of indexes) is
+      * therefore sparse when it does not exceed twice the number of bit maps
+      * needed for this shape.
+      */
+     final int bitMapCount = BitMap.numberOfBitMaps(getNumberOfBits());
+     final int indexCapacity = bitMapCount * 2;
+     return cardinality <= indexCapacity;
+ }
/**
- * The hash code for this filter.
+ * Estimate the number of items in a Bloom filter with this shape and the specified number of bits enabled.
+ *
+ * Note:
+ *
+ * - if cardinality == numberOfBits, then result is infinity.
+ * - if cardinality > numberOfBits, then result is NaN.
+ *
+ *
+ * @param cardinality the number of enabled bits also known as the hamming value.
+ * @return An estimate of the number of items in the Bloom filter.
*/
- private final int hashCode;
+ public double estimateN(int cardinality) {
+     // n = -(m / k) * ln(1 - c / m), with ln(1 + x) evaluated through log1p
+     final double bits = numberOfBits;
+     final double hashFunctions = numberOfHashFunctions;
+     final double fillRatio = cardinality / bits;
+     return -(bits / hashFunctions) * Math.log1p(-fillRatio);
+ }
/**
- * The identity of the hasher function.
+ * The factory to assist in the creation of proper Shapes.
+ *
+ * In the methods of this factory the `from` names are appended with the standard variable
+ * names in the order expected:
+ *
+ * <dl>
+ * <dt>{@code N}</dt><dd>The number of items to be placed in the Bloom filter</dd>
+ * <dt>{@code M}</dt><dd>The number of bits in the Bloom filter</dd>
+ * <dt>{@code K}</dt><dd>The number of hash functions for each item placed in the Bloom filter</dd>
+ * <dt>{@code P}</dt><dd>The probability of a collision once N items have been placed in the Bloom filter</dd>
+ * </dl>
*/
- private final HashFunctionIdentity hashFunctionIdentity;
/**
* Constructs a filter configuration with a desired false-positive probability ({@code p}) and the
@@ -94,27 +212,23 @@ public final class Shape {
* (number of items). An exception is raised if this is greater than or equal to 1 (i.e. the
* shape is invalid for use as a Bloom filter).
*
- * @param hashFunctionIdentity The identity of the hash function this shape uses
* @param probability The desired false-positive probability in the range {@code (0, 1)}
* @param numberOfBits The number of bits in the filter
* @param numberOfHashFunctions The number of hash functions in the filter
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if the desired probability is not in the range {@code (0, 1)};
- * if {@code numberOfBits < 1}; if {@code numberOfHashFunctions < 1}; or if the actual
+ * @return a valid Shape.
+ * @throws IllegalArgumentException if the desired probability is not in the range {@code (0, 1)},
+ * {@code numberOfBits < 1}, {@code numberOfHashFunctions < 1}, or the actual
* probability is {@code >= 1.0}
- * @see #getProbability()
*/
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final double probability, final int numberOfBits,
- final int numberOfHashFunctions) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
+ public static Shape fromPMK(final double probability, final int numberOfBits, final int numberOfHashFunctions) {
checkProbability(probability);
- this.numberOfBits = checkNumberOfBits(numberOfBits);
- this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
+ checkNumberOfBits(numberOfBits);
+ checkNumberOfHashFunctions(numberOfHashFunctions);
// Number of items (n):
// n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))
- final double n = Math.ceil(numberOfBits /
- (-numberOfHashFunctions / Math.log(1 - Math.exp(Math.log(probability) / numberOfHashFunctions))));
+ final double n = Math.ceil(numberOfBits
+ / (-numberOfHashFunctions / Math.log(-Math.expm1(Math.log(probability) / numberOfHashFunctions))));
// log of probability is always < 0
// number of hash functions is >= 1
@@ -126,10 +240,11 @@ public Shape(final HashFunctionIdentity hashFunctionIdentity, final double proba
//
// similarly we can not produce a number greater than numberOfBits so we
// do not have to check for Integer.MAX_VALUE either.
- this.numberOfItems = (int) n;
+
+ Shape shape = new Shape(numberOfHashFunctions, numberOfBits);
// check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
+ checkCalculatedProbability(shape.getProbability((int) n));
+ return shape;
}
/**
@@ -147,17 +262,14 @@ public Shape(final HashFunctionIdentity hashFunctionIdentity, final double proba
* functions. An exception is raised if this is greater than or equal to 1 (i.e. the
* shape is invalid for use as a Bloom filter).
*
- * @param hashFunctionIdentity The identity of the hash function this shape uses
* @param numberOfItems Number of items to be placed in the filter
* @param probability The desired false-positive probability in the range {@code (0, 1)}
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if {@code numberOfItems < 1}; if the desired probability
- * is not in the range {@code (0, 1)}; or if the actual probability is {@code >= 1.0}
- * @see #getProbability()
+ * @return a valid Shape
+ * @throws IllegalArgumentException if {@code numberOfItems < 1}, if the desired probability
+ * is not in the range {@code (0, 1)} or if the actual probability is {@code >= 1.0}.
*/
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final double probability) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
- this.numberOfItems = checkNumberOfItems(numberOfItems);
+ public static Shape fromNP(final int numberOfItems, final double probability) {
+ checkNumberOfItems(numberOfItems);
checkProbability(probability);
// Number of bits (m)
@@ -165,12 +277,26 @@ public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOf
if (m > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Resulting filter has more than " + Integer.MAX_VALUE + " bits: " + m);
}
- this.numberOfBits = (int) m;
+ int numberOfBits = (int) m;
- this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
+ int numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
+ Shape shape = new Shape(numberOfHashFunctions, numberOfBits);
// check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
+ checkCalculatedProbability(shape.getProbability(numberOfItems));
+ return shape;
+ }
+
+ /**
+  * Constructs a filter configuration with the specified number of hashFunctions ({@code k}) and
+  * bits ({@code m}). Both arguments are validated by the private constructor this method delegates to.
+  *
+  * @param numberOfHashFunctions Number of hash functions to use for each item placed in the filter.
+  * @param numberOfBits The number of bits in the filter
+  * @return a valid Shape.
+  * @throws IllegalArgumentException if {@code numberOfHashFunctions < 1} or {@code numberOfBits < 1}
+  */
+ public static Shape fromKM(final int numberOfHashFunctions, final int numberOfBits) {
+     return new Shape(numberOfHashFunctions, numberOfBits);
+ }
/**
@@ -184,23 +310,20 @@ public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOf
* functions. An exception is raised if this is greater than or equal to 1 (i.e. the
* shape is invalid for use as a Bloom filter).
*
- * @param hashFunctionIdentity The identity of the hash function this shape uses
* @param numberOfItems Number of items to be placed in the filter
* @param numberOfBits The number of bits in the filter
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if {@code numberOfItems < 1}; if {@code numberOfBits < 1};
- * if the calculated number of hash function is {@code < 1};
- * or if the actual probability is {@code >= 1.0}
- * @see #getProbability()
+ * @return a valid Shape.
+ * @throws IllegalArgumentException if {@code numberOfItems < 1}, {@code numberOfBits < 1},
+ * the calculated number of hash function is {@code < 1}, or if the actual probability is {@code >= 1.0}
*/
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
- this.numberOfItems = checkNumberOfItems(numberOfItems);
- this.numberOfBits = checkNumberOfBits(numberOfBits);
- this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
+ public static Shape fromNM(final int numberOfItems, final int numberOfBits) {
+ checkNumberOfItems(numberOfItems);
+ checkNumberOfBits(numberOfBits);
+ int numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
+ Shape shape = new Shape(numberOfHashFunctions, numberOfBits);
// check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
+ checkCalculatedProbability(shape.getProbability(numberOfItems));
+ return shape;
}
/**
@@ -211,24 +334,22 @@ public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOf
* functions. An exception is raised if this is greater than or equal to 1 (i.e. the
* shape is invalid for use as a Bloom filter).
*
- * @param hashFunctionIdentity The identity of the hash function this shape uses
* @param numberOfItems Number of items to be placed in the filter
* @param numberOfBits The number of bits in the filter.
* @param numberOfHashFunctions The number of hash functions in the filter
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if {@code numberOfItems < 1}; if {@code numberOfBits < 1};
- * if {@code numberOfHashFunctions < 1}; or if the actual probability is {@code >= 1.0}
- * @see #getProbability()
+ * @return a valid Shape.
+ * @throws IllegalArgumentException if {@code numberOfItems < 1}, {@code numberOfBits < 1},
+ * {@code numberOfHashFunctions < 1}, or if the actual probability is {@code >= 1.0}.
*/
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits,
- final int numberOfHashFunctions) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
- this.numberOfItems = checkNumberOfItems(numberOfItems);
- this.numberOfBits = checkNumberOfBits(numberOfBits);
- this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
+ public static Shape fromNMK(final int numberOfItems, final int numberOfBits, final int numberOfHashFunctions) {
+ checkNumberOfItems(numberOfItems);
+ checkNumberOfBits(numberOfBits);
+ checkNumberOfHashFunctions(numberOfHashFunctions);
+ // all arguments are individually valid: build the shape
+ Shape shape = new Shape(numberOfHashFunctions, numberOfBits);
// check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
+ checkCalculatedProbability(shape.getProbability(numberOfItems));
+ return shape;
}
/**
@@ -236,7 +357,7 @@ public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOf
*
* @param numberOfItems the number of items
* @return the number of items
- * @throws IllegalArgumentException if the number of items is {@code < 1}
+ * @throws IllegalArgumentException if the number of items is {@code < 1}.
*/
private static int checkNumberOfItems(final int numberOfItems) {
if (numberOfItems < 1) {
@@ -250,7 +371,7 @@ private static int checkNumberOfItems(final int numberOfItems) {
*
* @param numberOfBits the number of bits
* @return the number of bits
- * @throws IllegalArgumentException if the number of bits is {@code < 1}
+ * @throws IllegalArgumentException if the number of bits is {@code < 1}.
*/
private static int checkNumberOfBits(final int numberOfBits) {
if (numberOfBits < 1) {
@@ -260,15 +381,16 @@ private static int checkNumberOfBits(final int numberOfBits) {
}
/**
- * Check number of hash functions is strictly positive
+ * Check number of hash functions is strictly positive.
*
* @param numberOfHashFunctions the number of hash functions
* @return the number of hash functions
- * @throws IllegalArgumentException if the number of hash functions is {@code < 1}
+ * @throws IllegalArgumentException if the number of hash functions is {@code < 1}.
*/
private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) {
if (numberOfHashFunctions < 1) {
- throw new IllegalArgumentException("Number of hash functions must be greater than 0: " + numberOfHashFunctions);
+ throw new IllegalArgumentException(
+ "Number of hash functions must be greater than 0: " + numberOfHashFunctions);
}
return numberOfHashFunctions;
}
@@ -294,7 +416,7 @@ private static void checkProbability(final double probability) {
* construction.
*
* @param probability the probability
- * @throws IllegalArgumentException if the probability is {@code >= 1.0}
+ * @throws IllegalArgumentException if the probability is {@code >= 1.0}.
*/
private static void checkCalculatedProbability(final double probability) {
// We do not need to check for p <= 0.0 since we only allow positive values for
@@ -303,7 +425,7 @@ private static void checkCalculatedProbability(final double probability) {
// always be 0<1 and y>0
if (probability >= 1.0) {
throw new IllegalArgumentException(
- String.format("Calculated probability is greater than or equal to 1: " + probability));
+ String.format("Calculated probability is greater than or equal to 1: " + probability));
}
}
@@ -322,7 +444,7 @@ private static int calculateNumberOfHashFunctions(final int numberOfItems, final
final long k = Math.round(LN_2 * numberOfBits / numberOfItems);
if (k < 1) {
throw new IllegalArgumentException(
- String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
+ String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
}
// Normally we would check that numberofHashFunctions <= Integer.MAX_VALUE but
// since numberOfBits is at most Integer.MAX_VALUE the numerator of
@@ -330,91 +452,4 @@ private static int calculateNumberOfHashFunctions(final int numberOfItems, final
// value of k can not be above Integer.MAX_VALUE.
return (int) k;
}
-
- @Override
- public boolean equals(final Object o) {
- if (o instanceof Shape) {
- final Shape other = (Shape) o;
- return numberOfBits == other.numberOfBits &&
- numberOfHashFunctions == other.numberOfHashFunctions &&
- HashFunctionValidator.areEqual(hashFunctionIdentity,
- other.hashFunctionIdentity);
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- return hashCode;
- }
-
- private int generateHashCode() {
- return Objects.hash(numberOfBits, numberOfHashFunctions, HashFunctionValidator.hash(hashFunctionIdentity));
- }
-
- /**
- * Gets the HashFunctionIdentity of the hash function this shape uses.
- * @return the HashFunctionIdentity of the hash function this shape uses.
- */
- public HashFunctionIdentity getHashFunctionIdentity() {
- return hashFunctionIdentity;
- }
-
- /**
- * Gets the number of bits in the Bloom filter.
- * This is also known as {@code m}.
- *
- * @return the number of bits in the Bloom filter ({@code m}).
- */
- public int getNumberOfBits() {
- return numberOfBits;
- }
-
- /**
- * Gets the number of hash functions used to construct the filter.
- * This is also known as {@code k}.
- *
- * @return the number of hash functions used to construct the filter ({@code k}).
- */
- public int getNumberOfHashFunctions() {
- return numberOfHashFunctions;
- }
-
- /**
- * Gets the number of items that are expected in the filter.
- * This is also known as {@code n}.
- *
- * @return the number of items ({@code n}).
- */
- public int getNumberOfItems() {
- return numberOfItems;
- }
-
- /**
- * Calculates the probability of false positives ({@code p}) given
- * numberOfItems ({@code n}), numberOfBits ({@code m}) and numberOfHashFunctions ({@code k}).
- * p = pow(1 - exp(-k / (m / n)), k)
- *
- * This is the probability that a Bloom filter will return true for the presence of an item
- * when it does not contain the item.
- *
- *
The probability assumes that the Bloom filter is filled with the expected number of
- * items. If the filter contains fewer items then the actual probability will be lower.
- * Thus this returns the worst-case false positive probability for a filter that has not
- * exceeded its expected number of items.
- *
- * @return the probability of false positives.
- * @see #getNumberOfItems()
- */
- public double getProbability() {
- return Math.pow(1.0 - Math.exp(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
- numberOfHashFunctions);
- }
-
- @Override
- public String toString() {
- return String.format("Shape[ %s n=%s m=%s k=%s ]",
- HashFunctionIdentity.asCommonString(hashFunctionIdentity),
- numberOfItems, numberOfBits, numberOfHashFunctions);
- }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java
new file mode 100644
index 0000000000..fcdcf36d5b
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntPredicate;
+import java.util.function.LongPredicate;
+
+/**
+ * A bloom filter using an array of bit maps to track enabled bits. This is a standard
+ * implementation and should work well for most Bloom filters.
+ * @since 4.5
+ */
+public final class SimpleBloomFilter implements BloomFilter {
+
+    /**
+     * The array of bit map longs that defines this Bloom filter. Never null; all zero when the filter is empty.
+     */
+    private final long[] bitMap;
+
+    /**
+     * The Shape of this Bloom filter.
+     */
+    private final Shape shape;
+
+    /**
+     * The cached cardinality of this Bloom filter; a negative value means it must be recomputed.
+     */
+    private int cardinality;
+
+    /**
+     * Creates an empty instance.
+     *
+     * @param shape The shape for the filter.
+     */
+    public SimpleBloomFilter(Shape shape) {
+        Objects.requireNonNull(shape, "shape");
+        this.shape = shape;
+        this.bitMap = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
+        this.cardinality = 0;
+    }
+
+    /**
+     * Creates an instance that is equivalent to {@code other}.
+     *
+     * @param other The bloom filter to copy.
+     */
+    public SimpleBloomFilter(BloomFilter other) {
+        Objects.requireNonNull(other, "other");
+        this.shape = other.getShape();
+        this.bitMap = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
+        this.cardinality = 0;
+        if (other.isSparse()) {
+            mergeInPlace((IndexProducer) other); // the cast only selects the private overload
+        } else {
+            mergeInPlace((BitMapProducer) other); // the cast only selects the private overload
+        }
+    }
+
+    /**
+     * Creates a populated instance.
+     * @param shape The shape for the filter.
+     * @param hasher the Hasher to initialize the filter with.
+     */
+    public SimpleBloomFilter(final Shape shape, Hasher hasher) {
+        this(shape);
+        Objects.requireNonNull(hasher, "hasher");
+        mergeInPlace(hasher);
+    }
+
+    /**
+     * Creates a populated instance.
+     * @param shape The shape for the filter.
+     * @param indices the IndexProducer to initialize the filter with.
+     * @throws IllegalArgumentException if producer sends illegal value.
+     */
+    public SimpleBloomFilter(final Shape shape, IndexProducer indices) {
+        this(shape);
+        Objects.requireNonNull(indices, "indices");
+        mergeInPlace(indices);
+    }
+
+    /**
+     * Creates a populated instance.
+     * @param shape The shape for the filter.
+     * @param bitMaps the BitMapProducer to initialize the filter with.
+     * @throws IllegalArgumentException if the producer returns too many bit maps or sets a bit beyond the shape.
+     */
+    public SimpleBloomFilter(final Shape shape, BitMapProducer bitMaps) {
+        this(shape);
+        Objects.requireNonNull(bitMaps, "bitMaps");
+        mergeInPlace(bitMaps);
+    }
+
+    /**
+     * Copy constructor for {@code copy()} use.
+     * @param source the filter whose shape, bit maps and cached cardinality are copied.
+     */
+    private SimpleBloomFilter(SimpleBloomFilter source) {
+        this.shape = source.shape;
+        this.bitMap = source.bitMap.clone();
+        this.cardinality = source.cardinality;
+    }
+
+    @Override
+    public long[] asBitMapArray() {
+        return Arrays.copyOf(bitMap, bitMap.length);
+    }
+
+    @Override
+    public boolean forEachBitMapPair(BitMapProducer other, LongBiPredicate func) {
+        CountingLongPredicate p = new CountingLongPredicate(bitMap, func);
+        return other.forEachBitMap(p) && p.forEachRemaining();
+    }
+
+    @Override
+    public SimpleBloomFilter copy() {
+        return new SimpleBloomFilter(this);
+    }
+
+    /**
+     * Performs a merge in place using an IndexProducer. Bits set before a failure remain set.
+     * @param indexProducer the IndexProducer to merge from.
+     * @throws IllegalArgumentException if producer sends illegal value.
+     */
+    private void mergeInPlace(IndexProducer indexProducer) {
+        cardinality = -1; // invalidate the cached count first: bits may be set before a failure is thrown
+        indexProducer.forEachIndex(idx -> {
+            if (idx < 0 || idx >= shape.getNumberOfBits()) {
+                throw new IllegalArgumentException(String.format(
+                        "IndexProducer should only send values in the range[0,%s]", shape.getNumberOfBits() - 1));
+            }
+            BitMap.set(bitMap, idx);
+            return true;
+        });
+    }
+
+    /**
+     * Performs a merge in place using a BitMapProducer. Bits merged before a failure remain set.
+     * @param bitMapProducer the BitMapProducer to merge from.
+     * @throws IllegalArgumentException if producer sends illegal value.
+     */
+    private void mergeInPlace(BitMapProducer bitMapProducer) {
+        cardinality = -1; // invalidate the cached count first: bits may be merged before a failure is thrown
+        try {
+            int[] idx = new int[1];
+            bitMapProducer.forEachBitMap(value -> {
+                bitMap[idx[0]++] |= value;
+                return true;
+            });
+            // idx[0] is one past the last bit map written, so step back to the last written position
+            idx[0]--;
+            int idxLimit = BitMap.getLongIndex(shape.getNumberOfBits());
+            if (idxLimit < idx[0]) {
+                throw new IllegalArgumentException(String.format(
+                        "BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
+            }
+            if (idxLimit == idx[0]) {
+                long excess = (bitMap[idxLimit] >> shape.getNumberOfBits()); // shift distance is taken modulo 64
+                if (excess != 0) {
+                    throw new IllegalArgumentException(
+                            String.format("BitMapProducer set a bit higher than the limit for the shape: %s",
+                                    shape.getNumberOfBits()));
+                }
+            }
+        } catch (IndexOutOfBoundsException e) {
+            throw new IllegalArgumentException(
+                    String.format("BitMapProducer should send at most %s maps", bitMap.length), e);
+        }
+    }
+
+    @Override
+    public boolean mergeInPlace(Hasher hasher) {
+        Objects.requireNonNull(hasher, "hasher");
+        mergeInPlace(hasher.indices(shape));
+        return true;
+    }
+
+    @Override
+    public boolean mergeInPlace(BloomFilter other) {
+        Objects.requireNonNull(other, "other");
+        if (other.isSparse()) {
+            mergeInPlace((IndexProducer) other); // the cast only selects the private overload
+        } else {
+            mergeInPlace((BitMapProducer) other); // the cast only selects the private overload
+        }
+        return true;
+    }
+
+    @Override
+    public Shape getShape() {
+        return shape;
+    }
+
+    @Override
+    public boolean isSparse() {
+        return false;
+    }
+
+    @Override
+    public int cardinality() {
+        // Lazy evaluation with caching; a negative cached value means "recompute"
+        int c = cardinality;
+        if (c < 0) {
+            cardinality = c = SetOperations.cardinality(this);
+        }
+        return c;
+    }
+
+    @Override
+    public boolean forEachIndex(IntPredicate consumer) {
+        Objects.requireNonNull(consumer, "consumer");
+        return IndexProducer.fromBitMapProducer(this).forEachIndex(consumer);
+    }
+
+    @Override
+    public boolean forEachBitMap(LongPredicate consumer) {
+        Objects.requireNonNull(consumer, "consumer");
+        for (long l : bitMap) {
+            if (!consumer.test(l)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    @Override
+    public boolean contains(IndexProducer indexProducer) {
+        return indexProducer.forEachIndex(idx -> BitMap.contains(bitMap, idx));
+    }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleHasher.java
new file mode 100644
index 0000000000..9bc7a99648
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleHasher.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Objects;
+import java.util.function.IntPredicate;
+
+/**
+ * A Hasher that implements combinatorial hashing as described by
+ * Kirsch and Mitzenmacher.
+ *
+ * <p>Common use for this hasher is to generate a byte array as the output of a hashing
+ * or MessageDigest algorithm.</p>
+ *
+ * @since 4.5
+ */
+public class SimpleHasher implements Hasher {
+
+    /**
+     * The initial hash value.
+     */
+    private final long initial;
+
+    /**
+     * The value to increment the hash value by.
+     */
+    private final long increment;
+
+    /**
+     * Converts up to eight bytes to a long, most significant byte first.
+     * @param byteArray the byte array to extract the values from.
+     * @param offset the offset to start extraction from.
+     * @param len the length of the extraction, may be longer than 8; bytes past the eighth are ignored.
+     * @return the long value assembled from the extracted bytes.
+     */
+    private static long toLong(byte[] byteArray, int offset, int len) {
+        long val = 0;
+        len = Math.min(len, Long.BYTES);
+        for (int i = 0; i < len; i++) {
+            val <<= 8;
+            val |= (byteArray[offset + i] & 0x00FF);
+        }
+        return val;
+    }
+
+    /**
+     * Constructs the SimpleHasher from a byte array.
+     * The byte array is split in 2 and each half is interpreted as a long value.
+     * Excess bytes are ignored. This simplifies the conversion from a Digest or hasher algorithm output
+     * to the two values used by the SimpleHasher.
+     * If the second long is zero the default increment is used instead.
+     * @param buffer the buffer to extract the longs from, must not be null.
+     * @throws IllegalArgumentException if buffer length is zero.
+     * @see #getDefaultIncrement()
+     */
+    public SimpleHasher(byte[] buffer) {
+        if (Objects.requireNonNull(buffer, "buffer").length == 0) {
+            throw new IllegalArgumentException("buffer length must be greater than 0");
+        }
+        int segment = buffer.length / 2;
+        this.initial = toLong(buffer, 0, segment);
+        long possibleIncrement = toLong(buffer, segment, buffer.length - segment);
+        this.increment = possibleIncrement == 0 ? getDefaultIncrement() : possibleIncrement;
+    }
+
+    /**
+     * Constructs the SimpleHasher from 2 longs. The long values will be interpreted as unsigned values.
+     * If the increment is zero the default increment is used instead.
+     * @param initial The initial value for the hasher.
+     * @param increment The value to increment the hash by on each iteration.
+     * @see #getDefaultIncrement()
+     */
+    public SimpleHasher(long initial, long increment) {
+        this.initial = initial;
+        this.increment = increment == 0 ? getDefaultIncrement() : increment;
+    }
+
+    /**
+     * Gets the default increment used when the requested increment is zero.
+     *
+     * <p>By default this is the same default increment used in Java's SplittableRandom random
+     * number generator. It is the fractional representation of the golden ratio (0.618...) with
+     * a base of 2^64.</p>
+     *
+     * <p>Implementations may want to override this value to match defaults in legacy implementations.
+     * It is invoked from the constructors, so an override must not depend on subclass state.</p>
+     * @return The default increment to use when the requested increment is zero.
+     */
+    public long getDefaultIncrement() {
+        return 0x9e3779b97f4a7c15L;
+    }
+
+    /**
+     * Performs a modulus calculation on an unsigned long and an integer divisor.
+     * @param dividend an unsigned long value to calculate the modulus of.
+     * @param divisor the divisor for the modulus calculation, assumed to be positive.
+     * @return the remainder or modulus value.
+     */
+    static int mod(long dividend, int divisor) {
+        // See Hacker's Delight (2nd ed), section 9.3.
+        // Assume divisor is positive.
+        // Divide half the unsigned number and then double the quotient result.
+        final long quotient = ((dividend >>> 1) / divisor) << 1;
+        final long remainder = dividend - quotient * divisor;
+        // remainder in [0, 2 * divisor)
+        return (int) (remainder >= divisor ? remainder - divisor : remainder);
+    }
+
+    @Override
+    public IndexProducer indices(final Shape shape) {
+        Objects.requireNonNull(shape, "shape");
+
+        return new IndexProducer() {
+
+            @Override
+            public boolean forEachIndex(IntPredicate consumer) {
+                Objects.requireNonNull(consumer, "consumer");
+                int bits = shape.getNumberOfBits();
+                /*
+                 * Essentially this is computing a wrapped modulus from a start point and an
+                 * increment. So actually you only need two modulus operations before the loop.
+                 * This avoids any modulus operation inside the while loop. It uses a long index
+                 * to avoid overflow.
+                 */
+                long index = mod(initial, bits);
+                int inc = mod(increment, bits);
+
+                for (int functionalCount = 0; functionalCount < shape.getNumberOfHashFunctions(); functionalCount++) {
+
+                    if (!consumer.test((int) index)) {
+                        return false;
+                    }
+                    index += inc;
+                    index = index >= bits ? index - bits : index;
+                }
+                return true;
+            }
+
+            @Override
+            public int[] asIndexArray() {
+                int[] result = new int[shape.getNumberOfHashFunctions()];
+                int[] idx = new int[1];
+                /*
+                 * This method needs to return duplicate indices
+                 */
+                forEachIndex(i -> {
+                    result[idx[0]++] = i;
+                    return true;
+                });
+                return result;
+            }
+        };
+    }
+
+    @Override
+    public IndexProducer uniqueIndices(final Shape shape) {
+        Objects.requireNonNull(shape, "shape");
+        return new IndexProducer() {
+
+            @Override
+            public boolean forEachIndex(IntPredicate consumer) {
+                Objects.requireNonNull(consumer, "consumer");
+                IndexFilter filter = IndexFilter.create(shape, consumer);
+
+                int bits = shape.getNumberOfBits();
+                // Set up for the modulus. Use a long index to avoid overflow.
+                long index = mod(initial, bits);
+                int inc = mod(increment, bits);
+
+                for (int functionalCount = 0; functionalCount < shape.getNumberOfHashFunctions(); functionalCount++) {
+
+                    if (!filter.test((int) index)) {
+                        return false;
+                    }
+                    index += inc;
+                    index = index >= bits ? index - bits : index;
+                }
+                return true;
+            }
+        };
+    }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java
new file mode 100644
index 0000000000..4711667ac1
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Objects;
+import java.util.TreeSet;
+import java.util.function.IntPredicate;
+import java.util.function.LongPredicate;
+
+/**
+ * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
+ * implementation and should work well for most low cardinality Bloom filters.
+ * @since 4.5
+ */
+public final class SparseBloomFilter implements BloomFilter {
+
+    /** The sorted set of enabled bit indices that defines this BloomFilter. */
+    private final TreeSet<Integer> indices;
+
+    /** The shape of this BloomFilter. */
+    private final Shape shape;
+
+    /**
+     * Constructs an empty SparseBloomFilter.
+     *
+     * @param shape The shape of the filter.
+     */
+    public SparseBloomFilter(Shape shape) {
+        Objects.requireNonNull(shape, "shape");
+        this.shape = shape;
+        this.indices = new TreeSet<>();
+    }
+
+    /**
+     * Creates an instance that is equivalent to {@code other}.
+     *
+     * @param other The bloom filter to copy.
+     */
+    public SparseBloomFilter(BloomFilter other) {
+        Objects.requireNonNull(other, "other");
+        this.shape = other.getShape();
+        this.indices = new TreeSet<>();
+        if (other.isSparse()) {
+            mergeInPlace((IndexProducer) other);
+        } else {
+            mergeInPlace(IndexProducer.fromBitMapProducer(other));
+        }
+    }
+
+    /** Throws an IllegalArgumentException if any stored index lies outside [0, number of bits in the shape). */
+    private void checkIndices(Shape shape) {
+        if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
+            throw new IllegalArgumentException(
+                    String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
+        }
+    }
+
+    /**
+     * Constructs a populated Bloom filter.
+     * @param shape the shape for the bloom filter.
+     * @param hasher the hasher to provide the initial data.
+     * @throws IllegalArgumentException if the hasher produces an index outside the range
+     * [0, number of bits in the shape).
+     */
+    public SparseBloomFilter(final Shape shape, Hasher hasher) {
+        this(shape);
+        Objects.requireNonNull(hasher, "hasher");
+        hasher.indices(shape).forEachIndex(this::add);
+        checkIndices(shape);
+    }
+
+    /**
+     * Constructs a populated Bloom filter.
+     * @param shape the shape of the filter.
+     * @param indices an index producer for the indices to enable.
+     * @throws IllegalArgumentException if indices contains a value outside the range
+     * [0, number of bits in the shape).
+     */
+    public SparseBloomFilter(Shape shape, IndexProducer indices) {
+        this(shape);
+        Objects.requireNonNull(indices, "indices");
+        indices.forEachIndex(this::add);
+        checkIndices(shape);
+    }
+
+    /**
+     * Constructs a populated Bloom filter.
+     * @param shape the shape of the filter.
+     * @param bitMaps a BitMapProducer for the bit maps to add.
+     * @throws IllegalArgumentException if the bit maps enable a bit at or beyond the number
+     * of bits in the shape.
+     */
+    public SparseBloomFilter(Shape shape, BitMapProducer bitMaps) {
+        this(shape);
+        Objects.requireNonNull(bitMaps, "bitMaps");
+        mergeInPlace(IndexProducer.fromBitMapProducer(bitMaps));
+    }
+
+    /** Copy constructor used by {@link #copy()}; the index set is duplicated, the shape is shared. */
+    private SparseBloomFilter(SparseBloomFilter source) {
+        shape = source.shape;
+        indices = new TreeSet<>(source.indices);
+    }
+
+    @Override
+    public long[] asBitMapArray() {
+        long[] result = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
+        for (int i : indices) {
+            BitMap.set(result, i);
+        }
+        return result;
+    }
+
+    @Override
+    public SparseBloomFilter copy() {
+        return new SparseBloomFilter(this);
+    }
+
+    /**
+     * Adds the index to the indices.
+     * @param idx the index to add.
+     * @return {@code true} always
+     */
+    private boolean add(int idx) {
+        indices.add(idx);
+        return true;
+    }
+
+    /**
+     * Performs a merge in place using an IndexProducer. The range check runs after all indices are added.
+     * @param indexProducer the IndexProducer to merge from.
+     * @throws IllegalArgumentException if producer sends illegal value.
+     */
+    private void mergeInPlace(IndexProducer indexProducer) {
+        indexProducer.forEachIndex(this::add);
+        if (!this.indices.isEmpty()) {
+            if (this.indices.last() >= shape.getNumberOfBits()) {
+                throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
+                        this.indices.last(), shape.getNumberOfBits()));
+            }
+            if (this.indices.first() < 0) {
+                throw new IllegalArgumentException(
+                        String.format("Value in list %s is less than 0", this.indices.first()));
+            }
+        }
+    }
+
+    @Override
+    public boolean mergeInPlace(Hasher hasher) {
+        Objects.requireNonNull(hasher, "hasher");
+        mergeInPlace(hasher.indices(shape));
+        return true;
+    }
+
+    @Override
+    public boolean mergeInPlace(BloomFilter other) {
+        Objects.requireNonNull(other, "other");
+        IndexProducer producer = other.isSparse() ? (IndexProducer) other : IndexProducer.fromBitMapProducer(other);
+        mergeInPlace(producer);
+        return true;
+    }
+
+    @Override
+    public Shape getShape() {
+        return shape;
+    }
+
+    @Override
+    public boolean isSparse() {
+        return true;
+    }
+
+    @Override
+    public int cardinality() {
+        return indices.size();
+    }
+
+    @Override
+    public boolean forEachIndex(IntPredicate consumer) {
+        Objects.requireNonNull(consumer, "consumer");
+        for (int value : indices) {
+            if (!consumer.test(value)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    @Override
+    public boolean forEachBitMap(LongPredicate consumer) {
+        Objects.requireNonNull(consumer, "consumer");
+        int limit = BitMap.numberOfBitMaps(shape.getNumberOfBits());
+        /*
+         * because our indices are always in order we can shorten the time necessary to
+         * create the longs for the consumer
+         */
+        // the currently constructed bitMap
+        long bitMap = 0;
+        // the bitmap we are working on
+        int idx = 0;
+        for (int i : indices) {
+            while (BitMap.getLongIndex(i) != idx) {
+                if (!consumer.test(bitMap)) {
+                    return false;
+                }
+                bitMap = 0;
+                idx++;
+            }
+            bitMap |= BitMap.getLongBit(i);
+        }
+        // we fall through with data in the bitMap
+        if (!consumer.test(bitMap)) {
+            return false;
+        }
+        // account for the bitMap emitted in the previous block and move on to the next one
+        idx++;
+        // while there are more blocks to generate send zero to the consumer.
+        while (idx < limit) {
+            if (!consumer.test(0L)) {
+                return false;
+            }
+            idx++;
+        }
+        return true;
+    }
+
+    @Override
+    public boolean contains(IndexProducer indexProducer) {
+        return indexProducer.forEachIndex(indices::contains);
+    }
+
+    @Override
+    public boolean contains(BitMapProducer bitMapProducer) {
+        return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
+    }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
deleted file mode 100644
index ab6b773d6c..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator;
-
-/**
- * The class that performs hashing on demand.
- * @since 4.5
- */
-public class DynamicHasher implements Hasher {
-
-    /**
-     * The builder for DynamicHashers.
-     * @since 4.5
-     */
-    public static class Builder implements Hasher.Builder {
-
-        /**
-         * The list of items (each as a byte[]) that are to be hashed.
-         */
-        private final List<byte[]> buffers;
-
-        /**
-         * The function that the resulting DynamicHasher will use.
-         */
-        private final HashFunction function;
-
-        /**
-         * Constructs a DynamicHasher builder.
-         *
-         * @param function the function implementation.
-         */
-        public Builder(final HashFunction function) {
-            this.function = function;
-            this.buffers = new ArrayList<>();
-        }
-
-        @Override
-        public DynamicHasher build() throws IllegalArgumentException {
-            // Assumes the hasher will create a copy of the buffers
-            final DynamicHasher hasher = new DynamicHasher(function, buffers);
-            // Reset for further use
-            buffers.clear();
-            return hasher;
-        }
-
-        @Override
-        public final DynamicHasher.Builder with(final byte[] property) {
-            buffers.add(property);
-            return this;
-        }
-
-        @Override
-        public DynamicHasher.Builder with(final CharSequence item, final Charset charset) {
-            Hasher.Builder.super.with(item, charset);
-            return this;
-        }
-
-        @Override
-        public DynamicHasher.Builder withUnencoded(final CharSequence item) {
-            Hasher.Builder.super.withUnencoded(item);
-            return this;
-        }
-    }
-
-    /**
-     * The iterator of integers.
-     *
-     * This assumes that the list of buffers is not empty.
-     */
-    private class Iterator implements PrimitiveIterator.OfInt {
-        /** The number of hash functions per item. */
-        private final int k;
-        /** The number of bits in the shape. */
-        private final int m;
-        /** The current item. */
-        private byte[] item;
-        /** The index of the next item. */
-        private int nextItem;
-        /** The count of hash functions for the current item. */
-        private int functionCount;
-
-        /**
-         * Constructs iterator with the specified shape.
-         *
-         * @param shape
-         */
-        private Iterator(final Shape shape) {
-            // Assumes that shape returns non-zero positive values for hash functions and bits
-            k = shape.getNumberOfHashFunctions();
-            m = shape.getNumberOfBits();
-            // Assume non-empty
-            item = buffers.get(0);
-            nextItem = 1;
-        }
-
-        @Override
-        public boolean hasNext() {
-            if (functionCount != k) {
-                return true;
-            }
-            // Reached the number of hash functions for the current item.
-            // Try and advance to the next item.
-            if (nextItem != buffers.size()) {
-                item = buffers.get(nextItem++);
-                functionCount = 0;
-                return true;
-            }
-            // Finished.
-            // functionCount == shape.getNumberOfHashFunctions()
-            // nextItem == buffers.size()
-            return false;
-        }
-
-        @SuppressWarnings("cast") // Cast to long to workaround a bug in animal-sniffer.
-        @Override
-        public int nextInt() {
-            if (hasNext()) {
-                return (int) Math.floorMod(function.apply(item, functionCount++),
-                    // Cast to long to workaround a bug in animal-sniffer.
-                    (long) m);
-            }
-            throw new NoSuchElementException();
-        }
-    }
-
-    /**
-     * An iterator of integers to use when there are no values.
-     */
-    private static class NoValuesIterator implements PrimitiveIterator.OfInt {
-        /** The singleton instance. */
-        private static final NoValuesIterator INSTANCE = new NoValuesIterator();
-
-        /**
-         * Empty constructor.
-         */
-        private NoValuesIterator() {}
-
-        @Override
-        public boolean hasNext() {
-            return false;
-        }
-
-        @Override
-        public int nextInt() {
-            throw new NoSuchElementException();
-        }
-    }
-
-    /**
-     * The list of byte arrays that are to be hashed.
-     * Package private for access by the iterator.
-     */
-    final List<byte[]> buffers;
-
-    /**
-     * The function to hash the buffers.
-     * Package private for access by the iterator.
-     */
-    final HashFunction function;
-
-    /**
-     * Constructs a DynamicHasher.
-     *
-     * @param function the function to use.
-     * @param buffers the byte buffers that will be hashed.
-     */
-    public DynamicHasher(final HashFunction function, final List<byte[]> buffers) {
-        this.buffers = new ArrayList<>(buffers);
-        this.function = function;
-    }
-
-    @Override
-    public PrimitiveIterator.OfInt iterator(final Shape shape) {
-        HashFunctionValidator.checkAreEqual(getHashFunctionIdentity(),
-            shape.getHashFunctionIdentity());
-        // Use optimised iterator for no values
-        return buffers.isEmpty() ? NoValuesIterator.INSTANCE : new Iterator(shape);
-    }
-
-    @Override
-    public HashFunctionIdentity getHashFunctionIdentity() {
-        return function;
-    }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunction.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunction.java
deleted file mode 100644
index d14fd3d830..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunction.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-/**
- * Defines a hash function used by a {@link Hasher} .
- * @since 4.5
- */
-public interface HashFunction extends HashFunctionIdentity {
-
- /**
- * Applies the hash function to the buffer.
- *
- * @param buffer the buffer to apply the hash function to.
- * @param seed the seed for the hashing.
- * @return the long value of the hash.
- */
- long apply(byte[] buffer, int seed);
-
- /**
- * Gets the signature of this function.
- *
- * The signature of this function is calculated as:
- *
- * int seed = 0;
- * apply(String.format("%s-%s-%s",
- * getName().toUpperCase(Locale.ROOT), getSignedness(), getProcess())
- * .getBytes("UTF-8"), seed);
- *
- *
- * @see HashFunctionIdentity#prepareSignatureBuffer(HashFunctionIdentity)
- */
- @Override
- long getSignature();
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java
deleted file mode 100644
index 0ff2edb8d4..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Locale;
-
-/**
- * Defines the hash function used by a {@link Hasher}.
- *
- * @since 4.5
- */
-public interface HashFunctionIdentity {
-
- /**
- * Identifies the process type of this function.
- *
- *
- * - Iterative processes
- * - Call the underlying hash algorithm for each (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)}.
- * - Cyclic processes
- * - Call the underlying hash algorithm using a (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)} to initialize the state. Subsequent
- * calls can generate hash values without calling the underlying algorithm.
- *
- */
- enum ProcessType {
- /**
- * Call the underlying hash algorithm for a (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)} when the state is uninitialized or
- * the seed is zero. This initializes the state. Subsequent calls with a non-zero
- * seed use the state to generate a new value.
- */
- CYCLIC,
- /**
- * Call the underlying hash algorithm for each (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)}.
- */
- ITERATIVE
- }
-
- /**
- * Identifies the signedness of the calculations for this function.
- *
- * When the hash function executes it typically returns an array of bytes.
- * That array is converted into one or more numerical values which will be provided
- * as a {@code long} primitive type.
- * The signedness identifies if those {@code long} values are signed or unsigned.
- * For example a hash function that outputs only 32-bits can be unsigned if converted
- * using {@link Integer#toUnsignedLong(int)}. A hash function that outputs more than
- * 64-bits is typically signed.
- *
- */
- enum Signedness {
- /**
- * The result of {@link HashFunction#apply(byte[], int)} is signed,
- * thus the sign bit may be set.
- *
- *
- * The result can be used with {@code Math.floorMod(x, y)} to generate a positive
- * value if y is positive.
- *
- *
- * @see Math#floorMod(int, int)
- */
- SIGNED,
- /**
- * The result of {@link HashFunction#apply(byte[], int)} is unsigned,
- * thus the sign bit is never set.
- *
- *
- * The result can be used with {@code x % y} to generate a positive
- * value if y is positive.
- *
- */
- UNSIGNED
- }
-
- /**
- * Gets a common formatted string for general display.
- *
- * @param identity the identity to format.
- * @return the String representing the identity.
- */
- static String asCommonString(final HashFunctionIdentity identity) {
- return String.format("%s-%s-%s", identity.getName(), identity.getSignedness(), identity.getProcessType());
- }
-
- /**
- * Gets a {@code byte[]} buffer for a HashFunctionIdentity to create a signature. The
- * {@code byte[]} is composed using properties of the hash function as:
- *
- *
- * String.format("%s-%s-%s",
- * getName().toUpperCase(Locale.ROOT), getSignedness(), getProcess())
- * .getBytes("UTF-8");
- *
- *
- * @param identity The HashFunctionIdentity to create the buffer for.
- * @return the signature buffer for the identity
- * @see #getSignature()
- */
- static byte[] prepareSignatureBuffer(final HashFunctionIdentity identity) {
- return String.format("%s-%s-%s",
- identity.getName().toUpperCase(Locale.ROOT), identity.getSignedness(),
- identity.getProcessType()).getBytes(StandardCharsets.UTF_8);
- }
-
- /**
- * Gets the name of this hash function.
- *
- * Hash function should be the common name
- * for the hash. This may include indications as to hash length
- *
- *
- * Names are not case specific. Thus, "MD5" and "md5" should be considered as the same.
- *
- * @return the Hash name
- */
- String getName();
-
- /**
- * Gets the process type of this function.
- *
- * @return process type of this function.
- */
- ProcessType getProcessType();
-
- /**
- * Gets the name of the provider of this hash function implementation.
- *
- * Provider names are not case specific. Thus, "Apache Commons Collection" and
- * "apache commons collection" should be considered as the same.
- *
- * @return the name of the provider of this hash implementation.
- */
- String getProvider();
-
- /**
- * Gets the signature of this function. The signature is the output of the hash function
- * when applied to a set of bytes composed using properties of the hash function.
- *
- *
- * Implementations should define the method used to generate the signature.
- *
- *
- * @return the signature of this function.
- * @see #prepareSignatureBuffer(HashFunctionIdentity)
- */
- long getSignature();
-
- /**
- * Gets the signedness of this function.
- *
- * @return signedness of this function.
- */
- Signedness getSignedness();
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java
deleted file mode 100644
index c75973a376..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-/**
- * An instance of HashFunctionIdentity that is suitable for deserializing
- * HashFunctionIdentity data from a stream or any other situation where the
- * hash function is not available but the identify of the function is required.
- *
- * @since 4.5
- */
-public final class HashFunctionIdentityImpl implements HashFunctionIdentity {
- private final String name;
- private final String provider;
- private final Signedness signedness;
- private final ProcessType process;
- private final long signature;
-
- /**
- * Creates a copy of the HashFunctionIdentity.
- * @param identity the identity to copy.
- */
- public HashFunctionIdentityImpl(final HashFunctionIdentity identity) {
- this.name = identity.getName();
- this.provider = identity.getProvider();
- this.signedness = identity.getSignedness();
- this.process = identity.getProcessType();
- this.signature = identity.getSignature();
- }
-
- /**
- * Creates a HashFunctionIdentity from component values.
- * @param provider the name of the provider.
- * @param name the name of the hash function.
- * @param signedness the signedness of the hash function.
- * @param process the processes of the hash function.
- * @param signature the signature for the hash function.
- */
- public HashFunctionIdentityImpl(final String provider, final String name, final Signedness signedness, final ProcessType process,
- final long signature) {
- this.name = name;
- this.provider = provider;
- this.signedness = signedness;
- this.process = process;
- this.signature = signature;
- }
-
- @Override
- public String getName() {
- return name;
- }
-
- @Override
- public ProcessType getProcessType() {
- return process;
- }
-
- @Override
- public String getProvider() {
- return provider;
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return signedness;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidator.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidator.java
deleted file mode 100644
index 3ec0753e4a..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidator.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.util.Locale;
-import java.util.Objects;
-
-/**
- * Contains validation for hash functions.
- */
-public final class HashFunctionValidator {
- /** Do not instantiate. */
- private HashFunctionValidator() {}
-
- /**
- * Generates a hash code for the identity of the hash function. The hash code is
- * generated using the same properties as those tested in
- * {@link #areEqual(HashFunctionIdentity, HashFunctionIdentity)}, that is the
- * signedness, process type and name. The name is not case specific and is converted
- * to lower-case using the {@link Locale#ROOT root locale}.
- *
- * The generated value is suitable for use in generation of a hash code that satisfies
- * the contract of {@link Object#hashCode()} if the {@link Object#equals(Object)} method
- * is implemented using {@link #areEqual(HashFunctionIdentity, HashFunctionIdentity)}. That
- * is two objects considered equal will have the same hash code.
- *
- * <p>If the hash function identity is a field within a larger object the generated hash code
- * should be incorporated into the entire hash, for example using
- * {@link Objects#hash(Object...)}.
- *
- * @param a hash function.
- * @return hash code
- * @see String#toLowerCase(Locale)
- * @see Locale#ROOT
- */
- static int hash(final HashFunctionIdentity a) {
- return Objects.hash(a.getSignedness(),
- a.getProcessType(),
- a.getName().toLowerCase(Locale.ROOT));
- }
-
- /**
- * Compares the identity of the two hash functions. The functions are considered
- * equal if the signedness, process type and name are equal. The name is not
- * case specific.
- *
- * <p>A pair of functions that are equal would be expected to produce the same
- * hash output from the same input.
- *
- * @param a First hash function.
- * @param b Second hash function.
- * @return true, if successful
- * @see String#equalsIgnoreCase(String)
- */
- public static boolean areEqual(final HashFunctionIdentity a, final HashFunctionIdentity b) {
- return (a.getSignedness() == b.getSignedness() &&
- a.getProcessType() == b.getProcessType() &&
- a.getName().equalsIgnoreCase(b.getName()));
- }
-
- /**
- * Compares the identity of the two hash functions and throws an exception if they
- * are not equal.
- *
- * @param a First hash function.
- * @param b Second hash function.
- * @see #areEqual(HashFunctionIdentity, HashFunctionIdentity)
- * @throws IllegalArgumentException if the hash functions are not equal
- */
- public static void checkAreEqual(final HashFunctionIdentity a, final HashFunctionIdentity b) {
- if (!areEqual(a, b)) {
- throw new IllegalArgumentException(String.format("Hash functions are not equal: (%s) != (%s)",
- HashFunctionIdentity.asCommonString(a), HashFunctionIdentity.asCommonString(b)));
- }
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
deleted file mode 100644
index 3700567f1a..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.nio.charset.Charset;
-import java.util.PrimitiveIterator;
-
-/**
- * A Hasher represents items of arbitrary byte size as a byte representation of
- * fixed size (a hash). The hash representations can be used to create indexes
- * for a Bloom filter.
- *
- * <p>The hash for each item is created using a hash function; use of different
- * seeds allows generation of different hashes for the same item. The hashes can
- * be dynamically converted into the bit index representation used by a Bloom
- * filter. The shape of the Bloom filter defines the number of indexes per item
- * and the range of the indexes. The hasher can generate the correct number of
- * indexes in the range required by the Bloom filter for each item it
- * represents.
- *
- * <p>Note that the process of generating hashes and mapping them to a Bloom
- * filter shape may create duplicate indexes. The hasher may generate fewer than
- * the required number of hash functions per item if duplicates have been
- * removed. Implementations of {@code iterator()} may return duplicate values
- * and may return values in a random order. See implementation javadoc notes as
- * to the guarantees provided by the specific implementation.
- *
- * <p>Hashers have an identity based on the hashing algorithm used.
- *
- * @since 4.5
- */
-public interface Hasher {
-
- /**
- * A builder to build a hasher.
- *
- * <p>A hasher represents one or more items of arbitrary byte size. The builder
- * contains methods to collect byte representations of items. Each method to add
- * to the builder will add an entire item to the final hasher created by the
- * {@link #build()} method.
- *
- * @since 4.5
- */
- interface Builder {
-
- /**
- * Builds the hasher from all the items.
- *
- * <p>This method will clear the builder for future use.
- *
- * @return the fully constructed hasher
- */
- Hasher build();
-
- /**
- * Adds a byte array item to the hasher.
- *
- * @param item the item to add
- * @return a reference to this object
- */
- Builder with(byte[] item);
-
- /**
- * Adds a character sequence item to the hasher using the specified {@code charset}
- * encoding.
- *
- * @param item the item to add
- * @param charset the character set
- * @return a reference to this object
- */
- default Builder with(final CharSequence item, final Charset charset) {
- return with(item.toString().getBytes(charset));
- }
-
- /**
- * Adds a character sequence item to the hasher. Each 16-bit character is
- * converted to 2 bytes using little-endian order.
- *
- * @param item the item to add
- * @return a reference to this object
- */
- default Builder withUnencoded(final CharSequence item) {
- final int length = item.length();
- final byte[] bytes = new byte[length * 2];
- for (int i = 0; i < length; i++) {
- final char ch = item.charAt(i);
- bytes[i * 2] = (byte) ch;
- bytes[i * 2 + 1] = (byte) (ch >>> 8);
- }
- return with(bytes);
- }
- }
-
- /**
- * Gets an iterator of integers that are the bits to enable in the Bloom
- * filter based on the shape.
- *
- * <p>The iterator will create indexes within the range defined by the number of bits in
- * the shape. The total number of indexes will respect the number of hash functions per item
- * defined by the shape. However the count of indexes may not be a multiple of the number of
- * hash functions if the implementation has removed duplicates.
- *
- * <p>No guarantee is made as to order of values.
- *
- * @param shape the shape of the desired Bloom filter
- * @return the iterator of integers
- * @throws IllegalArgumentException if the hasher cannot generate indexes for
- * the specified @{@code shape}
- */
- PrimitiveIterator.OfInt iterator(Shape shape);
-
- /**
- * Gets the identify of the hash function used by the the hasher.
- *
- * @return the identity of the hash function
- */
- HashFunctionIdentity getHashFunctionIdentity();
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java
deleted file mode 100644
index 430f99b565..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.Set;
-import java.util.TreeSet;
-
-/**
- * A Hasher implementation that contains the index for all enabled bits for a specific
- * Shape.
- * @since 4.5
- */
-public final class StaticHasher implements Hasher {
-
- /**
- * The shape of this hasher
- */
- private final Shape shape;
-
- /**
- * The ordered set of values that this hasher will return.
- */
- private final int[] values;
-
- /**
- * Constructs the StaticHasher from a Hasher and a Shape.
- * @param hasher the Hasher to read.
- * @param shape the Shape for the resulting values.
- * @throws IllegalArgumentException if the hasher function and the shape function are not the same.
- */
- public StaticHasher(final Hasher hasher, final Shape shape) {
- this(hasher.iterator(shape), shape);
- HashFunctionValidator.checkAreEqual(hasher.getHashFunctionIdentity(),
- shape.getHashFunctionIdentity());
- }
-
- /**
- * Constructs a StaticHasher from an Iterator of Integers and a Shape.
- * @param iter the Iterator of Integers.
- * @param shape the Shape that the integers were generated for.
- * @throws IllegalArgumentException if any Integer is outside the range [0,shape.getNumberOfBits())
- */
- public StaticHasher(final Iterator iter, final Shape shape) {
- this.shape = shape;
- final Set workingValues = new TreeSet<>();
- iter.forEachRemaining(idx -> {
- if (idx >= this.shape.getNumberOfBits()) {
- throw new IllegalArgumentException(String.format("Bit index (%s) is too big for %s", idx, shape));
- }
- if (idx < 0) {
- throw new IllegalArgumentException(String.format("Bit index (%s) may not be less than zero", idx));
- }
- workingValues.add(idx);
- });
- this.values = new int[workingValues.size()];
- int i = 0;
- for (final Integer value : workingValues) {
- values[i++] = value.intValue();
- }
- }
-
- /**
- * Constructs the StaticHasher from a StaticHasher and a Shape.
- * @param hasher the StaticHasher to read.
- * @param shape the Shape for the resulting values.
- * @throws IllegalArgumentException if the shape of the hasher and the shape parameter are not the same.
- */
- public StaticHasher(final StaticHasher hasher, final Shape shape) {
- if (!hasher.shape.equals(shape)) {
- throw new IllegalArgumentException(String.format("Hasher shape (%s) is not the same as shape (%s)",
- hasher.getShape().toString(), shape.toString()));
- }
- this.shape = shape;
- this.values = hasher.values;
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return shape.getHashFunctionIdentity();
- }
-
- /**
- * Gets the shape this static hasher was created with.
- *
- * @return the Shape of this hasher.
- */
- public Shape getShape() {
- return shape;
- }
-
- /**
- * Tests emptiness (size == 0).
- *
- * @return Whether or not this is empty.
- */
- public boolean isEmpty() {
- return size() == 0;
- }
-
- /**
- * Gets an iterator of integers that are the bits to enable in the Bloom
- * filter based on the shape. The iterator will not return the same value multiple
- * times. Values will be returned in ascending order.
- *
- * @param shape {@inheritDoc}
- * @return {@inheritDoc}
- * @throws IllegalArgumentException {@inheritDoc}
- */
- @Override
- public OfInt iterator(final Shape shape) {
- if (!this.shape.equals(shape)) {
- throw new IllegalArgumentException(
- String.format("shape (%s) does not match internal shape (%s)", shape, this.shape));
- }
- return Arrays.stream(values).iterator();
- }
-
- /**
- * Gets the the number of unique values in this hasher.
- * @return the number of unique values.
- */
- public int size() {
- return values.length;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java
deleted file mode 100644
index 8e07793b7f..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import java.nio.ByteBuffer;
-
-import java.nio.LongBuffer;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * performs MD5 hashing using a signed cyclic method.
- * @since 4.5
- */
-public final class MD5Cyclic implements HashFunction {
-
- /**
- * The name of this hash function.
- */
- public static final String NAME = "MD5";
-
- /**
- * The MD5 digest implementation.
- */
- private final MessageDigest messageDigest;
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * The result from the digest 0
- */
- private final long[] result = new long[2];
-
- /**
- * Constructs the MD5 hashing function.
- */
- public MD5Cyclic() {
- try {
- messageDigest = MessageDigest.getInstance(NAME);
- } catch (final NoSuchAlgorithmException e) {
- // This should not happen
- throw new IllegalStateException("Missing the standard MD5 message digest algorithm", e);
- }
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
-
- if (seed == 0) {
- final byte[] hash;
- synchronized (messageDigest) {
- messageDigest.update(buffer);
- hash = messageDigest.digest();
- messageDigest.reset();
- }
-
- final LongBuffer lb = ByteBuffer.wrap(hash).asLongBuffer();
- result[0] = lb.get(0);
- result[1] = lb.get(1);
- } else {
- result[0] += result[1];
- }
- return result[0];
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64Cyclic.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64Cyclic.java
deleted file mode 100644
index 99c27c8819..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64Cyclic.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import org.apache.commons.codec.digest.MurmurHash3;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * uses an underlying Murmur3 128-bit hash with a signed cyclic method.
- *
- * <p>Requires the optional Apache Commons Codec
- * library which contains a Java port of the 128-bit hash function
- * {@code MurmurHash3_x64_128} from Austin Applyby's original {@code c++}
- * code in SMHasher.
- *
- * @see SMHasher
- * @since 4.5
- */
-public final class Murmur128x64Cyclic implements HashFunction {
-
- /**
- * The name of this hash method.
- */
- public static final String NAME = "Murmur3_x64_128";
-
- /**
- * The result of the hash 0 call.
- */
- private long[] parts;
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * Constructs a Murmur3 x64 128 hash.
- */
- public Murmur128x64Cyclic() {
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
- if (parts == null || seed == 0) {
- parts = MurmurHash3.hash128x64(buffer, 0, buffer.length, 0);
- } else {
- parts[0] += parts[1];
- }
- return parts[0];
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java
deleted file mode 100644
index 982ef5c869..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import org.apache.commons.codec.digest.MurmurHash3;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * uses an underlying Murmur3 32-bit hash with a signed iterative method.
- *
- * <p>Requires the optional Apache Commons Codec
- * library which contains a Java port of the 32-bit hash function
- * {@code MurmurHash3_x86_32} from Austin Applyby's original {@code c++}
- * code in SMHasher.
- *
- * @see Apache Commons Codec
- * @see SMHasher
- * @since 4.5
- */
-public final class Murmur32x86Iterative implements HashFunction {
-
- /**
- * The name of this hash function.
- */
- public static final String NAME = "Murmur3_x86_32";
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * Constructs a Murmur3 x86 32 hash
- */
- public Murmur32x86Iterative() {
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
- return MurmurHash3.hash32x86(buffer, 0, buffer.length, seed);
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.ITERATIVE;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java
deleted file mode 100644
index da0fc2c2db..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import java.util.Arrays;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * performs {@code Objects.hash} hashing using a signed iterative method.
- *
- * Except in the case of seed 0, the value of the previous hash is
- * used as a seed for the next hash. Hashes are seeded by calling
- * {@code Arrays.deepHashCode( new Object[]{seed, buffer} )}.
- *
- * @since 4.5
- */
-public final class ObjectsHashIterative implements HashFunction {
-
- /**
- * The name of the hash function.
- */
- public static final String NAME = "Objects32";
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * The value of the last hash.
- */
- private long last;
-
- /**
- * Constructs a hash that uses the Objects.hash method to has values.
- */
- public ObjectsHashIterative() {
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
- if (seed == 0) {
- last = 0;
- }
- // Effectively:
- // result = Arrays.deepHashCode(new Object[] { last, buffer });
- // The method loops over items starting with result=1
- // for i in items:
- // result = 31 * result + hashCode(i)
- // Here we unroll the computation to 2 iterations.
- // The computation is done using 32-bit integers then cast to a long
- final long result = 31 * (31 + Long.hashCode(last)) + Arrays.hashCode(buffer);
- last += result;
- return result;
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.ITERATIVE;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Signatures.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Signatures.java
deleted file mode 100644
index b7f35ac051..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Signatures.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-
-/**
- * Allow computation of HashFunction signatures.
- * @since 4.5
- */
-final class Signatures {
-
- /** No instances. */
- private Signatures() {}
-
- /**
- * Gets the standard signature for the hash function. The signature is prepared as:
- *
- * int seed = 0;
- * return hashFunction.apply(HashFunctionIdentity.prepareSignatureBuffer(hashFunction), seed);
- *
- *
- * @param hashFunction the hash function
- * @return the signature
- * @see HashFunctionIdentity#prepareSignatureBuffer(HashFunctionIdentity)
- * @see HashFunction#apply(byte[], int)
- */
- static long getSignature(final HashFunction hashFunction) {
- return hashFunction.apply(HashFunctionIdentity.prepareSignatureBuffer(hashFunction), 0);
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
index bfc3d67abe..c207254561 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
@@ -18,101 +18,78 @@
/**
* A collection of extensible Bloom filter classes and interfaces.
*
- *
- * Background:
- *
- * A Bloom filter is conceptually a bit vector. It is used to
- * tell you where things are not. Basically, you create a Bloom filter by creating hashes
- * and converting those to enabled bits in a vector. You can merge the Bloom filters
- * together with logical "or" (call this filter "B"). You can then check to see if filter
- * "A" was "or"ed into "B" by testing A & B == A. if the statement is false then "A" was
- * not merged into "B", otherwise it _might_ have. They are generally used where hash
- * tables would be too large or as a filter front end for longer processes. For example
+ * Background:
+ *
+ * The Bloom filter is a probabilistic data structure that indicates where things are not.
+ * Conceptually it is a bit vector. You create a Bloom filter by creating hashes
+ * and converting those to enabled bits in the vector. Multiple Bloom filters may be merged
+ * together into one Bloom filter. It is possible to test if a filter {@code B} has been merged into
+ * another filter {@code A} by verifying that {@code (A & B) == B}.
+ *
+ * Bloom filters are generally used where hash
+ * tables would be too large, or as a filter front end for longer processes. For example
* most browsers have a Bloom filter that is built from all known bad URLs (ones that
* serve up malware). When you enter a URL the browser builds a Bloom filter and checks to
* see if it is "in" the bad URL filter. If not the URL is good, if it matches, then the
* expensive lookup on a remote system is made to see if it actually is in the list. There
* are lots of other uses, and in most cases the reason is to perform a fast check as a
* gateway for a longer operation.
- *
- * BloomFilter
- *
- * The bloom filter code is
- * an abstract class that requires implementation of 4 methods:
- * -
- * getBits() which
- * returns the set bits as a buffer encoded into an array of long.
- * -
- * getHasher()
- * which returns a list of integers that are indexes of the bits that are enabled. These
- * are returned in a Hasher construct.
- * -
- * merge( BloomFilter ) to merge another
- * Bloom filter into this one.
- * -
- * merge( Hasher ) to merge the values in a hasher
- * into this Bloom filter.
- *
- * There are 3 implementations of Bloom filter
- * provided:
- * -
- * BitSetBloomFilter - based on the Java BitSet class.
- * -
- *
- * CountingBloomFilter - uses a sparse array of integers (Map) to implement a counting
- * Bloom filter. This filter also implements remove() methods as that is the great
- * advantage of a counting Bloom filter.
- * -
- * HasherBloomFilter - implements bloom
- * filter on a Hasher. A rather slow implementation but convenient in some
- * situations.
- *
- *
- *
- * Shape
- *
- * Describes the Bloom filter using the
- * standard number of bits, number of hash functions and number of items along with a
- * description of the HashFunction. It is this description that has caused the most issues
- * of late.
- *
- * Hasher
- *
- * converts byte buffers into an iterator if int based
- * on a Shape. There are 2 implementations of Hasher provided
- * -
- * Dynamic - calls
- * the HashFunction for each value required in the Bloom filter.
- * -
- * Static - based
- * on a pre-calculated list of Bloom filter index values. It is also limited to generating
- * values for a specific Shape.
- *
- *
- *
- * Hash Functions
- *
- * Hash
- * functions generate individual index values for the filter from a byte buffer. There are
- * four implementations provided.
- *
- * HashFunctionIdentity
- *
- * The
- * HashFunctionIdentity is the base interface for the HashFunction. It tracks three (3)
- * properties:
- * -
- * The Hashing algorithm
- * -
- * Whether the contents of the
- * resulting hash buffer are read as signed or unsigned values.
- * -
- * Whether the hash
- * function uses an iterative or cyclic method. In traditional iterative methods this is
- * done by calling the selected hash function with a different seed for each hash
- * required. The second method described by Adam Kirsch and Micheal Mitzenmacher[1] has
- * become more common and is used in applications like Cassandra[2].
- *
+ *
+ * BloomFilter
+ *
+ * The Bloom filter architecture here is designed so that the implementation of the storage of bits is abstracted.
+ * Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a
+ * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
+ * manipulation of the representations.
+ *
+ * The bloom filter code is an interface that requires implementation of 6 methods:
+ *
+ * - {@code cardinality()}
+ * returns the number of bits enabled in the Bloom filter.
+ *
+ * - {@code contains(BitMapProducer)} which
+ * returns true if the bits specified by the bit maps generated by the BitMapProducer are enabled in the Bloom filter.
+ *
+ * - {@code contains(IndexProducer)} which
+ * returns true if the bits specified by the indices generated by IndexProducer are enabled in the Bloom filter.
+ *
+ * - {@code getShape()} which
+ * returns the shape the Bloom filter was created with.
+ *
+ * - {@code isSparse()} which
+ * returns true if the implementation tracks indices natively, false if bit maps are used. In cases where
+ * neither are used the {@code isSparse} return value should reflect which is faster to produce.
+ *
+ * - {@code mergeInPlace(BloomFilter)} which
+ * utilizes either the {@code BitMapProducer} or {@code IndexProducer} from the argument to enable extra bits
+ * in the internal representation of the Bloom filter.
+ *
+ *
+ * Other methods should be implemented where they can be done so more efficiently than the default implementations.
+ *
+ *
+ * CountingBloomFilter
+ *
+ * The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
+ * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
+ * overhead.
+ *
+ * Shape
+ *
+ * The Shape describes the Bloom filter using the number of bits and the number of hash functions.
+ *
+ * Hasher
+ *
+ * A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollection,
+ * each hasher represents one item being added to the Bloom filter. The HasherCollection represents the
+ * number of items as the sum of the number of items represented by the Hashers in the collection.
+ *
+ * The SimpleHasher uses a combinatorial generation technique to create the integers. It is easily
+ * initialized by using a standard {@code MessageDigest} or other Hash function to hash the item to insert and
+ * then splitting the hash bytes in half and considering each as a long value.
+ *
+ * Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
+ * and/or {@code Hasher.FilteredIntConsumer} classes to filter out duplicate indices.
*
* References
*
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java
new file mode 100644
index 0000000000..5894b7c376
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.apache.commons.collections4.bloomfilter.BitCountProducer.BitCountConsumer;
+import org.junit.jupiter.api.Test;
+
+public abstract class AbstractBitCountProducerTest extends AbstractIndexProducerTest {
+
+ /**
+ * A testing BitCountConsumer that always returns false.
+ */
+ public static BitCountConsumer FALSE_CONSUMER = new BitCountConsumer() {
+
+ @Override
+ public boolean test(int index, int count) {
+ return false;
+ }
+ };
+
+ /**
+ * A testing BitCountConsumer that always returns true.
+ */
+ public static BitCountConsumer TRUE_CONSUMER = new BitCountConsumer() {
+
+ @Override
+ public boolean test(int index, int count) {
+ return true;
+ }
+ };
+
+ /**
+ * Creates a producer with some data.
+ * @return a producer with some data
+ */
+ @Override
+ protected abstract BitCountProducer createProducer();
+
+ /**
+ * Creates a producer without data.
+ * @return a producer that has no data.
+ */
+ @Override
+ protected abstract BitCountProducer createEmptyProducer();
+
+ /**
+ * Determines if empty tests should be run. Some producers do not implement an empty
+ * version. Tests for those classes should return false.
+ * @return {@code true} if the tests that use an empty producer should be run.
+ */
+ protected boolean supportsEmpty() {
+ return true;
+ }
+
+ @Test
+ public final void testForEachCount() {
+
+ assertFalse(createProducer().forEachCount(FALSE_CONSUMER), "non-empty should be false");
+ assertTrue(createProducer().forEachCount(TRUE_CONSUMER), "non-empty should be true");
+ if (supportsEmpty()) {
+ assertTrue(createEmptyProducer().forEachCount(FALSE_CONSUMER), "empty should be true");
+ assertTrue(createEmptyProducer().forEachCount(TRUE_CONSUMER), "empty should be true");
+ }
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitMapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitMapProducerTest.java
new file mode 100644
index 0000000000..fa6f6cb4df
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitMapProducerTest.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Arrays;
+import java.util.function.LongPredicate;
+
+import org.junit.jupiter.api.Test;
+
+public abstract class AbstractBitMapProducerTest {
+
+ /**
+ * A testing consumer that always returns false.
+ */
+ public static final LongPredicate FALSE_CONSUMER = new LongPredicate() {
+
+ @Override
+ public boolean test(long arg0) {
+ return false;
+ }
+ };
+
+ /**
+ * A testing consumer that always returns true.
+ */
+ public static final LongPredicate TRUE_CONSUMER = new LongPredicate() {
+
+ @Override
+ public boolean test(long arg0) {
+ return true;
+ }
+ };
+
+ /**
+ * Creates a producer with some data.
+ * @return a producer with some data
+ */
+ protected abstract BitMapProducer createProducer();
+
+ /**
+ * Creates a producer without data.
+ * @return a producer that has no data.
+ */
+ protected abstract BitMapProducer createEmptyProducer();
+
+ protected boolean emptyIsZeroLength() {
+ return false;
+ }
+
+ @Test
+ public final void testForEachBitMap() {
+ assertFalse(createProducer().forEachBitMap(FALSE_CONSUMER), "non-empty should be false");
+ if (emptyIsZeroLength()) {
+ assertTrue(createEmptyProducer().forEachBitMap(FALSE_CONSUMER), "empty should be true");
+ } else {
+ assertFalse(createEmptyProducer().forEachBitMap(FALSE_CONSUMER), "empty should be false");
+ }
+
+ assertTrue(createProducer().forEachBitMap(TRUE_CONSUMER), "non-empty should be true");
+ assertTrue(createEmptyProducer().forEachBitMap(TRUE_CONSUMER), "empty should be true");
+ }
+
+ @Test
+ public final void testAsBitMapArray() {
+ long[] array = createEmptyProducer().asBitMapArray();
+ for (int i = 0; i < array.length; i++) {
+ assertEquals(0, array[i], "Wrong value at " + i);
+ }
+
+ array = createProducer().asBitMapArray();
+ assertFalse(array.length == 0);
+ }
+
+ @Test
+ public final void testForEachBitMapPair() {
+ LongBiPredicate func = (x, y) -> x == y;
+ assertTrue(createEmptyProducer().forEachBitMapPair(createEmptyProducer(), func), "empty == empty failed");
+ assertFalse(createEmptyProducer().forEachBitMapPair(createProducer(), func), "empty == not_empty failed");
+ assertFalse(createProducer().forEachBitMapPair(createEmptyProducer(), func), "not_empty == empty passed");
+ assertTrue(createProducer().forEachBitMapPair(createProducer(), func), "not_empty == not_empty failed");
+
+ // test BitMapProducers of different length send 0 for missing values.
+ int[] count = new int[3];
+ LongBiPredicate lbp = new LongBiPredicate() {
+
+ @Override
+ public boolean test(long x, long y) {
+ if (x == 0) {
+ count[0]++;
+ }
+ if (y == 0) {
+ count[1]++;
+ }
+ count[2]++;
+ return true;
+ }
+ };
+ createEmptyProducer().forEachBitMapPair(createProducer(), lbp);
+ assertEquals(count[2], count[0]);
+
+ Arrays.fill(count, 0);
+ createProducer().forEachBitMapPair(createEmptyProducer(), lbp);
+ assertEquals(count[2], count[1]);
+ }
+
+ @Test
+ public void testForEachBitMapEarlyExit() {
+ int[] passes = new int[1];
+ assertFalse(createProducer().forEachBitMap(l -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(1, passes[0]);
+
+ passes[0] = 0;
+ if (emptyIsZeroLength()) {
+ assertTrue(createEmptyProducer().forEachBitMap(l -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(0, passes[0]);
+ } else {
+ assertFalse(createEmptyProducer().forEachBitMap(l -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(1, passes[0]);
+ }
+ }
+
+ @Test
+ public void testForEachBitMapPairEarlyExit() {
+
+ // test that iteration stops after the first pair when the predicate returns false.
+ int[] count = new int[1];
+ LongBiPredicate lbp = new LongBiPredicate() {
+
+ @Override
+ public boolean test(long x, long y) {
+ count[0]++;
+ return false;
+ }
+ };
+ createProducer().forEachBitMapPair(createEmptyProducer(), lbp);
+ assertEquals(1, count[0]);
+
+ Arrays.fill(count, 0);
+ createEmptyProducer().forEachBitMapPair(createProducer(), lbp);
+ assertEquals(1, count[0]);
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
index 8ba620c8c2..9e681f6693 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
@@ -16,608 +16,413 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import java.util.List;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.BiFunction;
-import java.util.function.IntConsumer;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.BitSet;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
+import java.util.List;
import org.junit.jupiter.api.Test;
/**
* Test standard methods in the {@link BloomFilter} interface.
*/
-public abstract class AbstractBloomFilterTest {
+public abstract class AbstractBloomFilterTest {
+
+ protected final SimpleHasher from1 = new SimpleHasher(1, 1);
+ protected final long from1Value = 0x3fffeL;
+ protected final SimpleHasher from11 = new SimpleHasher(11, 1);
+ protected final long from11Value = 0xffff800L;
+ protected final HasherCollection bigHasher = new HasherCollection(from1, from11);
+ protected final long bigHashValue = 0xffffffeL;
+ protected final HasherCollection fullHasher = new HasherCollection(new SimpleHasher(0, 1)/* 0-16 */,
+ new SimpleHasher(17, 1)/* 17-33 */, new SimpleHasher(33, 1)/* 33-49 */, new SimpleHasher(50, 1)/* 50-66 */,
+ new SimpleHasher(67, 1)/* 67-83 */
+ );
+ protected final long[] fullHashValue = { 0xffffffffffffffffL, 0xfffffL };
/**
- * An implementation of BloomFilter that is used to test merge and cardinality
- * operations with a filter type that does not match the type of the filter
- * being tested.
+ * The shape of the Bloom filters for testing.
+ *
+ * - Hash functions (k) = 17
+ *
- Number of bits (m) = 72
+ *
+ * @return the testing shape.
*/
- private static class TestBloomFilter extends AbstractBloomFilter {
- /** The bits. */
- final BitSet bits;
-
- protected TestBloomFilter(final Shape shape, final BitSet bits) {
- super(shape);
- this.bits = bits;
- }
-
- @Override
- public long[] getBits() {
- return bits.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(bits.stream().iterator(), getShape());
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- throw new UnsupportedOperationException();
- }
+ protected final Shape getTestShape() {
+ return Shape.fromKM(17, 72);
}
/**
- * A HashFunctionIdentity for testing.
+ * Create an empty version of the BloomFilter implementation we are testing.
+ *
+ * @param shape the shape of the filter.
+ * @return a BloomFilter implementation.
*/
- protected HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
+ protected abstract T createEmptyFilter(Shape shape);
/**
- * A second HashFunctionIdentity for testing.
+ * Create the BloomFilter implementation we are testing.
+ *
+ * @param shape the shape of the filter.
+ * @param hasher the hasher to use to create the filter.
+ * @return a BloomFilter implementation.
*/
- protected HashFunctionIdentity testFunctionX = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test FunctionX";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 1;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
+ protected abstract T createFilter(Shape shape, Hasher hasher);
/**
- * The shape of the Bloom filters for testing
+ * Create the BloomFilter implementation we are testing.
+ *
+ * @param shape the shape of the filter.
+ * @param producer A BitMap producer to build the filter with.
+ * @return a BloomFilter implementation.
*/
- protected Shape shape = new Shape(testFunction, 3, 72, 17);
+ protected abstract T createFilter(Shape shape, BitMapProducer producer);
/**
- * Tests that the andCardinality calculations are correct.
+ * Create the BloomFilter implementation we are testing.
+ *
+ * @param shape the shape of the filter.
+ * @param producer An Index producer to build the filter with.
+ * @return a BloomFilter implementation.
*/
- @Test
- public final void andCardinalityTest() {
- andCardinalityTest(this::createFilter);
- }
+ protected abstract T createFilter(Shape shape, IndexProducer producer);
/**
- * Tests that the andCardinality calculations are correct with a generic BloomFilter.
+ * Tests that construction fails when the hasher supplies a value that is too large or negative.
*/
@Test
- public final void andCardinalityTest_GenericBloomFilter() {
- andCardinalityTest(this::createGenericFilter);
+ public void testConstructWithBadHasher() {
+ // value too large
+ assertThrows(IllegalArgumentException.class,
+ () -> createFilter(getTestShape(), new BadHasher(getTestShape().getNumberOfBits())));
+ // negative value
+ assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), new BadHasher(-1)));
}
- /**
- * Tests that the andCardinality calculations are correct.
- *
- * @param filterFactory the factory function to create the filter
- */
- private void andCardinalityTest(final BiFunction filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
+ @Test
+ public void testConstructWitBitMapProducer() {
+ long[] values = { from11Value, 0x9L };
+ BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
+ List lst = new ArrayList<>();
+ for (long l : values) {
+ lst.add(l);
+ }
+ assertTrue(f.forEachBitMap(l -> {
+ return lst.remove(Long.valueOf(l));
+ }));
+ assertTrue(lst.isEmpty());
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
+ // values too large
+ assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), badProducer));
+ }
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
+ @Test
+ public void testConstructWithIndexProducer() {
+ int[] values = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 };
+ BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
+ List lst = new ArrayList<>();
+ for (int i : values) {
+ lst.add(i);
+ }
+ assertTrue(f.forEachIndex(i -> {
+ return lst.remove(Integer.valueOf(i));
+ }));
+ assertTrue(lst.isEmpty());
- assertEquals(7, bf.andCardinality(bf2));
+ // value too large
+ assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(),
+ IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
+ // negative value
+ assertThrows(IllegalArgumentException.class,
+ () -> createFilter(getTestShape(), IndexProducer.fromIndexArray(new int[] { -1 })));
}
- /**
- * Tests that the andCardinality calculations are correct when there are more than Long.LENGTH bits.
- */
@Test
- public final void andCardinalityTest_ExtraLongs() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ public final void testContains() {
+ BloomFilter bf1 = createFilter(getTestShape(), from1);
+ final BloomFilter bf2 = createFilter(getTestShape(), bigHasher);
- final BloomFilter bf = createFilter(hasher, shape);
+ assertTrue(bf1.contains(bf1), "BF Should contain itself");
+ assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
+ assertFalse(bf1.contains(bf2), "BF should not contain BF2");
+ assertTrue(bf2.contains(bf1), "BF2 should contain BF");
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ assertTrue(bf2.contains(new SimpleHasher(1, 1)), "BF2 Should contain this hasher");
+ assertFalse(bf2.contains(new SimpleHasher(1, 3)), "BF2 Should not contain this hasher");
- final BloomFilter bf2 = createFilter(hasher2, shape);
+ IndexProducer indexProducer = new SimpleHasher(1, 1).indices(getTestShape());
+ assertTrue(bf2.contains(indexProducer), "BF2 Should contain this hasher");
+ indexProducer = new SimpleHasher(1, 3).indices(getTestShape());
+ assertFalse(bf2.contains(indexProducer), "BF2 Should not contain this hasher");
- assertEquals(7, bf.andCardinality(bf2));
- assertEquals(7, bf2.andCardinality(bf));
- }
+ BitMapProducer bitMapProducer = BitMapProducer.fromIndexProducer(new SimpleHasher(1, 1).indices(getTestShape()),
+ getTestShape().getNumberOfBits());
+ assertTrue(bf2.contains(bitMapProducer), "BF2 Should contain this hasher");
+ bitMapProducer = BitMapProducer.fromIndexProducer(new SimpleHasher(1, 3).indices(getTestShape()),
+ getTestShape().getNumberOfBits());
+ assertFalse(bf2.contains(bitMapProducer), "BF2 Should not contain this hasher");
- /**
- * Compare 2 static hashers to verify they have the same bits enabled.
- *
- * @param hasher1 the first static hasher.
- * @param hasher2 the second static hasher.
- */
- private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
- final OfInt iter1 = hasher1.iterator(shape);
- final OfInt iter2 = hasher2.iterator(shape);
+ // Test different lengths
+ bf1 = createFilter(getTestShape(), from1);
+ final BloomFilter bf3 = createFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1),
+ from1);
+ assertTrue(bf1.contains(bf3));
+ assertTrue(bf3.contains(bf1));
- while (iter1.hasNext()) {
- assertTrue(iter2.hasNext(), "Not enough data in second hasher");
- assertEquals(iter1.nextInt(), iter2.nextInt());
- }
- assertFalse(iter2.hasNext(), "Too much data in second hasher");
+ final BloomFilter bf4 = createFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1),
+ bigHasher);
+ assertFalse(bf1.contains(bf4));
+ assertTrue(bf4.contains(bf1));
}
/**
- * Tests that cardinality is correct.
+ * Tests that the intersection estimate calculations are correct.
+ *
+ * Both argument orders are checked, as is the intersection with an empty filter.
*/
@Test
- public final void cardinalityTest() {
+ public final void testEstimateIntersection() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter bf = createFilter(getTestShape(), from1);
+ final BloomFilter bf2 = createFilter(getTestShape(), bigHasher);
- final BloomFilter bf = createFilter(hasher, shape);
- assertEquals(17, bf.cardinality());
- }
+ assertEquals(1, bf.estimateIntersection(bf2));
+ assertEquals(1, bf2.estimateIntersection(bf));
- /**
- * Tests that creating an empty hasher works as expected.
- */
- @Test
- public final void constructorTest_Empty() {
+ final BloomFilter bf3 = createEmptyFilter(getTestShape());
- final BloomFilter bf = createEmptyFilter(shape);
- final long[] lb = bf.getBits();
- assertEquals(0, lb.length);
+ assertEquals(0, bf.estimateIntersection(bf3));
+ assertEquals(0, bf3.estimateIntersection(bf));
}
/**
- * Tests that creating a filter with a hasher works as expected.
- */
- @Test
- public final void constructorTest_Hasher() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
- assertEquals(0x1FFFF, lb[0]);
- assertEquals(1, lb.length);
- }
-
- /**
- * Tests that creating a Bloom filter with a Static hasher that has one shape and a
- * different specified shape fails.
+ * Tests that the union estimate calculations are correct.
+ *
+ * Both argument orders are checked, as is the union with an empty filter.
*/
@Test
- public final void constructorTest_WrongShape() {
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
+ public final void testEstimateUnion() {
+ final BloomFilter bf = createFilter(getTestShape(), from1);
+ final BloomFilter bf2 = createFilter(getTestShape(), from11);
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), anotherShape);
- assertThrows(IllegalArgumentException.class, () -> createFilter(hasher, shape),
- "Should throw IllegalArgumentException");
- }
-
- /**
- * Tests that contains() with a Bloom filter argument returns the proper results.
- */
- @Test
- public final void containsTest_BloomFilter() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final List lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
- assertTrue(bf.contains(bf2));
- assertFalse(bf2.contains(bf));
- }
+ assertEquals(2, bf.estimateUnion(bf2));
+ assertEquals(2, bf2.estimateUnion(bf));
- /**
- * Tests that contains() fails properly if the other Bloom filter is not of the proper shape.
- */
- @Test
- public final void containsTest_BloomFilter_WrongShape() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), anotherShape);
- final BloomFilter bf2 = createFilter(hasher2, anotherShape);
- assertThrows(IllegalArgumentException.class, () -> bf.contains(bf2),
- "Should throw IllegalArgumentException");
- }
+ final BloomFilter bf3 = createEmptyFilter(getTestShape());
- /**
- * Tests that contains() with a Hasher argument returns the proper results.
- */
- @Test
- public final void containsTest_Hasher() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- List lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertTrue(bf.contains(hasher2));
-
- lst2 = Arrays.asList(17, 18, 19, 20);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertFalse(bf.contains(hasher2));
-
- lst2 = Arrays.asList(10, 11, 12, 17, 18, 19, 20);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertFalse(bf.contains(hasher2));
+ assertEquals(1, bf.estimateUnion(bf3));
+ assertEquals(1, bf3.estimateUnion(bf));
}
/**
- * Tests that contains() fails properly if the hasher is not of the proper shape.
+ * Tests that the size estimate is correctly calculated.
*/
@Test
- public final void containsTest_Hasher_WrongShape() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
+ public final void testEstimateN() {
+ // build a filter
+ BloomFilter filter1 = new SimpleBloomFilter(getTestShape(), from1);
+ assertEquals(1, filter1.estimateN());
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
+ // the data provided above do not generate an estimate that is equivalent to the
+ // actual.
+ filter1.mergeInPlace(new SimpleHasher(4, 1));
- final List lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
- assertThrows(IllegalArgumentException.class, () -> bf.contains(hasher2),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Create an empty version of the BloomFilter implementation we are testing.
- *
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
- */
- protected abstract AbstractBloomFilter createEmptyFilter(Shape shape);
+ assertEquals(1, filter1.estimateN());
- /**
- * Create the BloomFilter implementation we are testing.
- *
- * @param hasher the hasher to use to create the filter.
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
- */
- protected abstract AbstractBloomFilter createFilter(Hasher hasher, Shape shape);
+ filter1.mergeInPlace(new SimpleHasher(17, 1));
- /**
- * Create a generic BloomFilter implementation.
- *
- * @param hasher the hasher to use to create the filter.
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
- */
- private AbstractBloomFilter createGenericFilter(final Hasher hasher, final Shape shape) {
- final BitSet bits = new BitSet();
- hasher.iterator(shape).forEachRemaining((IntConsumer) bits::set);
- return new TestBloomFilter(shape, bits);
+ assertEquals(3, filter1.estimateN());
}
/**
- * Tests that getBits() works correctly when multiple long values are returned.
+ * Tests that asBitMapArray works correctly.
*/
@Test
- public final void getBitsTest_SpanLong() {
- final List lst = Arrays.asList(63, 64);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
+ public final void testAsBitMapArray() {
+
+ // test when multiple long values are returned.
+ final SimpleHasher hasher = new SimpleHasher(63, 1);
+ final BloomFilter bf = createFilter(Shape.fromKM(2, 72), hasher);
+ final long[] lb = bf.asBitMapArray();
assertEquals(2, lb.length);
assertEquals(0x8000000000000000L, lb[0]);
assertEquals(0x1, lb[1]);
}
- /**
- * Tests that the the hasher returned from getHasher() works correctly.
- */
- @Test
- public final void getHasherTest() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final StaticHasher hasher2 = bf.getHasher();
-
- assertEquals(shape, hasher2.getShape());
- assertSameBits(hasher, hasher2);
- }
-
/**
* Tests that isFull() returns the proper values.
*/
@Test
- public final void isFullTest() {
+ public final void testIsFull() {
// create empty filter
- AbstractBloomFilter filter = createEmptyFilter(shape);
- assertFalse(filter.isFull());
-
- final List values = new ArrayList<>(shape.getNumberOfBits());
- for (int i = 0; i < shape.getNumberOfBits(); i++) {
- values.add(i);
- }
-
- StaticHasher hasher2 = new StaticHasher(values.iterator(), shape);
- filter = createFilter(hasher2, shape);
+ BloomFilter filter = createEmptyFilter(getTestShape());
+ assertFalse(filter.isFull(), "Should not be full");
- assertTrue(filter.isFull());
+ filter = createFilter(getTestShape(), fullHasher);
+ assertTrue(filter.isFull(), "Should be full");
- final int mid = shape.getNumberOfBits() / 2;
- values.remove(Integer.valueOf(mid));
- hasher2 = new StaticHasher(values.iterator(), shape);
- filter = createFilter(hasher2, shape);
- assertFalse(filter.isFull());
- }
-
- /**
- * Tests that merging bloom filters works as expected.
- */
- @Test
- public final void mergeTest_BloomFilter() {
- mergeTest_BloomFilter(this::createFilter);
+ filter = createFilter(getTestShape(), new SimpleHasher(1, 3));
+ assertFalse(filter.isFull(), "Should not be full");
}
/**
* Tests that merging bloom filters works as expected with a generic BloomFilter.
*/
@Test
- public final void mergeTest_GenericBloomFilter() {
- mergeTest_BloomFilter(this::createGenericFilter);
- }
-
- /**
- * Tests that merging bloom filters works as expected.
- *
- * @param filterFactory the factory function to create the filter
- */
- private void mergeTest_BloomFilter(final BiFunction filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ public final void testMerge() {
- final BloomFilter bf = createFilter(hasher, shape);
+ // test with BloomFilter
+ final BloomFilter bf1 = createFilter(getTestShape(), from1);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf2 = createFilter(getTestShape(), from11);
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
+ final BloomFilter bf3 = bf1.merge(bf2);
+ assertTrue(bf3.contains(bf1), "Should contain bf1");
+ assertTrue(bf3.contains(bf2), "Should contain bf2");
- assertTrue(bf.merge(bf2), "Merge should not fail");
- assertEquals(27, bf.cardinality());
- }
+ final BloomFilter bf4 = bf2.merge(bf1);
+ assertTrue(bf4.contains(bf1), "Should contain bf1");
+ assertTrue(bf4.contains(bf2), "Should contain bf2");
+ assertTrue(bf4.contains(bf3), "Should contain bf3");
+ assertTrue(bf3.contains(bf4), "Should contain bf4");
- /**
- * Tests that merging bloom filters with different shapes fails properly
- */
- @Test
- public final void mergeTest_BloomFilter_WrongShape() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
+ // test with Hasher
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
- final BloomFilter bf2 = createFilter(hasher2, anotherShape);
+ final BloomFilter bf5 = bf1.merge(from11);
+ assertTrue(bf5.contains(bf1), "Should contain bf1");
+ assertTrue(bf5.contains(bf2), "Should contain bf2");
- assertThrows(IllegalArgumentException.class, () -> bf.merge(bf2),
- "Should throw IllegalArgumentException");
+ // test with hasher returning numbers out of range
+ assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(bf1.getShape().getNumberOfBits())));
+ assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
}
/**
- * Tests that merging a hasher into a Bloom filter works as expected
+ * Tests that merging in place works as expected.
*/
@Test
- public final void mergeTest_Hasher() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ public final void testMergeInPlace() {
- final BloomFilter bf = createFilter(hasher, shape);
+ final BloomFilter bf1 = createFilter(getTestShape(), from1);
+ final BloomFilter bf2 = createFilter(getTestShape(), from11);
+ final BloomFilter bf3 = bf1.merge(bf2);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ // test with BloomFilter
- assertTrue(bf.merge(hasher2), "Merge should not fail");
- assertEquals(27, bf.cardinality());
- }
-
- /**
- * Tests that merging a static hasher with the wrong shape into a Bloom filter fails as expected
- */
- @Test
- public final void mergeTest_Hasher_WrongShape() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
+ long[] bf1Val = bf1.asBitMapArray();
+ long[] bf2Val = bf2.asBitMapArray();
+ for (int i = 0; i < bf1Val.length; i++) {
+ bf1Val[i] |= bf2Val[i];
+ }
+ bf1.mergeInPlace(bf2);
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
+ long[] bf1New = bf1.asBitMapArray();
+ for (int i = 0; i < bf1Val.length; i++) {
+ assertEquals(bf1Val[i], bf1New[i], "Bad value at " + i);
+ }
- assertThrows(IllegalArgumentException.class, () -> bf.merge(hasher2),
- "Should throw IllegalArgumentException");
+ assertTrue(bf1.contains(bf2), "Should contain bf2");
+ assertTrue(bf1.contains(bf3), "Should contain bf3");
+
+ // test with hasher
+
+ BloomFilter bf4 = createFilter(getTestShape(), from1);
+ bf4.mergeInPlace(from11);
+
+ assertTrue(bf4.contains(bf2), "Should contain Bf2");
+ assertTrue(bf4.contains(bf3), "Should contain Bf3");
+
+ // test with hasher returning numbers out of range
+ assertThrows(IllegalArgumentException.class,
+ () -> bf1.mergeInPlace(new BadHasher(bf1.getShape().getNumberOfBits())));
+ assertThrows(IllegalArgumentException.class, () -> bf1.mergeInPlace(new BadHasher(-1)));
+
+ // test error when bloom filter returns values out of range
+ final BloomFilter bf5 = new SimpleBloomFilter(
+ Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
+ new SimpleHasher(Long.SIZE * 2, 1));
+ assertThrows(IllegalArgumentException.class, () -> bf1.mergeInPlace(bf5));
+
+ final BloomFilter bf6 = new SparseBloomFilter(
+ Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
+ new SimpleHasher(Long.SIZE * 2, 1));
+ assertThrows(IllegalArgumentException.class, () -> bf1.mergeInPlace(bf6));
+ }
+
+ private void assertIndexProducerConstructor(Shape shape, int[] values, int[] expected) {
+ IndexProducer indices = IndexProducer.fromIndexArray(values);
+ SparseBloomFilter filter = new SparseBloomFilter(shape, indices);
+ List lst = new ArrayList<>();
+ filter.forEachIndex(x -> {
+ lst.add(x);
+ return true;
+ });
+ assertEquals(expected.length, lst.size());
+ for (int value : expected) {
+ assertTrue(lst.contains(Integer.valueOf(value)), "Missing " + value);
+ }
}
- /**
- * Tests that the orCardinality calculations are correct.
- */
- @Test
- public final void orCardinalityTest() {
- orCardinalityTest(this::createFilter);
+ private void assertFailedIndexProducerConstructor(Shape shape, int[] values) {
+ IndexProducer indices = IndexProducer.fromIndexArray(values);
+ assertThrows(IllegalArgumentException.class, () -> createFilter(shape, indices));
}
- /**
- * Tests that the orCardinality calculations are correct with a generic BloomFilter.
- */
@Test
- public final void orCardinalityTest_GenericBloomFilter() {
- orCardinalityTest(this::createGenericFilter);
- }
+ public void testIndexProducerConstructor() {
+ Shape shape = Shape.fromKM(5, 10);
- /**
- * Tests that the andCardinality calculations are correct.
- *
- * @param filterFactory the factory function to create the filter
- */
- private void orCardinalityTest(final BiFunction filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final AbstractBloomFilter bf = createFilter(hasher, shape);
-
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
-
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
-
- assertEquals(27, bf.orCardinality(bf2));
+ assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
+ // test duplicate values
+ assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
+ // test negative values
+ assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, -2, 8 });
+ // test index too large
+ assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, 12, 8 });
+ // test no indices
+ assertIndexProducerConstructor(shape, new int[0], new int[0]);
}
- /**
- * Tests that the orCardinality calculations are correct when there are more than Long.LENGTH bits.
- */
@Test
- public final void orCardinalityTest_ExtraLongs() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final AbstractBloomFilter bf = createFilter(hasher, shape);
-
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
-
- final AbstractBloomFilter bf2 = createFilter(hasher2, shape);
+ public void testBitMapProducerSize() {
+ int[] idx = new int[1];
+ createFilter(getTestShape(), from1).forEachBitMap(i -> {
+ idx[0]++;
+ return true;
+ });
+ assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
- assertEquals(27, bf.orCardinality(bf2));
- assertEquals(27, bf2.orCardinality(bf));
+ idx[0] = 0;
+ createEmptyFilter(getTestShape()).forEachBitMap(i -> {
+ idx[0]++;
+ return true;
+ });
+ assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
}
/**
- * Tests that the xorCardinality calculations are correct.
+ * Testing hasher that always produces the single index value it was constructed with.
*/
- @Test
- public final void xorCardinalityTest() {
- xorCardinalityTest(this::createFilter);
- }
-
- /**
- * Tests that the xorCardinality calculations are correct with a generic BloomFilter.
- */
- @Test
- public final void xorCardinalityTest_GenericBloomFilter() {
- xorCardinalityTest(this::createGenericFilter);
- }
-
- /**
- * Tests that the andCardinality calculations are correct.
- *
- * @param filterFactory the factory function to create the filter
- */
- private void xorCardinalityTest(final BiFunction filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
+ class BadHasher implements Hasher {
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ IndexProducer producer;
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
-
- assertEquals(20, bf.xorCardinality(bf2));
- }
-
- /**
- * Tests that the xorCardinality calculations are correct when there are more than Long.LENGTH bits.
- */
- @Test
- public final void xorCardinalityTest_ExtraLongs() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
+ BadHasher(int value) {
+ this.producer = IndexProducer.fromIndexArray(new int[] { value });
+ }
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
+ @Override
+ public IndexProducer indices(Shape shape) {
+ return producer;
+ }
- assertEquals(20, bf.xorCardinality(bf2));
- assertEquals(20, bf2.xorCardinality(bf));
+ @Override
+ public IndexProducer uniqueIndices(Shape shape) {
+ return producer;
+ }
}
-
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
new file mode 100644
index 0000000000..a839e2d9c5
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Abstract tests common to all {@link CountingBloomFilter} implementations.
+ */
+public abstract class AbstractCountingBloomFilterTest
+ extends AbstractBloomFilterTest {
+ protected int[] from1Counts = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
+ protected int[] from11Counts = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0 };
+ protected int[] bigHashCounts = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0 };
+
+ protected final BitCountProducer maximumValueProducer = new BitCountProducer() {
+
+ @Override
+ public boolean forEachCount(BitCountProducer.BitCountConsumer consumer) {
+ for (int i = 1; i < 18; i++) {
+ if (!consumer.test(i, Integer.MAX_VALUE)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ };
+
+ /**
+ * Assert the counts match the expected values. Values are for indices starting
+ * at 0. Assert the cardinality equals the number of non-zero counts.
+ *
+ * @param bf the bloom filter
+ * @param expected the expected counts
+ */
+ private static void assertCounts(final CountingBloomFilter bf, final int[] expected) {
+ final Map m = new HashMap<>();
+ bf.forEachCount((i, c) -> {
+ m.put(i, c);
+ return true;
+ });
+ int zeros = 0;
+ for (int i = 0; i < expected.length; i++) {
+ if (m.get(i) == null) {
+ assertEquals(expected[i], 0, "Wrong value for " + i);
+ zeros++;
+ } else {
+ assertEquals(expected[i], m.get(i).intValue(), "Wrong value for " + i);
+ }
+ }
+ assertEquals(expected.length - zeros, bf.cardinality());
+ }
+
+ /**
+ * Tests that counts are correct when a hasher with duplicates is used in the
+ * constructor.
+ */
+ @Test
+ public final void testCountingSpecificConstructor() {
+ // verify hasher duplicates are counted.
+ // from1 and from11 together produce duplicates for 11, 12, 13, 14, 15, 16, and 17
+ final CountingBloomFilter bf = createFilter(getTestShape(), from1);
+ bf.add(BitCountProducer.from(from11.indices(getTestShape())));
+
+ final long[] lb = bf.asBitMapArray();
+ assertEquals(2, lb.length);
+ assertEquals(bigHashValue, lb[0]);
+
+ assertCounts(bf, bigHashCounts);
+ }
+
+ @Test
+ public final void testCountingBloomFilterSpecificContains() {
+ final BloomFilter bf = new SimpleBloomFilter(getTestShape(), from1);
+ final CountingBloomFilter bf2 = createFilter(getTestShape(), bigHasher);
+
+ assertTrue(bf.contains(bf), "BF Should contain itself");
+ assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
+ assertFalse(bf.contains(bf2), "BF should not contain BF2");
+ assertTrue(bf2.contains(bf), "BF2 should contain BF");
+ BitMapProducer producer = bf2;
+ assertTrue(bf2.contains(producer), "BF2 should contain BF bitMapProducer");
+ }
+
+ /**
+ * Tests that merging bloom filters works as expected with a generic BloomFilter.
+ */
+ @Test
+ public final void testCountingSpecificMerge() {
+ final BloomFilter bf1 = createFilter(getTestShape(), from1);
+
+ final BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from11);
+
+ final BloomFilter bf3 = bf1.merge(bf2);
+ assertTrue(bf3.contains(bf1), "Should contain");
+ assertTrue(bf3.contains(bf2), "Should contain");
+
+ final BloomFilter bf4 = bf2.merge(bf1);
+ assertTrue(bf4.contains(bf1), "Should contain");
+ assertTrue(bf4.contains(bf2), "Should contain");
+ assertTrue(bf4.contains(bf3), "Should contain");
+ assertTrue(bf3.contains(bf4), "Should contain");
+
+ // test overflow
+
+ final CountingBloomFilter bf5 = createEmptyFilter(getTestShape());
+ assertTrue(bf5.add(maximumValueProducer), "Should add to empty");
+ assertTrue(bf5.isValid(), "Should be valid");
+
+ CountingBloomFilter bf6 = bf5.merge(new SimpleBloomFilter(getTestShape(), from1));
+ assertFalse(bf6.isValid(), "Should not be valid");
+ }
+
+ /**
+ * Tests that add correctly updates the counts when a CountingBloomFilter is
+ * passed, and that an overflowing add invalidates the filter.
+ */
+ @Test
+ public void testAdd() {
+ final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
+ assertTrue(bf1.add(createFilter(getTestShape(), from11)), "Add should work");
+ assertTrue(bf1.contains(from1), "Should contain");
+ assertTrue(bf1.contains(from11), "Should contain");
+ assertCounts(bf1, bigHashCounts);
+
+ // test overflow
+
+ final CountingBloomFilter bf2 = createEmptyFilter(getTestShape());
+ assertTrue(bf2.add(maximumValueProducer), "Should add to empty");
+ assertTrue(bf2.isValid(), "Should be valid");
+
+ assertFalse(bf2.add(createFilter(getTestShape(), from1)), "Should not add");
+ assertFalse(bf2.isValid(), "Should not be valid");
+ }
+
+ /**
+ * Tests that subtract correctly updates the counts when a CountingBloomFilter is
+ * passed, and that an underflowing subtract invalidates the filter.
+ */
+ @Test
+ public final void testSubtract() {
+ final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
+ bf1.add(BitCountProducer.from(from11.indices(getTestShape())));
+
+ final CountingBloomFilter bf2 = createFilter(getTestShape(), from11);
+
+ assertTrue(bf1.subtract(bf2), "Subtract should work");
+ assertFalse(bf1.contains(bigHasher), "Should not contain bitHasher");
+ assertTrue(bf1.contains(from1), "Should contain from1");
+
+ assertCounts(bf1, from1Counts);
+
+ // test underflow
+ final CountingBloomFilter bf3 = createFilter(getTestShape(), from1);
+
+ final CountingBloomFilter bf4 = createFilter(getTestShape(), from11);
+
+ assertFalse(bf3.subtract(bf4), "Subtract should not work");
+ assertFalse(bf3.isValid(), "isValid should return false");
+ assertFalse(bf3.contains(from1), "Should not contain");
+ assertFalse(bf3.contains(bf4), "Should not contain");
+
+ assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 });
+ }
+
+ /**
+ * Tests that remove correctly updates the counts when a BloomFilter or a Hasher is
+ * passed, and that an underflowing remove invalidates the filter.
+ */
+ @Test
+ public final void testRemove() {
+ final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
+ bf1.add(BitCountProducer.from(from11.indices(getTestShape())));
+
+ assertTrue(bf1.remove(new SimpleBloomFilter(getTestShape(), from11)), "Remove should work");
+ assertFalse(bf1.contains(from11), "Should not contain");
+ assertTrue(bf1.contains(from1), "Should contain");
+
+ assertCounts(bf1, from1Counts);
+
+ // with hasher
+ final CountingBloomFilter bf2 = createFilter(getTestShape(), from1);
+ bf2.add(BitCountProducer.from(from11.indices(getTestShape())));
+
+ assertTrue(bf2.remove(from11), "Remove should work");
+ assertFalse(bf2.contains(from11), "Should not contain");
+ assertTrue(bf2.contains(from1), "Should contain");
+
+ assertCounts(bf2, from1Counts);
+
+ // test underflow
+
+ final CountingBloomFilter bf3 = createFilter(getTestShape(), from1);
+
+ final BloomFilter bf4 = new SimpleBloomFilter(getTestShape(), from11);
+
+ assertFalse(bf3.remove(bf4), "Subtract should not work");
+ assertFalse(bf3.isValid(), "isValid should return false");
+ assertFalse(bf3.contains(from1), "Should not contain");
+ assertFalse(bf3.contains(bf4), "Should not contain");
+
+ assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
+ }
+
+ @Test
+ public void testExcludesDuplicates() {
+
+ // create a hasher that produces duplicates with the specified shape.
+ // this setup produces 5, 17, 29, 41, 53, 65 two times
+ Shape shape = Shape.fromKM(12, 72);
+ SimpleHasher hasher = new SimpleHasher(5, 12);
+
+ CountingBloomFilter bf1 = createFilter(shape, hasher);
+ assertEquals(6, bf1.cardinality());
+ bf1.forEachCount((x, y) -> {
+ assertEquals(1, y, "Hasher in constructor results in value not equal to 1");
+ return true;
+ });
+
+ bf1 = createEmptyFilter(shape);
+ bf1.mergeInPlace(hasher);
+ assertEquals(6, bf1.cardinality());
+ bf1.forEachCount((x, y) -> {
+ assertEquals(1, y, "Hasher in mergeInPlace results in value not equal to 1");
+ return true;
+ });
+
+ bf1 = createEmptyFilter(shape);
+ CountingBloomFilter bf2 = bf1.merge(hasher);
+ assertEquals(6, bf2.cardinality());
+ bf2.forEachCount((x, y) -> {
+ assertEquals(1, y, "Hasher in merge results in value not equal to 1");
+ return true;
+ });
+
+ bf1 = createFilter(shape, hasher);
+ bf1.remove(hasher);
+ assertEquals(0, bf1.cardinality());
+ assertTrue(bf1.forEachCount((x, y) -> (false)), "Hasher in removes results in value not equal to 0");
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java
new file mode 100644
index 0000000000..95b2e59fbf
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
+
+ protected abstract Hasher createHasher();
+
+ protected abstract Hasher createEmptyHasher();
+
+ /**
+ * A method to get the number of items in a hasher. Mostly applies to
+ * Collections of hashers.
+ * @param hasher the hasher to check.
+ * @return the number of hashers in the hasher
+ */
+ protected abstract int getHasherSize(Hasher hasher);
+
+ /**
+ * The shape used for testing the hashers.
+ *
+ * <ul>
+ * <li>Hash functions (k) = 17</li>
+ * <li>Number of bits (m) = 72</li>
+ * </ul>
+ * @return the testing shape.
+ */
+ protected final Shape getTestShape() {
+ return Shape.fromKM(17, 72);
+ }
+
+ @Override
+ protected IndexProducer createProducer() {
+ return createHasher().indices(getTestShape());
+ }
+
+ @Override
+ protected IndexProducer createEmptyProducer() {
+ return createEmptyHasher().indices(getTestShape());
+ }
+
+ @ParameterizedTest
+ @CsvSource({ "17, 72", "3, 14", "5, 67868", })
+ public void testHashing(int k, int m) {
+ int[] count = { 0 };
+ Hasher hasher = createHasher();
+ hasher.indices(Shape.fromKM(k, m)).forEachIndex(i -> {
+ assertTrue(i >= 0 && i < m, () -> "Out of range: " + i + ", m=" + m);
+ count[0]++;
+ return true;
+ });
+ assertEquals(k * getHasherSize(hasher), count[0],
+ () -> String.format("Did not produce k=%d * m=%d indices", k, getHasherSize(hasher)));
+ }
+
+ @Test
+ public void testUniqueIndex() {
+ // create a hasher that produces duplicates with the specified shape.
+ // this setup produces 5, 17, 29, 41, 53, 65 two times
+ Shape shape = Shape.fromKM(12, 72);
+ Hasher hasher = new SimpleHasher(5, 12);
+ Set set = new HashSet<>();
+ assertTrue(hasher.uniqueIndices(shape).forEachIndex(set::add), "Duplicate detected");
+ assertEquals(6, set.size());
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java
new file mode 100644
index 0000000000..54dc01c7d4
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.IntPredicate;
+
+import org.junit.jupiter.api.Test;
+
+public abstract class AbstractIndexProducerTest {
+
+ public static final IntPredicate TRUE_PREDICATE = new IntPredicate() {
+
+ @Override
+ public boolean test(int arg0) {
+ return true;
+ }
+ };
+
+ public static final IntPredicate FALSE_PREDICATE = new IntPredicate() {
+
+ @Override
+ public boolean test(int arg0) {
+ return false;
+ }
+ };
+
+ /**
+ * Creates a producer with some data.
+ * @return a producer with some data
+ */
+ protected abstract IndexProducer createProducer();
+
+ /**
+ * Creates a producer without data.
+ * @return a producer that has no data.
+ */
+ protected abstract IndexProducer createEmptyProducer();
+
+ @Test
+ public final void testForEachIndex() {
+
+ IndexProducer populated = createProducer();
+ IndexProducer empty = createEmptyProducer();
+ assertFalse(populated.forEachIndex(FALSE_PREDICATE), "non-empty should be false");
+
+ assertTrue(empty.forEachIndex(FALSE_PREDICATE), "empty should be true");
+
+ assertTrue(populated.forEachIndex(TRUE_PREDICATE), "non-empty should be true");
+ assertTrue(empty.forEachIndex(TRUE_PREDICATE), "empty should be true");
+ }
+
+ @Test
+ public final void testAsIndexArray() {
+ int ary[] = createEmptyProducer().asIndexArray();
+ assertEquals(0, ary.length);
+
+ IndexProducer producer = createProducer();
+ List lst = new ArrayList();
+ for (int i : producer.asIndexArray()) {
+ lst.add(i);
+ }
+ assertTrue(producer.forEachIndex(new IntPredicate() {
+
+ @Override
+ public boolean test(int value) {
+ assertTrue(lst.remove(Integer.valueOf(value)),
+ String.format("Instance of %d was not found in lst", value));
+ return true;
+ }
+ }));
+ }
+
+ @Test
+ public void testForIndexEarlyExit() {
+ int[] passes = new int[1];
+ assertFalse(createProducer().forEachIndex(i -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(1, passes[0]);
+
+ passes[0] = 0;
+ assertTrue(createEmptyProducer().forEachIndex(i -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(0, passes[0]);
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
index a661f93fde..86bd638b73 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
@@ -16,520 +16,37 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.function.BiPredicate;
-import java.util.function.Function;
-import java.util.function.ToIntBiFunction;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.junit.jupiter.api.Test;
-
/**
* Tests for the {@link ArrayCountingBloomFilter}.
*/
-public class ArrayCountingBloomFilterTest extends AbstractBloomFilterTest {
-
- /**
- * Function to convert int arrays to BloomFilters for testing.
- */
- private final Function converter = counts -> {
- final BloomFilter testingFilter = new BitSetBloomFilter(shape);
- testingFilter.merge(new FixedIndexesTestHasher(shape, counts));
- return testingFilter;
- };
+public class ArrayCountingBloomFilterTest extends AbstractCountingBloomFilterTest {
@Override
- protected ArrayCountingBloomFilter createEmptyFilter(final Shape shape) {
+ protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) {
return new ArrayCountingBloomFilter(shape);
}
@Override
- protected ArrayCountingBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- final ArrayCountingBloomFilter result = new ArrayCountingBloomFilter(shape);
- result.merge( hasher );
- return result;
- }
-
- private ArrayCountingBloomFilter createFromCounts(final int[] counts) {
- // Use a dummy filter to add the counts to an empty filter
- final CountingBloomFilter dummy = new ArrayCountingBloomFilter(shape) {
- @Override
- public void forEachCount(final BitCountConsumer action) {
- for (int i = 0; i < counts.length; i++) {
- action.accept(i, counts[i]);
- }
- }
- };
- final ArrayCountingBloomFilter bf = new ArrayCountingBloomFilter(shape);
- bf.add(dummy);
- return bf;
- }
-
- /**
- * Assert the counts match the expected values. Values are for indices starting
- * at 0. Assert the cardinality equals the number of non-zero counts.
- *
- * @param bf the bloom filter
- * @param expected the expected counts
- */
- private static void assertCounts(final CountingBloomFilter bf, final int[] expected) {
- final Map m = new HashMap<>();
- bf.forEachCount(m::put);
- int zeros = 0;
- for (int i = 0; i < expected.length; i++) {
- if (m.get(i) == null) {
- assertEquals(expected[i], 0, "Wrong value for " + i);
- zeros++;
- } else {
- assertEquals(expected[i], m.get(i).intValue(), "Wrong value for " + i);
- }
- }
- assertEquals(expected.length - zeros, bf.cardinality());
- }
-
- /**
- * Tests that counts are correct when a hasher with duplicates is used in the
- * constructor.
- */
- @Test
- public void constructorTest_Hasher_Duplicates() {
- final int[] expected = {0, 1, 1, 0, 0, 1};
- // Some indexes with duplicates
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 2, 5);
-
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
- assertEquals(1, lb.length);
- assertEquals(0b100110L, lb[0]);
-
- assertCounts(bf, expected);
- }
-
- /**
- * Test the contains function with a standard Bloom filter.
- * The contains function is tested using a counting Bloom filter in the parent test class.
- */
- @Test
- public void contains_BloomFilter() {
- // Some indexes with duplicates
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 5);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
- BitSetBloomFilter testingFilter = new BitSetBloomFilter(shape);
- testingFilter.merge( new FixedIndexesTestHasher(shape, 3, 4));
- assertFalse(bf.contains(testingFilter));
- testingFilter = new BitSetBloomFilter(shape);
- testingFilter.merge( new FixedIndexesTestHasher(shape, 2, 5));
- assertTrue(bf.contains(testingFilter));
- }
-
- /**
- * Tests that merge correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void mergeTest_Counts_CountingBloomFilter() {
- assertMerge(counts -> createFilter(new FixedIndexesTestHasher(shape, counts), shape),
- BloomFilter::merge);
- }
-
- /**
- * Tests that merge correctly updates the counts when a BloomFilter is passed.
- */
- @Test
- public void mergeTest_Counts_BloomFilter() {
- assertMerge(converter, BloomFilter::merge);
- }
-
- /**
- * Test that merge correctly updates the counts when a Hasher is passed.
- */
- @Test
- public void mergeTest_Counts_Hasher() {
- assertMerge(counts -> new FixedIndexesTestHasher(shape, counts),
- BloomFilter::merge);
- }
-
- /**
- * Test that merge correctly updates the counts when a Hasher is passed with duplicates.
- */
- @Test
- public void mergeTest_Counts_Hasher_Duplicates() {
- assertMerge(counts -> new FixedIndexesTestHasher(shape, createDuplicates(counts)),
- BloomFilter::merge);
- }
-
- /**
- * Tests that remove correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void removeTest_Counts_CountingBloomFilter() {
- assertRemove(counts -> createFilter(new FixedIndexesTestHasher(shape, counts), shape),
- CountingBloomFilter::remove);
- }
-
- /**
- * Tests that remove correctly updates the counts when a BloomFilter is passed.
- */
- @Test
- public void removeTest_Counts_BloomFilter() {
- assertRemove(converter, CountingBloomFilter::remove);
- }
-
- /**
- * Test that remove correctly updates the counts when a Hasher is passed.
- */
- @Test
- public void removeTest_Counts_Hasher() {
- assertRemove(counts -> new FixedIndexesTestHasher(shape, counts),
- CountingBloomFilter::remove);
- }
-
- /**
- * Test that remove correctly updates the counts when a Hasher is passed with duplicates.
- */
- @Test
- public void removeTest_Counts_Hasher_Duplicates() {
- assertRemove(counts -> new FixedIndexesTestHasher(shape, createDuplicates(counts)),
- CountingBloomFilter::remove);
- }
-
- /**
- * Creates duplicates in the counts.
- *
- * @param counts the counts
- * @return the new counts
- */
- private static int[] createDuplicates(final int[] counts) {
- // Duplicate some values randomly
- final int length = counts.length;
- final int[] countsWithDuplicates = Arrays.copyOf(counts, 2 * length);
- for (int i = length; i < countsWithDuplicates.length; i++) {
- // Copy a random value from the counts into the end position
- countsWithDuplicates[i] = countsWithDuplicates[ThreadLocalRandom.current().nextInt(i)];
- }
- return countsWithDuplicates;
- }
-
- /**
- * Assert a merge operation. The converter should construct a suitable object
- * to remove the indices from the provided Bloom filter with the remove operation.
- *
- * @param the type of the filter
- * @param converter the converter
- * @param merge the merge operation
- */
- private void assertMerge(final Function converter,
- final BiPredicate merge) {
- final int[] indexes1 = { 1, 2, 4, 5, 6};
- final int[] indexes2 = { 3, 4, 6};
- final int[] expected = {0, 1, 1, 1, 2, 1, 2};
- assertOperation(indexes1, indexes2, converter, merge, true, expected);
- }
-
- /**
- * Assert a remove operation. The converter should construct a suitable object
- * to remove the indices from the provided Bloom filter with the remove operation.
- *
- * @param the type of the filter
- * @param converter the converter
- * @param remove the remove operation
- */
- private void assertRemove(final Function converter,
- final BiPredicate remove) {
- final int[] indexes1 = { 1, 2, 4, 5, 6};
- final int[] indexes2 = { 2, 5, 6};
- final int[] expected = {0, 1, 0, 0, 1, 0, 0};
- assertOperation(indexes1, indexes2, converter, remove, true, expected);
- }
-
- /**
- * Assert a counting operation. The first set of indexes is used to create the
- * CountingBloomFilter. The second set of indices is passed to the converter to
- * construct a suitable object to combine with the counting Bloom filter. The counts
- * of the first Bloom filter are checked using the expected counts.
- *
- * Counts are assumed to map to indexes starting from 0.
- *
- * @param the type of the filter
- * @param indexes1 the first set of indexes
- * @param indexes2 the second set of indexes
- * @param converter the converter
- * @param operation the operation
- * @param isValid the expected value for the operation result
- * @param expected the expected counts after the operation
- */
- private void assertOperation(final int[] indexes1, final int[] indexes2,
- final Function converter,
- final BiPredicate operation,
- final boolean isValid, final int[] expected) {
- final Hasher hasher = new FixedIndexesTestHasher(shape, indexes1);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
- final F filter = converter.apply(indexes2);
- final boolean result = operation.test(bf, filter);
- assertEquals(isValid, result);
- assertEquals(isValid, bf.isValid());
- assertCounts(bf, expected);
- }
-
- /**
- * Tests that merge errors when the counts overflow the maximum integer value.
- */
- @Test
- public void mergeTest_Overflow() {
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 3);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
-
- final ArrayCountingBloomFilter bf2 = createFromCounts(new int[] {0, 0, Integer.MAX_VALUE});
-
- // Small + 1 = OK
- // should not fail as the counts are ignored
- assertTrue(bf.merge(bf2));
- assertTrue(bf.isValid());
- assertCounts(bf, new int[] {0, 1, 2, 1});
-
- // Big + 1 = Overflow
- assertTrue(bf2.isValid());
- assertFalse(bf2.merge(bf));
- assertFalse(bf2.isValid(), "Merge should overflow and the filter is invalid");
-
- // The counts are not clipped to max. They have simply overflowed.
- // Note that this is a merge and the count is only incremented by 1
- // and not the actual count at each index. So it is not 2 + Integer.MAX_VALUE.
- assertCounts(bf2, new int[] {0, 1, 1 + Integer.MAX_VALUE, 1});
+ protected ArrayCountingBloomFilter createFilter(Shape shape, Hasher hasher) {
+ return createFilter(shape, hasher.uniqueIndices(shape));
}
- /**
- * Tests that removal errors when the counts become negative.
- */
- @Test
- public void removeTest_Negative() {
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 3);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
-
- final Hasher hasher2 = new FixedIndexesTestHasher(shape, 2);
- final ArrayCountingBloomFilter bf2 = createFilter(hasher2, shape);
-
- // More - Less = OK
- bf.remove(bf2);
- assertTrue(bf.isValid());
- assertCounts(bf, new int[] {0, 1, 0, 1});
-
- // Less - More = Negative
- assertTrue(bf2.isValid());
- bf2.remove(bf);
- assertFalse(bf2.isValid(), "Remove should create negative counts and the filter is invalid");
-
- // The counts are not clipped to zero. They have been left as negative.
- assertCounts(bf2, new int[] {0, -1, 1, -1});
+ @Override
+ protected ArrayCountingBloomFilter createFilter(Shape shape, BitMapProducer producer) {
+ return createFilter(shape, IndexProducer.fromBitMapProducer(producer));
}
- /**
- * Tests that counts can be added to a new instance.
- *
- * Note: This test ensures the CountingBloomFilter
- * can be created with whatever counts are required for other tests.
- */
- @Test
- public void addTest_NewInstance() {
- for (final int[] counts : new int[][] {
- { /* empty */},
- {0, 0, 1},
- {0, 1, 2},
- {2, 3, 4},
- {66, 77, 0, 99},
- {Integer.MAX_VALUE, 42},
- }) {
- assertCounts(createFromCounts(counts), counts);
+ @Override
+ protected ArrayCountingBloomFilter createFilter(Shape shape, IndexProducer producer) {
+ ArrayCountingBloomFilter filter = createEmptyFilter(shape);
+ try {
+ filter.add(BitCountProducer.from(producer));
+ return filter;
+ } catch (ArrayIndexOutOfBoundsException e) {
+ // ArrayCountingBloomFilter does not have a constructor that takes a
+ // hasher, so we have to reproduce the expected failure here by turning
+ // the out-of-bounds index into an IllegalArgumentException.
+ throw new IllegalArgumentException(e);
}
}
-
- /**
- * Test that add correctly ignores an empty CountingBloomFilter.
- */
- @Test
- public void addTest_Empty() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[0],
- CountingBloomFilter::add,
- true,
- new int[] {5, 2, 1});
- }
-
- /**
- * Test that add correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void addTest_Counts() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[] {0, 6, 4, 1},
- CountingBloomFilter::add,
- true,
- new int[] {5, 8, 5, 1});
- }
-
- /**
- * Test that add correctly updates the isValid state when a CountingBloomFilter is
- * passed and an integer overflow occurs.
- */
- @Test
- public void addTest_Overflow() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[] {0, 6, Integer.MAX_VALUE},
- CountingBloomFilter::add,
- false,
- new int[] {5, 8, 1 + Integer.MAX_VALUE});
- }
-
- /**
- * Test that subtract correctly ignores an empty CountingBloomFilter.
- */
- @Test
- public void subtractTest_Empty() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[0],
- CountingBloomFilter::subtract,
- true,
- new int[] {5, 2, 1});
- }
-
- /**
- * Test that subtract correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void subtractTest_Counts() {
- assertCountingOperation(new int[] {5, 9, 1, 1},
- new int[] {0, 2, 1},
- CountingBloomFilter::subtract,
- true,
- new int[] {5, 7, 0, 1});
- }
-
- /**
- * Test that subtract correctly updates the isValid state when a CountingBloomFilter is
- * passed and the counts become negative.
- */
- @Test
- public void subtractTest_Negative() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[] {0, 6, 1},
- CountingBloomFilter::subtract,
- false,
- new int[] {5, -4, 0});
- }
-
- /**
- * Assert a counting operation. Two CountingBloomFilters are created from the
- * two sets of counts. The operation is applied and the counts of the first
- * Bloom filter is checked using the expected counts.
- *
- *
Counts are assumed to map to indexes starting from 0.
- *
- * @param counts1 the first set counts
- * @param counts2 the first set counts
- * @param operation the operation
- * @param isValid the expected value for the operation result
- * @param expected the expected counts after the operation
- */
- private void assertCountingOperation(final int[] counts1, final int[] counts2,
- final BiPredicate operation,
- final boolean isValid, final int[] expected) {
- final ArrayCountingBloomFilter bf1 = createFromCounts(counts1);
- final ArrayCountingBloomFilter bf2 = createFromCounts(counts2);
- final boolean result = operation.test(bf1, bf2);
- assertEquals(isValid, result);
- assertEquals(isValid, bf1.isValid());
- assertCounts(bf1, expected);
- }
-
- /**
- * Tests that the andCardinality calculation executes correctly when using a
- * CountingBloomFilter argument.
- */
- @Test
- public void andCardinalityTest_CountingBloomFilter() {
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {1, 1},
- BloomFilter::andCardinality,
- 2);
- assertCardinalityOperation(new int[] {0, 1, 0, 1, 1, 1, 0, 1, 0},
- new int[] {1, 1, 0, 0, 0, 1},
- BloomFilter::andCardinality,
- 2);
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {0, 0, 1, 1, 1},
- BloomFilter::andCardinality,
- 0);
- }
-
- /**
- * Tests that the orCardinality calculation executes correctly when using a
- * CountingBloomFilter argument.
- */
- @Test
- public void orCardinalityTest_CountingBloomFilter() {
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {1, 1},
- BloomFilter::orCardinality,
- 2);
- assertCardinalityOperation(new int[] {0, 1, 0, 1, 1, 1, 0, 1, 0},
- new int[] {1, 1, 0, 0, 0, 1},
- BloomFilter::orCardinality,
- 6);
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {0, 0, 1, 1, 1},
- BloomFilter::orCardinality,
- 5);
- }
-
- /**
- * Tests that the xorCardinality calculation executes correctly when using a
- * CountingBloomFilter argument.
- */
- @Test
- public void xorCardinalityTest_CountingBloomFilter() {
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {1, 1},
- BloomFilter::xorCardinality,
- 0);
- assertCardinalityOperation(new int[] {0, 1, 0, 1, 1, 1, 0, 1, 0},
- new int[] {1, 1, 0, 0, 0, 1},
- BloomFilter::xorCardinality,
- 4);
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {0, 0, 1, 1, 1},
- BloomFilter::xorCardinality,
- 5);
- }
-
- /**
- * Assert a cardinality operation. Two CountingBloomFilters are created from the
- * two sets of counts. The operation is applied and the counts of the first
- * Bloom filter is checked using the expected counts.
- *
- * Counts are assumed to map to indexes starting from 0.
- *
- * @param counts1 the first set counts
- * @param counts2 the first set counts
- * @param operation the operation
- * @param expected the expected cardinality
- */
- private void assertCardinalityOperation(final int[] counts1, final int[] counts2,
- final ToIntBiFunction operation,
- final int expected) {
- final ArrayCountingBloomFilter bf1 = createFromCounts(counts1);
- final ArrayCountingBloomFilter bf2 = createFromCounts(counts2);
- assertEquals(expected, operation.applyAsInt(bf1, bf2));
- assertEquals(expected, operation.applyAsInt(bf2, bf1));
- }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayTrackerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayTrackerTest.java
new file mode 100644
index 0000000000..d340505406
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayTrackerTest.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.function.IntPredicate;
+
+import org.apache.commons.collections4.bloomfilter.Hasher.IndexFilter.ArrayTracker;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link ArrayTracker} predicate declared in {@code Hasher.IndexFilter}.
+ */
+public class ArrayTrackerTest {
+ // Checks the accept-once results of the tracker and the inputs it must reject with an exception.
+ @Test
+ public void testSeen() {
+ Shape shape = Shape.fromKM(3, 12);
+ IntPredicate tracker = new ArrayTracker(shape);
+ // The first test of each value must return true and the immediate repeat must return false.
+ assertTrue(tracker.test(0));
+ assertFalse(tracker.test(0));
+ assertTrue(tracker.test(1));
+ assertFalse(tracker.test(1));
+ assertTrue(tracker.test(2));
+ assertFalse(tracker.test(2));
+ // Must throw for 3 (a fourth distinct value) and for -1; presumably the tracker holds only k = 3 entries - TODO confirm.
+ assertThrows(IndexOutOfBoundsException.class, () -> tracker.test(3));
+ assertThrows(IndexOutOfBoundsException.class, () -> tracker.test(-1));
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java
new file mode 100644
index 0000000000..59fc4fc4e5
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+public class BitCountProducerFromArrayCountingBloomFilterTest extends AbstractBitCountProducerTest {
+ // Supplies ArrayCountingBloomFilter-based producers for the tests inherited from AbstractBitCountProducerTest.
+ protected Shape shape = Shape.fromKM(17, 72); // shape shared by both factory methods
+
+ @Override
+ protected BitCountProducer createProducer() {
+ ArrayCountingBloomFilter filter = new ArrayCountingBloomFilter(shape);
+ Hasher hasher = new SimpleHasher(0, 1);
+ return filter.merge(hasher); // the value returned by merging the hasher is the producer under test
+ }
+
+ @Override
+ protected BitCountProducer createEmptyProducer() {
+ return new ArrayCountingBloomFilter(shape); // freshly constructed filter with nothing merged into it
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIndexProducerTest.java
new file mode 100644
index 0000000000..d4c2be603e
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIndexProducerTest.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.junit.jupiter.api.Test;
+
+public class BitCountProducerFromIndexProducerTest extends AbstractBitCountProducerTest {
+    // Supplies BitCountProducer.from(IndexProducer) adapters for the tests inherited from AbstractBitCountProducerTest.
+    @Override
+    protected BitCountProducer createProducer() {
+        return BitCountProducer.from(IndexProducer.fromIndexArray(new int[] { 0, 1, 63, 64, 127, 128 }));
+    }
+
+    @Override
+    protected BitCountProducer createEmptyProducer() {
+        return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0]));
+    }
+
+    @Test
+    public final void testFromIndexProducer() {
+        // Each of the six indices handed to fromIndexArray must be reported with a count of 1.
+        BitCountProducer producer = createProducer();
+        Map<Integer, Integer> m = new HashMap<>();
+        // Record every (index, count) pair; the callback returns true, presumably to continue iterating.
+        producer.forEachCount((i, v) -> {
+            m.put(i, v);
+            return true;
+        });
+
+        assertEquals(6, m.size());
+        assertEquals(Integer.valueOf(1), m.get(0));
+        assertEquals(Integer.valueOf(1), m.get(1));
+        assertEquals(Integer.valueOf(1), m.get(63));
+        assertEquals(Integer.valueOf(1), m.get(64));
+        assertEquals(Integer.valueOf(1), m.get(127));
+        assertEquals(Integer.valueOf(1), m.get(128));
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromArrayCountingBloomFilterTest.java
new file mode 100644
index 0000000000..25cd04ed34
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromArrayCountingBloomFilterTest.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+public class BitMapProducerFromArrayCountingBloomFilterTest extends AbstractBitMapProducerTest {
+ // Supplies ArrayCountingBloomFilter-based producers for the tests inherited from AbstractBitMapProducerTest.
+ protected Shape shape = Shape.fromKM(17, 72); // shape shared by both factory methods
+
+ @Override
+ protected BitMapProducer createProducer() {
+ ArrayCountingBloomFilter filter = new ArrayCountingBloomFilter(shape);
+ Hasher hasher = new SimpleHasher(0, 1);
+ return filter.merge(hasher); // the value returned by merging the hasher is the producer under test
+ }
+
+ @Override
+ protected BitMapProducer createEmptyProducer() {
+ return new ArrayCountingBloomFilter(shape); // freshly constructed filter with nothing merged into it
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromIndexProducerTest.java
new file mode 100644
index 0000000000..4a9500d435
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromIndexProducerTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.IntPredicate;
+
+import org.junit.jupiter.api.Test;
+
+public class BitMapProducerFromIndexProducerTest extends AbstractBitMapProducerTest {
+ // Supplies BitMapProducer.fromIndexProducer adapters for the tests inherited from AbstractBitMapProducerTest.
+ @Override
+ protected BitMapProducer createProducer() {
+ IndexProducer iProducer = new IndexProducer() {
+ // Offers indices 0, 1, 63, 64, 127 and 128 in that order; && stops at the first one the consumer rejects.
+ @Override
+ public boolean forEachIndex(IntPredicate consumer) {
+ return consumer.test(0) && consumer.test(1) && consumer.test(63) && consumer.test(64)
+ && consumer.test(127) && consumer.test(128);
+ }
+ };
+ return BitMapProducer.fromIndexProducer(iProducer, 200); // 200 is presumably the number of bits to cover - TODO confirm
+ }
+
+ @Override
+ protected BitMapProducer createEmptyProducer() {
+ IndexProducer iProducer = new IndexProducer() {
+ // Offers no indices at all: the consumer is never called and true is returned.
+ @Override
+ public boolean forEachIndex(IntPredicate consumer) {
+ return true;
+ }
+ };
+ return BitMapProducer.fromIndexProducer(iProducer, 200); // same second argument as createProducer
+ }
+
+ @Test
+ public final void testFromIndexProducer() {
+ List lst = new ArrayList<>();
+ createProducer().forEachBitMap(lst::add); // collect the bit map words in the order they are reported
+ long[] buckets = lst.stream().mapToLong(l -> l.longValue()).toArray(); // unbox into the long[] that BitMap.contains is given
+ assertTrue(BitMap.contains(buckets, 0));
+ assertTrue(BitMap.contains(buckets, 1));
+ assertTrue(BitMap.contains(buckets, 63));
+ assertTrue(BitMap.contains(buckets, 64));
+ assertTrue(BitMap.contains(buckets, 127));
+ assertTrue(BitMap.contains(buckets, 128));
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromLongArrayTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromLongArrayTest.java
new file mode 100644
index 0000000000..77bc6ed455
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromLongArrayTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.IntPredicate;
+
+import org.junit.jupiter.api.Test;
+
+public class BitMapProducerFromLongArrayTest extends AbstractBitMapProducerTest {
+ // Supplies BitMapProducer.fromBitMapArray producers for the tests inherited from AbstractBitMapProducerTest.
+ @Override
+ protected BitMapProducer createProducer() {
+ long[] ary = new long[] { 1L, 2L, 3L, 4L, 5L };
+ return BitMapProducer.fromBitMapArray(ary);
+ }
+
+ @Override
+ protected BitMapProducer createEmptyProducer() {
+ return BitMapProducer.fromBitMapArray(new long[0]); // zero-length array, so there are no words to report
+ }
+
+ @Override
+ protected boolean emptyIsZeroLength() {
+ return true; // presumably tells the base test that the empty producer reports no bit maps - TODO confirm
+ }
+
+ @Test
+ public void constructorTest() {
+ List lst = new ArrayList<>();
+ createProducer().forEachBitMap(lst::add); // the words must come back unchanged and in array order
+ assertEquals(Long.valueOf(1), lst.get(0));
+ assertEquals(Long.valueOf(2), lst.get(1));
+ assertEquals(Long.valueOf(3), lst.get(2));
+ assertEquals(Long.valueOf(4), lst.get(3));
+ assertEquals(Long.valueOf(5), lst.get(4));
+ }
+
+ @Test
+ public void testFromIndexProducer() {
+ int limit = Integer.SIZE + Long.SIZE; // 32 + 64 = 96 indices, i.e. one full word plus half of the next
+ IndexProducer iProducer = new IndexProducer() {
+ // Offers every index in [0, limit) and returns false as soon as the consumer rejects one.
+ @Override
+ public boolean forEachIndex(IntPredicate consumer) {
+ for (int i = 0; i < limit; i++) {
+ if (!consumer.test(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ };
+ BitMapProducer producer = BitMapProducer.fromIndexProducer(iProducer, limit);
+ List lst = new ArrayList<>();
+ producer.forEachBitMap(lst::add);
+ long expected = ~0L; // first word: all 64 bits set
+ assertEquals(expected, lst.get(0).longValue());
+ expected &= 0XFFFFFFFFL; // second word: only the low 32 bits set
+ assertEquals(expected, lst.get(1).longValue());
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java
new file mode 100644
index 0000000000..f73b4807bc
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+public class BitMapProducerFromSimpleBloomFilterTest extends AbstractBitMapProducerTest {
+ // Supplies SimpleBloomFilter instances as the producers for the tests inherited from AbstractBitMapProducerTest.
+ protected Shape shape = Shape.fromKM(17, 72); // shape shared by both factory methods
+
+ @Override
+ protected BitMapProducer createProducer() {
+ Hasher hasher = new SimpleHasher(0, 1);
+ return new SimpleBloomFilter(shape, hasher); // filter constructed from the shape and the hasher
+ }
+
+ @Override
+ protected BitMapProducer createEmptyProducer() {
+ return new SimpleBloomFilter(shape); // filter constructed from the shape alone
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java
new file mode 100644
index 0000000000..0a6331ce78
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+public class BitMapProducerFromSparseBloomFilterTest extends AbstractBitMapProducerTest {
+    protected Shape shape = Shape.fromKM(17, 72);
+
+    /** Producer under test: a {@link SparseBloomFilter} built from the shape and {@code SimpleHasher(0, 1)}. */
+    @Override
+    protected BitMapProducer createProducer() {
+        return new SparseBloomFilter(shape, new SimpleHasher(0, 1));
+    }
+
+    /** Empty producer: a {@link SparseBloomFilter} built from the shape alone. */
+    @Override
+    protected BitMapProducer createEmptyProducer() {
+        return new SparseBloomFilter(shape);
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTest.java
new file mode 100644
index 0000000000..f68ca7df35
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTest.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+/** Unit tests for the static helpers of {@link BitMap}. */
+public class BitMapTest {
+
+    /** {@code getLongBit} is checked on both sides of each 64-bit boundary up to index 128. */
+    @Test
+    public final void testGetLongBit() {
+        for (final int idx : new int[] {0, 63, 64, 127, 128}) {
+            final long expected = idx % 64 == 0 ? 1L : 0x8000000000000000L;
+            assertEquals(expected, BitMap.getLongBit(idx));
+        }
+    }
+
+    /** {@code getLongIndex} is checked at the same indices, written as {index, expected} pairs. */
+    @Test
+    public final void testGetLongIndex() {
+        final int[][] cases = {{0, 0}, {63, 0}, {64, 1}, {127, 1}, {128, 2}};
+        for (final int[] c : cases) {
+            assertEquals(c[1], BitMap.getLongIndex(c[0]));
+        }
+    }
+
+    /** {@code numberOfBitMaps} must be 0 for 0 bits, 1 for 1..64, 2 for 65..128 and 3 for 129. */
+    @Test
+    public final void testNumberOfBitMaps() {
+        assertEquals(0, BitMap.numberOfBitMaps(0), "Number of bits 0");
+        for (int bits = 1; bits < 129; bits++) {
+            final int expected = bits < 65 ? 1 : 2;
+            assertEquals(expected, BitMap.numberOfBitMaps(bits), String.format("Number of bits %d", bits));
+        }
+        assertEquals(3, BitMap.numberOfBitMaps(129), "Number of bits 129");
+    }
+
+    /** Setting bits 0..128 in turn must fill two words completely and set one bit in the third. */
+    @Test
+    public final void testSet() {
+        final long[] words = new long[BitMap.numberOfBitMaps(129)];
+        for (int bit = 0; bit < 129; bit++) {
+            BitMap.set(words, bit);
+            assertTrue(BitMap.contains(words, bit), String.format("Failed at index: %d", bit));
+        }
+        assertEquals(0xFFFFFFFFFFFFFFFFL, words[0]);
+        assertEquals(0xFFFFFFFFFFFFFFFFL, words[1]);
+        assertEquals(1L, words[2]);
+    }
+
+    /** {@code contains} must report exactly the bit that was set and throw for indices outside the array. */
+    @Test
+    public final void testContains() {
+        final long[] word = new long[1];
+        for (int set = 0; set < 64; set++) {
+            word[0] = 0L;
+            BitMap.set(word, set);
+            for (int probe = 0; probe < 64; probe++) {
+                if (probe != set) {
+                    assertFalse(BitMap.contains(word, probe), String.format("Failed at index %d for %d", set, probe));
+                } else {
+                    assertTrue(BitMap.contains(word, probe), String.format("Failed at index: %d for %d", set, probe));
+                }
+            }
+        }
+
+        // test boundary conditions on a single-word array
+        final long[] single = new long[1];
+        assertThrows(ArrayIndexOutOfBoundsException.class, () -> BitMap.contains(single, -1));
+        assertFalse(BitMap.contains(single, 0));
+        single[0] = 0x01;
+        assertTrue(BitMap.contains(single, 0));
+
+        assertFalse(BitMap.contains(single, 63));
+        single[0] = (1L << 63);
+        assertTrue(BitMap.contains(single, 63));
+        assertThrows(ArrayIndexOutOfBoundsException.class, () -> BitMap.contains(single, 64));
+
+        // with two words available, index 64 is read from the second one
+        final long[] pair = new long[2];
+        assertFalse(BitMap.contains(pair, 64));
+        pair[1] = 1;
+        assertTrue(BitMap.contains(pair, 64));
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTrackerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTrackerTest.java
new file mode 100644
index 0000000000..8d6df5c3de
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTrackerTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.function.IntPredicate;
+
+import org.apache.commons.collections4.bloomfilter.Hasher.IndexFilter.BitMapTracker;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link BitMapTracker} class.
+ */
+public class BitMapTrackerTest {
+
+    /**
+     * Each probed index must pass the tracker the first time it is offered
+     * and be rejected when it is offered again.
+     */
+    @Test
+    public void testSeen() {
+        final Shape shape = Shape.fromKM(3, 12);
+        final IntPredicate tracker = new BitMapTracker(shape);
+
+        // probe order is unchanged: 0, 1, 2, then 4
+        for (final int index : new int[] {0, 1, 2, 4}) {
+            assertTrue(tracker.test(index));
+            assertFalse(tracker.test(index));
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java
deleted file mode 100644
index ffd2d0d8c5..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import org.junit.jupiter.api.Test;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.ArrayList;
-import java.util.Random;
-import java.util.concurrent.ThreadLocalRandom;
-
-import static org.junit.jupiter.api.Assertions.assertThrows;
-
-/**
- * Tests for the {@link BloomFilterIndexer}.
- */
-public class BloomFilterIndexerTest {
-
- @Test
- public void testCheckPositiveThrows() {
- assertThrows(IndexOutOfBoundsException.class, () -> BloomFilterIndexer.checkPositive(-1));
- }
-
- @Test
- public void testGetLongIndex() {
- assertEquals(0, BloomFilterIndexer.getLongIndex(0));
-
- for (final int index : getIndexes()) {
- // getLongIndex is expected to identify a block of 64-bits (starting from zero)
- assertEquals(index / Long.SIZE, BloomFilterIndexer.getLongIndex(index));
-
- // Verify the behavior for negatives. It should produce a negative (invalid)
- // as a simple trip for incorrect usage.
- assertTrue(BloomFilterIndexer.getLongIndex(-index) < 0);
-
- // If index is not zero then when negated this is what a signed shift
- // of 6-bits actually does
- assertEquals(((1 - index) / Long.SIZE) - 1,
- BloomFilterIndexer.getLongIndex(-index));
- }
- }
-
- @Test
- public void testGetLongBit() {
- assertEquals(1L, BloomFilterIndexer.getLongBit(0));
-
- for (final int index : getIndexes()) {
- // getLongBit is expected to identify a single bit in a 64-bit block
- assertEquals(1L << (index % Long.SIZE), BloomFilterIndexer.getLongBit(index));
-
- // Verify the behavior for negatives
- assertEquals(1L << (64 - (index & 0x3f)), BloomFilterIndexer.getLongBit(-index));
- }
- }
-
- /**
- * Gets non-zero positive indexes for testing.
- *
- * @return the indices
- */
- private static int[] getIndexes() {
- final Random rng = ThreadLocalRandom.current();
- final ArrayList indexes = new ArrayList<>(40);
- for (int i = 0; i < 10; i++) {
- // random positive numbers
- indexes.add(rng.nextInt() >>> 1);
- indexes.add(rng.nextInt(23647826));
- indexes.add(rng.nextInt(245));
- }
- // Quickly remove zeros (as these cannot be negated)
- indexes.removeIf(i -> i == 0);
- // Add edge cases here
- indexes.add(1);
- indexes.add(2);
- indexes.add(63);
- indexes.add(64);
- indexes.add(Integer.MAX_VALUE);
- return indexes.stream().mapToInt(Integer::intValue).toArray();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java
new file mode 100644
index 0000000000..e7af149b1b
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.function.LongPredicate;
+
+public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
+    @Override
+    protected BitMapProducer createProducer() {
+        return new DefaultBitMapProducer(new long[] { 1L, 2L });
+    }
+
+    @Override
+    protected BitMapProducer createEmptyProducer() {
+        return new DefaultBitMapProducer(new long[0]);
+    }
+
+    @Override
+    protected boolean emptyIsZeroLength() {
+        return true; // createEmptyProducer() wraps a zero-length array
+    }
+
+    /** Array-backed producer, static because it uses nothing from the test instance; stops at the first rejected word. */
+    static class DefaultBitMapProducer implements BitMapProducer {
+        private final long[] bitMaps;
+
+        DefaultBitMapProducer(long[] bitMaps) {
+            this.bitMaps = bitMaps;
+        }
+
+        @Override
+        public boolean forEachBitMap(LongPredicate predicate) {
+            for (long bitmap : bitMaps) {
+                if (!predicate.test(bitmap)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java
deleted file mode 100644
index 0d6443355c..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.BitSet;
-import java.util.function.IntConsumer;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
-
-/**
- * Test all the default implementations of the BloomFilter in {@link AbstractBloomFilter}.
- */
-public class DefaultBloomFilterMethodsTest extends AbstractBloomFilterTest {
-
- /**
- * A testing class that implements only the abstract methods from BloomFilter.
- *
- */
- private static class BF extends AbstractBloomFilter {
-
- /**
- * The bits for this BloomFilter.
- */
- private final BitSet bitSet;
-
- /**
- * Constructs a BitSetBloomFilter from a hasher and a shape.
- *
- * @param hasher the Hasher to use.
- * @param shape the desired shape of the filter.
- */
- BF(final Hasher hasher, final Shape shape) {
- this(shape);
- verifyHasher(hasher);
- hasher.iterator(shape).forEachRemaining((IntConsumer) bitSet::set);
- }
-
- /**
- * Constructs an empty BitSetBloomFilter.
- *
- * @param shape the desired shape of the filter.
- */
- BF(final Shape shape) {
- super(shape);
- this.bitSet = new BitSet();
- }
-
- @Override
- public long[] getBits() {
- return bitSet.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(bitSet.stream().iterator(), getShape());
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- verifyShape(other);
- bitSet.or(BitSet.valueOf(other.getBits()));
- return true;
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- verifyHasher(hasher);
- hasher.iterator(getShape()).forEachRemaining((IntConsumer) bitSet::set);
- return true;
- }
- }
-
- @Override
- protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
- return new BF(shape);
- }
-
- @Override
- protected AbstractBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new BF(hasher, shape);
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java
new file mode 100644
index 0000000000..aab0f43b2f
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.TreeSet;
+import java.util.function.IntPredicate;
+import java.util.function.LongPredicate;
+
+import org.junit.jupiter.api.Test;
+
+/** Tests the {@link BloomFilter} default methods through minimal {@link TreeSet}-backed implementations. */
+public class DefaultBloomFilterTest extends AbstractBloomFilterTest {
+    @Override
+    protected AbstractDefaultBloomFilter createEmptyFilter(final Shape shape) {
+        return new SparseDefaultBloomFilter(shape);
+    }
+
+    @Override
+    protected AbstractDefaultBloomFilter createFilter(final Shape shape, final Hasher hasher) {
+        return new SparseDefaultBloomFilter(shape, hasher);
+    }
+
+    @Override
+    protected AbstractDefaultBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
+        return new SparseDefaultBloomFilter(shape, producer);
+    }
+
+    @Override
+    protected AbstractDefaultBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
+        return new SparseDefaultBloomFilter(shape, producer);
+    }
+
+    @Test
+    public void testDefaultBloomFilterSimpleSpecificMergeInPlace() {
+        AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
+        Hasher hasher = new SimpleHasher(0, 1);
+        assertTrue(filter.mergeInPlace(hasher));
+        assertEquals(3, filter.cardinality());
+    }
+
+    @Test
+    public void testDefaultBloomFilterSparseSpecificMergeInPlace() {
+        AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
+        Hasher hasher = new SimpleHasher(0, 1);
+        BloomFilter newFilter = filter.merge(hasher); // merge(Hasher), not mergeInPlace, despite the test name
+        assertEquals(3, newFilter.cardinality());
+    }
+
+    @Test
+    public void testDefaultBloomFilterSparseSpecificMerge() {
+        Shape shape = Shape.fromKM(3, 150);
+        AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape);
+        AbstractDefaultBloomFilter filter2 = new SparseDefaultBloomFilter(shape, new SimpleHasher(0, 1));
+        BloomFilter newFilter = filter.merge(filter2);
+        assertEquals(3, newFilter.cardinality());
+    }
+
+    @Test
+    public void testHasherBasedMergeInPlaceWithDifferingSparseness() {
+        Hasher hasher = new SimpleHasher(1, 1);
+        // filter that reports isSparse() == false
+        BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape());
+        bf1.mergeInPlace(hasher);
+        assertTrue(BitMapProducer.fromIndexProducer(hasher.indices(getTestShape()), getTestShape().getNumberOfBits())
+                .forEachBitMapPair(bf1, (x, y) -> x == y));
+        // filter that reports isSparse() == true
+        bf1 = new SparseDefaultBloomFilter(getTestShape());
+        bf1.mergeInPlace(hasher);
+        assertTrue(BitMapProducer.fromIndexProducer(hasher.indices(getTestShape()), getTestShape().getNumberOfBits())
+                .forEachBitMapPair(bf1, (x, y) -> x == y));
+    }
+
+    /** Minimal {@link BloomFilter} that keeps its enabled indices in a sorted set. */
+    abstract static class AbstractDefaultBloomFilter implements BloomFilter {
+        private final Shape shape;
+        protected TreeSet<Integer> indices;
+
+        AbstractDefaultBloomFilter(Shape shape) {
+            this.shape = shape;
+            this.indices = new TreeSet<>();
+        }
+
+        AbstractDefaultBloomFilter(Shape shape, Hasher hasher) {
+            this(shape, hasher.indices(shape));
+        }
+
+        AbstractDefaultBloomFilter(Shape shape, BitMapProducer producer) {
+            this(shape, IndexProducer.fromBitMapProducer(producer));
+        }
+
+        AbstractDefaultBloomFilter(Shape shape, IndexProducer producer) {
+            this(shape);
+            producer.forEachIndex((i) -> {
+                indices.add(i);
+                return true;
+            });
+            // floor(-1) finds any negative index; ceiling(numberOfBits) finds any index past the last bit
+            if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
+                throw new IllegalArgumentException(
+                        String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
+            }
+        }
+
+        @Override
+        public boolean forEachIndex(IntPredicate consumer) {
+            for (Integer i : indices) {
+                if (!consumer.test(i)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        @Override
+        public boolean forEachBitMap(LongPredicate consumer) {
+            return BitMapProducer.fromIndexProducer(this, shape.getNumberOfBits()).forEachBitMap(consumer);
+        }
+
+        @Override
+        public Shape getShape() {
+            return shape;
+        }
+
+        @Override
+        public boolean contains(IndexProducer indexProducer) {
+            return indexProducer.forEachIndex((i) -> indices.contains(i));
+        }
+
+        @Override
+        public boolean contains(BitMapProducer bitMapProducer) {
+            return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
+        }
+
+        @Override
+        public boolean mergeInPlace(BloomFilter other) {
+            other.forEachIndex((i) -> { // NOTE(review): indices are added before the range checks below run
+                indices.add(i);
+                return true;
+            });
+            if (!indices.isEmpty()) {
+                if (indices.last() >= shape.getNumberOfBits()) {
+                    throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
+                            indices.last(), shape.getNumberOfBits()));
+                }
+                if (indices.first() < 0) {
+                    throw new IllegalArgumentException(
+                            String.format("Value in list %s is less than 0", indices.first()));
+                }
+            }
+            return true;
+        }
+
+        @Override
+        public int cardinality() {
+            return indices.size();
+        }
+    }
+
+    /** Variant whose {@code isSparse()} returns {@code true}. */
+    static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
+        SparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
+            super(shape, producer);
+        }
+
+        SparseDefaultBloomFilter(Shape shape, Hasher hasher) {
+            super(shape, hasher);
+        }
+
+        SparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
+            super(shape, producer);
+        }
+
+        SparseDefaultBloomFilter(Shape shape) {
+            super(shape);
+        }
+
+        @Override
+        public boolean isSparse() {
+            return true;
+        }
+
+        @Override
+        public AbstractDefaultBloomFilter copy() {
+            AbstractDefaultBloomFilter result = new SparseDefaultBloomFilter(getShape());
+            result.indices.addAll(indices);
+            return result;
+        }
+    }
+
+    /** Variant whose {@code isSparse()} returns {@code false}; {@code copy()} returns the same type so a copy reports the same. */
+    static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
+        NonSparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
+            super(shape, producer);
+        }
+
+        NonSparseDefaultBloomFilter(Shape shape, Hasher hasher) {
+            super(shape, hasher);
+        }
+
+        NonSparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
+            super(shape, producer);
+        }
+
+        NonSparseDefaultBloomFilter(Shape shape) {
+            super(shape);
+        }
+
+        @Override
+        public boolean isSparse() {
+            return false;
+        }
+
+        @Override
+        public AbstractDefaultBloomFilter copy() {
+            AbstractDefaultBloomFilter result = new NonSparseDefaultBloomFilter(getShape());
+            result.indices.addAll(indices);
+            return result;
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/FixedIndexesTestHasher.java b/src/test/java/org/apache/commons/collections4/bloomfilter/FixedIndexesTestHasher.java
deleted file mode 100644
index ec4886294c..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/FixedIndexesTestHasher.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-
-import java.util.Arrays;
-import java.util.PrimitiveIterator.OfInt;
-
-/**
- * A Hasher implementation to return fixed indexes. Duplicates are allowed.
- * The shape is ignored when generating the indexes.
- *
- * This is not a real hasher and is used for testing only.
- */
-class FixedIndexesTestHasher implements Hasher {
- /** The shape. */
- private final Shape shape;
- /** The indexes. */
- private final int[] indexes;
-
- /**
- * Create an instance.
- *
- * @param shape the shape
- * @param indexes the indexes
- */
- FixedIndexesTestHasher(final Shape shape, final int... indexes) {
- this.shape = shape;
- this.indexes = indexes;
- }
-
- @Override
- public OfInt iterator(final Shape shape) {
- if (!this.shape.equals(shape)) {
- throw new IllegalArgumentException(
- String.format("shape (%s) does not match internal shape (%s)", shape, this.shape));
- }
- return Arrays.stream(indexes).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return shape.getHashFunctionIdentity();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
deleted file mode 100644
index a10df81643..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.DynamicHasher;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
-import org.junit.jupiter.api.Test;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.PrimitiveIterator.OfInt;
-
-/**
- * Tests the {@link HasherBloomFilter}.
- */
-public class HasherBloomFilterTest extends AbstractBloomFilterTest {
-
- /**
- * Tests that the constructor works correctly.
- */
- @Test
- public void constructorTest_NonStatic() {
- final Shape shape = new Shape(new MD5Cyclic(), 3, 72, 17);
- final DynamicHasher hasher = new DynamicHasher.Builder(new MD5Cyclic()).with("Hello", StandardCharsets.UTF_8).build();
- final HasherBloomFilter filter = createFilter(hasher, shape);
- final long[] lb = filter.getBits();
- assertEquals(2, lb.length);
- assertEquals(0x6203101001888c44L, lb[0]);
- assertEquals(0x60L, lb[1]);
- }
-
- @Override
- protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
- return new HasherBloomFilter(shape);
- }
-
- @Override
- protected HasherBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new HasherBloomFilter(hasher, shape);
- }
-
- /**
- * Test the edge case where the filter is empty and the getBits() function returns a
- * zero length array.
- */
- @Test
- public void getBitsTest_Empty() {
- final BloomFilter filter = createEmptyFilter(shape);
- assertArrayEquals(new long[0], filter.getBits());
- }
-
- /**
- * Test the edge case where the filter has only 1 bit in the lowest index and the getBits()
- * function returns an array of length 1.
- */
- @Test
- public void getBitsTest_LowestBitOnly() {
- final BloomFilter filter = createEmptyFilter(shape);
- // Set the lowest bit index only.
- filter.merge(new Hasher() {
- @Override
- public OfInt iterator(final Shape shape) {
- return Arrays.stream(new int[] {0}).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return shape.getHashFunctionIdentity();
- }
- });
- assertArrayEquals(new long[] {1L}, filter.getBits());
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherCollectionTest.java
new file mode 100644
index 0000000000..419196ab7e
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherCollectionTest.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link HasherCollection}.
+ */
+public class HasherCollectionTest extends AbstractHasherTest {
+
+    @Override
+    protected HasherCollection createHasher() {
+        return new HasherCollection(new SimpleHasher(1, 1), new SimpleHasher(2, 2));
+    }
+
+    @Override
+    protected HasherCollection createEmptyHasher() {
+        return new HasherCollection();
+    }
+
+    @Override
+    protected int getHasherSize(Hasher hasher) {
+        return ((HasherCollection) hasher).getHashers().size();
+    }
+
+    /**
+     * Runs the index-array, for-each-index and add tests on another test instance.
+     *
+     * @param nestedTest the test instance whose factory methods supply the hashers
+     */
+    protected void nestedTest(HasherCollectionTest nestedTest) {
+        nestedTest.testAsIndexArray();
+        nestedTest.testForEachIndex();
+        nestedTest.testAdd();
+    }
+
+    /**
+     * Tests hashers built with the collection constructor and with the varargs constructor.
+     * Only createHasher() is overridden; createEmptyHasher() is inherited from this class.
+     */
+    @Test
+    public void testCollectionConstructor() {
+        List<Hasher> lst = Arrays.asList(new SimpleHasher(3, 2), new SimpleHasher(4, 2));
+        HasherCollectionTest nestedTest = new HasherCollectionTest() {
+            @Override
+            protected HasherCollection createHasher() {
+                return new HasherCollection(lst);
+            }
+        };
+        nestedTest(nestedTest);
+
+        nestedTest = new HasherCollectionTest() {
+            @Override
+            protected HasherCollection createHasher() {
+                return new HasherCollection(new SimpleHasher(3, 2), new SimpleHasher(4, 2));
+            }
+        };
+        nestedTest(nestedTest);
+    }
+
+    @Test
+    public void testAdd() {
+        // createHasher() supplies two hashers, so a single add gives three.
+        HasherCollection hasher = createHasher();
+        hasher.add(new SimpleHasher(2, 2));
+        assertEquals(3, hasher.getHashers().size());
+
+        hasher.add(Arrays.asList(new SimpleHasher(3, 2), new SimpleHasher(4, 2)));
+        assertEquals(5, hasher.getHashers().size());
+    }
+
+    // @Test is repeated here: JUnit Jupiter does not run an override that lacks the annotation.
+    @Test
+    @Override
+    public void testUniqueIndex() {
+        // Hasher with duplicates for this shape: this setup produces 5, 17, 29, 41, 53, 65 two times.
+        Shape shape = Shape.fromKM(12, 72);
+        Hasher h1 = new SimpleHasher(5, 12);
+        HasherCollection hasher = createEmptyHasher();
+        hasher.add(h1);
+        hasher.add(h1);
+        List<Integer> lst = new ArrayList<>();
+        for (int i : new int[] { 5, 17, 29, 41, 53, 65 }) {
+            lst.add(i);
+            lst.add(i);
+        }
+
+        assertTrue(hasher.uniqueIndices(shape).forEachIndex(i -> lst.remove(Integer.valueOf(i))),
+                "unable to remove value");
+        assertEquals(0, lst.size());
+    }
+
+    @Test
+    void testHasherCollection() {
+        Hasher h1 = new SimpleHasher(13, 4678);
+        Hasher h2 = new SimpleHasher(42, 987);
+        Hasher h3 = new SimpleHasher(454, 2342);
+
+        HasherCollection hc1 = new HasherCollection(Arrays.asList(h1, h1));
+        HasherCollection hc2 = new HasherCollection(Arrays.asList(h2, h3));
+        HasherCollection hc3 = new HasherCollection(Arrays.asList(hc1, hc2));
+
+        ArrayCountingBloomFilter bf = new ArrayCountingBloomFilter(Shape.fromKM(5, 10000));
+
+        // Should add h1, h1, h2, h3
+        Assertions.assertTrue(bf.mergeInPlace(hc3));
+        Assertions.assertTrue(bf.remove(h1));
+        Assertions.assertTrue(bf.remove(h1));
+        Assertions.assertNotEquals(0, bf.cardinality());
+        Assertions.assertTrue(bf.remove(hc2));
+        Assertions.assertEquals(0, bf.cardinality());
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexFilterTest.java
index 5aa6c94bef..80c93c1448 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexFilterTest.java
@@ -16,76 +16,123 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentityImpl;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-import org.junit.jupiter.api.Test;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Set;
-import java.util.function.IntConsumer;
-import java.util.stream.Collectors;
-
-import static org.junit.jupiter.api.Assertions.assertAll;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+import java.util.SplittableRandom;
+import java.util.concurrent.ThreadLocalRandom;
+
+import org.apache.commons.collections4.bloomfilter.Hasher.IndexFilter;
+import org.apache.commons.collections4.bloomfilter.Hasher.IndexFilter.ArrayTracker;
+import org.apache.commons.collections4.bloomfilter.Hasher.IndexFilter.BitMapTracker;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
/**
- * Tests for the {@link IndexFilters}.
+ * Tests the {@link IndexFilter} class.
*/
public class IndexFilterTest {
- /**
- * The shape of the dummy Bloom filter.
- * This is used as an argument to a Hasher that just returns fixed indexes
- * so the parameters do not matter.
- */
- private final Shape shape = new Shape(new HashFunctionIdentityImpl(
- "Apache Commons Collections", "Dummy", Signedness.SIGNED, ProcessType.CYCLIC, 0L),
- 50, 3000, 4);
-
@Test
- public void testApplyThrowsWithNullArguments() {
- final FixedIndexesTestHasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 3);
- final Shape shape = this.shape;
- final ArrayList actual = new ArrayList<>();
- final IntConsumer consumer = actual::add;
- assertAll(
- () -> assertThrows(NullPointerException.class, () -> IndexFilters.distinctIndexes(null, shape, consumer), "null hasher"),
- () -> assertThrows(NullPointerException.class, () -> IndexFilters.distinctIndexes(hasher, null, consumer), "null shape"),
- () -> assertThrows(NullPointerException.class, () -> IndexFilters.distinctIndexes(hasher, shape, null), "null consumer")
- );
-
- // All OK together
- IndexFilters.distinctIndexes(hasher, shape, consumer);
+    public void testFiltering() {
+        Shape shape = Shape.fromKM(3, 12);
+        List<Integer> consumer = new ArrayList<>();
+        IndexFilter filter = IndexFilter.create(shape, consumer::add);
+        // First pass: all 12 indices are new, so each one reaches the consumer.
+        for (int i = 0; i < 12; i++) {
+            assertTrue(filter.test(i));
+        }
+        assertEquals(12, consumer.size());
+        // Second pass: the same indices are repeats and must not reach the consumer again.
+        for (int i = 0; i < 12; i++) {
+            assertTrue(filter.test(i));
+        }
+        assertEquals(12, consumer.size());
+    }
- @Test
- public void testApply() {
- assertFilter(1, 4, 6, 7, 9);
+    @ParameterizedTest
+    @CsvSource({ "1, 64", "2, 64", "3, 64", "7, 357", "7, 17", })
+    void testFilter(int k, int m) {
+        Shape shape = Shape.fromKM(k, m);
+        BitSet used = new BitSet(m);
+        for (int n = 0; n < 10; n++) {
+            used.clear();
+            List<Integer> consumer = new ArrayList<>();
+            IndexFilter filter = IndexFilter.create(shape, consumer::add);
+
+            // Make random indices; these may be duplicates
+            long seed = ThreadLocalRandom.current().nextLong();
+            SplittableRandom rng = new SplittableRandom(seed);
+            for (int i = Math.min(k, m / 2); i-- > 0;) {
+                int bit = rng.nextInt(m);
+                // duplicates should not alter the list size
+                int newSize = consumer.size() + (used.get(bit) ? 0 : 1);
+                assertTrue(filter.test(bit));
+                assertEquals(newSize, consumer.size(), () -> String.format("Bad filter. Seed=%d, bit=%d", seed, bit));
+                used.set(bit);
+            }
+
+            // The list should have unique entries
+            assertArrayEquals(used.stream().toArray(), consumer.stream().mapToInt(Integer::intValue).sorted().toArray());
+            final int size = consumer.size();
+
+            // Second observations do not change the list size
+            used.stream().forEach(bit -> {
+                assertTrue(filter.test(bit));
+                assertEquals(size, consumer.size(), () -> String.format("Bad filter. Seed=%d, bit=%d", seed, bit));
+            });
+            // Indices outside [0, m) must be rejected with an IndexOutOfBoundsException.
+            assertThrows(IndexOutOfBoundsException.class, () -> filter.test(m));
+            assertThrows(IndexOutOfBoundsException.class, () -> filter.test(-1));
+        }
+    }
@Test
- public void testApplyWithDuplicates() {
- assertFilter(1, 4, 4, 6, 7, 7, 7, 7, 7, 9);
- }
+    public void testConstructor()
+            throws IllegalArgumentException, IllegalAccessException, NoSuchFieldException, SecurityException {
+        Field tracker = IndexFilter.class.getDeclaredField("tracker");
+        tracker.setAccessible(true);
+        List<Integer> consumer = new ArrayList<>();
- private void assertFilter(final int... indexes) {
- final FixedIndexesTestHasher hasher = new FixedIndexesTestHasher(shape, indexes);
- final Set expected = Arrays.stream(indexes).boxed().collect(Collectors.toSet());
- final ArrayList actual = new ArrayList<>();
+        // test even split
+        int k = 2;
+        int m = Long.SIZE;
+        Shape shape = Shape.fromKM(k, m);
+        IndexFilter filter = IndexFilter.create(shape, consumer::add);
+        assertTrue(tracker.get(filter) instanceof ArrayTracker);
- IndexFilters.distinctIndexes(hasher, shape, actual::add);
+        // test k ints < longs for m
+        k = 1;
+        shape = Shape.fromKM(k, m);
+        filter = IndexFilter.create(shape, consumer::add);
+        assertTrue(tracker.get(filter) instanceof ArrayTracker);
- assertEquals(expected.size(), actual.size());
- // Check the array has all the values.
- // We do not currently check the order of indexes from the
- // hasher.iterator() function.
- for (final Integer index : actual) {
- assertTrue(expected.contains(index));
- }
+        // test k ints > longs for m
+        k = 3;
+        shape = Shape.fromKM(k, m);
+        filter = IndexFilter.create(shape, consumer::add);
+        assertTrue(tracker.get(filter) instanceof BitMapTracker);
+
+        /* test overflows */
+        shape = Shape.fromKM(2, Integer.MAX_VALUE);
+        filter = IndexFilter.create(shape, consumer::add);
+        assertTrue(tracker.get(filter) instanceof ArrayTracker);
+
+        // overflow when computing the storage of the int array
+        shape = Shape.fromKM(Integer.MAX_VALUE, 123);
+        filter = IndexFilter.create(shape, consumer::add);
+        // NOTE(review): this assertion was originally marked "*** fails ***" - confirm it now passes
+        assertTrue(tracker.get(filter) instanceof BitMapTracker);
+
+        shape = Shape.fromKM(Integer.MAX_VALUE, Integer.MAX_VALUE);
+        filter = IndexFilter.create(shape, consumer::add);
+        assertTrue(tracker.get(filter) instanceof BitMapTracker);
+    }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java
new file mode 100644
index 0000000000..7e9941d017
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+/** Supplies {@link ArrayCountingBloomFilter} index producers to {@link AbstractIndexProducerTest}. */
+public class IndexProducerFromArrayCountingBloomFilterTest extends AbstractIndexProducerTest {
+
+    /** Shape used for every filter built by this test. */
+    protected Shape shape = Shape.fromKM(17, 72);
+
+    @Override
+    protected IndexProducer createProducer() {
+        return new ArrayCountingBloomFilter(shape).merge(new SimpleHasher(0, 1));
+    }
+
+    @Override
+    protected IndexProducer createEmptyProducer() {
+        return new ArrayCountingBloomFilter(shape);
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromBitmapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromBitmapProducerTest.java
new file mode 100644
index 0000000000..a208d3a2f9
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromBitmapProducerTest.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.LongPredicate;
+
+import org.junit.jupiter.api.Test;
+
+public class IndexProducerFromBitmapProducerTest extends AbstractIndexProducerTest {
+
+    @Override
+    protected IndexProducer createEmptyProducer() {
+        TestingBitMapProducer producer = new TestingBitMapProducer(new long[0]);
+        return IndexProducer.fromBitMapProducer(producer);
+    }
+
+    @Override
+    protected IndexProducer createProducer() {
+        /* Creates an index producer that produces the values:
+         * 0, 65, 128, and 129
+         @formatter:off
+                Index2    Index1    Index0
+         bit       128        64         0
+                     |         |         |
+         1L =>       |         |   ...0001
+         2L =>       |   ...0010
+         3L => ...0011
+         @formatter:on
+         */
+        TestingBitMapProducer producer = new TestingBitMapProducer(new long[] { 1L, 2L, 3L });
+        return IndexProducer.fromBitMapProducer(producer);
+    }
+
+    @Test
+    public final void testFromBitMapProducerTest() {
+        IndexProducer underTest = createProducer();
+        List<Integer> lst = new ArrayList<>();
+
+        underTest.forEachIndex(lst::add);
+        assertEquals(4, lst.size());
+        assertEquals(Integer.valueOf(0), lst.get(0));
+        assertEquals(Integer.valueOf(1 + 64), lst.get(1));
+        assertEquals(Integer.valueOf(0 + 128), lst.get(2));
+        assertEquals(Integer.valueOf(1 + 128), lst.get(3));
+
+        BitMapProducer producer = new TestingBitMapProducer(new long[] { 0xFFFFFFFFFFFFFFFFL });
+        underTest = IndexProducer.fromBitMapProducer(producer);
+        lst = new ArrayList<>();
+
+        underTest.forEachIndex(lst::add);
+        // A fully set word must yield the indices 0..63 in ascending order.
+        assertEquals(64, lst.size());
+        for (int i = 0; i < 64; i++) {
+            assertEquals(Integer.valueOf(i), lst.get(i));
+        }
+    }
+
+    /** Bit map producer backed by a fixed array; stops at the first word the predicate rejects. */
+    private static class TestingBitMapProducer implements BitMapProducer {
+        private final long[] values;
+        TestingBitMapProducer(long[] values) {
+            this.values = values;
+        }
+
+        @Override
+        public boolean forEachBitMap(LongPredicate consumer) {
+            for (long l : values) {
+                if (!consumer.test(l)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherCollectionTest.java
new file mode 100644
index 0000000000..d7e61d796d
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherCollectionTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+public class IndexProducerFromHasherCollectionTest extends AbstractIndexProducerTest {
+ // Populated case: a HasherCollection of two SimpleHashers, asked for the indices of Shape.fromKM(17, 72).
+ @Override
+ protected IndexProducer createProducer() {
+ return new HasherCollection(new SimpleHasher(0, 1), new SimpleHasher(0, 2)).indices(Shape.fromKM(17, 72));
+ }
+ // Empty case: a HasherCollection with no hashers, asked for the indices of an equal shape.
+ @Override
+ protected IndexProducer createEmptyProducer() {
+ return new HasherCollection().indices(Shape.fromKM(17, 72));
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/package-info.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java
similarity index 66%
rename from src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/package-info.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java
index 95951ad7fe..c089b4b420 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/package-info.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java
@@ -14,11 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.commons.collections4.bloomfilter;
-/**
- * Provides implementations of the Bloom filter
- * {@link org.apache.commons.collections4.bloomfilter.hasher.HashFunction HashFunction} interface.
- *
- * @since 4.5
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
+public class IndexProducerFromHasherTest extends AbstractIndexProducerTest {
+ // Populated case: a single SimpleHasher(0, 1), asked for the indices of Shape.fromKM(17, 72).
+ @Override
+ protected IndexProducer createProducer() {
+ return new SimpleHasher(0, 1).indices(Shape.fromKM(17, 72));
+ }
+ // Empty case: the NullHasher test helper, asked for the indices of an equal shape.
+ @Override
+ protected IndexProducer createEmptyProducer() {
+ return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72));
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/package-info.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java
similarity index 66%
rename from src/main/java/org/apache/commons/collections4/bloomfilter/hasher/package-info.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java
index b73675ed28..4755d24a85 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/package-info.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java
@@ -14,12 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.commons.collections4.bloomfilter;
-/**
- * Provides classes and interfaces to define the shape of a Bloom filter and the conversion
- * of generic bytes to a hash of bit indexes to be used with a Bloom filter.
- *
- * @since 4.5
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
+public class IndexProducerFromIntArrayTest extends AbstractIndexProducerTest {
+ // Both producers come from IndexProducer.fromIndexArray: an empty array and { 1, 2, 3, 4, 5 }.
+ @Override
+ protected IndexProducer createEmptyProducer() {
+ return IndexProducer.fromIndexArray(new int[0]);
+ }
+ @Override
+ protected IndexProducer createProducer() {
+ return IndexProducer.fromIndexArray(new int[] { 1, 2, 3, 4, 5 });
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
new file mode 100644
index 0000000000..8525428671
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+/** Supplies {@link SimpleBloomFilter} index producers to {@link AbstractIndexProducerTest}. */
+public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProducerTest {
+
+    protected Shape shape = Shape.fromKM(17, 72);
+
+    @Override
+    protected IndexProducer createProducer() {
+        return new SimpleBloomFilter(shape, new SimpleHasher(0, 1));
+    }
+
+    @Override
+    protected IndexProducer createEmptyProducer() {
+        return new SimpleBloomFilter(shape);
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
new file mode 100644
index 0000000000..4204c90fe7
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+/** Supplies {@link SparseBloomFilter} index producers to {@link AbstractIndexProducerTest}. */
+public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProducerTest {
+
+    protected Shape shape = Shape.fromKM(17, 72);
+
+    @Override
+    protected IndexProducer createProducer() {
+        return new SparseBloomFilter(shape, new SimpleHasher(0, 1));
+    }
+
+    @Override
+    protected IndexProducer createEmptyProducer() {
+        return new SparseBloomFilter(shape);
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java
new file mode 100644
index 0000000000..fc11df6391
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.LongPredicate;
+
+import org.junit.jupiter.api.Test;
+
+public class IndexProducerTest {
+
+    @Test
+    public void fromBitMapProducerTest() {
+        TestingBitMapProducer producer = new TestingBitMapProducer(new long[] { 1L, 2L, 3L });
+        IndexProducer underTest = IndexProducer.fromBitMapProducer(producer);
+        List<Integer> lst = new ArrayList<>();
+        // Words 1L, 2L, 3L set bit 0 of word 0, bit 1 of word 1 and bits 0-1 of word 2.
+        underTest.forEachIndex(lst::add);
+        assertEquals(4, lst.size());
+        assertEquals(Integer.valueOf(0), lst.get(0));
+        assertEquals(Integer.valueOf(1 + 64), lst.get(1));
+        assertEquals(Integer.valueOf(0 + 128), lst.get(2));
+        assertEquals(Integer.valueOf(1 + 128), lst.get(3));
+
+        producer = new TestingBitMapProducer(new long[] { 0xFFFFFFFFFFFFFFFFL });
+        underTest = IndexProducer.fromBitMapProducer(producer);
+        lst = new ArrayList<>();
+
+        underTest.forEachIndex(lst::add);
+        // A fully set word must yield the indices 0..63 in ascending order.
+        assertEquals(64, lst.size());
+        for (int i = 0; i < 64; i++) {
+            assertEquals(Integer.valueOf(i), lst.get(i));
+        }
+    }
+
+    /** Bit map producer backed by a fixed array; stops at the first word the predicate rejects. */
+    private static class TestingBitMapProducer implements BitMapProducer {
+        private final long[] values;
+        TestingBitMapProducer(long[] values) {
+            this.values = values;
+        }
+
+        @Override
+        public boolean forEachBitMap(LongPredicate consumer) {
+            for (long l : values) {
+                if (!consumer.test(l)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/NullHasher.java b/src/test/java/org/apache/commons/collections4/bloomfilter/NullHasher.java
new file mode 100644
index 0000000000..537c60d285
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/NullHasher.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Objects;
+import java.util.function.IntPredicate;
+
+/**
+ * A Hasher that returns no values.
+ *
+ * @since 4.5
+ */
+public final class NullHasher implements Hasher {
+
+    /** The instance of the Null Hasher. */
+    public static final NullHasher INSTANCE = new NullHasher();
+
+    /** Shared producer that never calls the consumer and always returns {@code true}. */
+    private static final IndexProducer PRODUCER = new IndexProducer() {
+        @Override
+        public boolean forEachIndex(IntPredicate consumer) {
+            return true;
+        }
+    };
+
+    private NullHasher() {
+    }
+
+    @Override
+    public IndexProducer indices(final Shape shape) {
+        Objects.requireNonNull(shape, "shape");
+        return PRODUCER;
+    }
+
+    @Override
+    public IndexProducer uniqueIndices(Shape shape) {
+        // Delegate so that a null shape is rejected here exactly as in indices(Shape).
+        return indices(shape);
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
index 45a9943e85..9d7659d1fa 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
@@ -17,14 +17,7 @@
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-
-import java.util.List;
-import java.util.Arrays;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
+
import org.junit.jupiter.api.Test;
/**
@@ -32,319 +25,274 @@
*/
public class SetOperationsTest {
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
-
- private final Shape shape = new Shape(testFunction, 3, 72, 17);
-
- @Test
- public void testDifferentShapesThrows() {
- final List lst = Arrays.asList(1, 2);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- final Shape shape2 = new Shape(testFunction, 3, 72, 18);
- final List lst2 = Arrays.asList(2, 3);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape2);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape2);
-
- assertThrows(IllegalArgumentException.class, () -> SetOperations.cosineDistance(filter1, filter2));
- }
+ protected final SimpleHasher from1 = new SimpleHasher(1, 1);
+ protected final long from1Value = 0x3FFFEL;
+ protected final SimpleHasher from11 = new SimpleHasher(11, 1);
+ protected final long from11Value = 0xFFFF800L;
+ protected final HasherCollection bigHasher = new HasherCollection(from1, from11);
+ protected final long bigHashValue = 0xFFFFFFEL;
+ private final Shape shape = Shape.fromKM(17, 72);
/**
* Tests that the Cosine similarity is correctly calculated.
*/
@Test
- public final void cosineDistanceTest() {
- List lst = Arrays.asList(1, 2);
- Hasher hasher = new StaticHasher(lst.iterator(), shape);
- BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(2, 3);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.5, SetOperations.cosineDistance(filter1, filter2), 0.0001);
- assertEquals(0.5, SetOperations.cosineDistance(filter2, filter1), 0.0001);
-
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- hasher = new StaticHasher(lst.iterator(), shape);
- filter1 = new HasherBloomFilter(hasher, shape);
-
- lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
-
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.514928749927334, SetOperations.cosineDistance(filter1, filter2), 0.000000000000001);
- assertEquals(0.514928749927334, SetOperations.cosineDistance(filter2, filter1), 0.000000000000001);
- }
-
- /**
- * Tests that the Cosine distance is correctly calculated when one or
- * both filters are empty
- */
- @Test
- public final void cosineDistanceTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
-
- assertEquals(1.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter1, filter3), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter3, filter1), 0.0001);
+ public final void testCosineDistance() {
+ // Every scenario is asserted in both argument orders: the distance is expected to be symmetric.
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+
+ // identical filters should have no distance.
+ double expected = 0;
+ assertEquals(expected, SetOperations.cosineDistance(filter1, filter2));
+ assertEquals(expected, SetOperations.cosineDistance(filter2, filter1));
+
+ Shape shape2 = Shape.fromKM(2, 72);
+ filter1 = new SimpleBloomFilter(shape2, from1);
+ filter2 = new SimpleBloomFilter(shape2, new SimpleHasher(2, 1));
+
+ int dotProduct = /* [1,2] & [2,3] = [2] = */ 1;
+ int cardinalityA = 2;
+ int cardinalityB = 2;
+ expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB));
+ assertEquals(expected, SetOperations.cosineDistance(filter1, filter2));
+ assertEquals(expected, SetOperations.cosineDistance(filter2, filter1));
+
+ filter1 = new SimpleBloomFilter(shape, from1);
+ filter2 = new SimpleBloomFilter(shape, from11);
+ dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
+ cardinalityA = 17;
+ cardinalityB = 17;
+ expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB));
+ assertEquals(expected, SetOperations.cosineDistance(filter1, filter2));
+ assertEquals(expected, SetOperations.cosineDistance(filter2, filter1));
+
+ // test with no values
+ filter1 = new SimpleBloomFilter(shape, from1);
+ filter2 = new SimpleBloomFilter(shape);
+ BloomFilter filter3 = new SimpleBloomFilter(shape);
+
+ dotProduct = /* [1..17] & [] = [] = */ 0;
+ cardinalityA = 17;
+ cardinalityB = 0;
+ expected = /* the raw formula would divide 0 by 0 here; the expected distance for an empty filter is */ 1.0;
+ assertEquals(expected, SetOperations.cosineDistance(filter1, filter2));
+ assertEquals(expected, SetOperations.cosineDistance(filter2, filter1));
+
+ dotProduct = /* [] & [] = [] = */ 0;
+ cardinalityA = 0;
+ cardinalityB = 0;
+ expected = /* the raw formula would divide 0 by 0 here; the expected distance for two empty filters is */ 1.0;
+ assertEquals(expected, SetOperations.cosineDistance(filter2, filter3));
+ assertEquals(expected, SetOperations.cosineDistance(filter3, filter2));
}
/**
* Tests that the Cosine similarity is correctly calculated.
*/
@Test
- public final void cosineSimilarityTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(1.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
- assertEquals(1.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
-
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter1, filter2), 0.000000000000001);
- assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter2, filter1), 0.000000000000001);
- }
-
- /**
- * Tests that the Cosine similarity is correctly calculated when one or
- * both filters are empty
- */
- @Test
- public final void cosineSimilarityTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
+ public final void testCosineSimilarity() {
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+
+ int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17;
+ int cardinalityA = 17;
+ int cardinalityB = 17;
+ double expected = /* dotProduct/Sqrt( cardinalityA * cardinalityB ) = */ 1.0;
+ assertEquals(expected, SetOperations.cosineSimilarity(filter1, filter2));
+ assertEquals(expected, SetOperations.cosineSimilarity(filter2, filter1));
+
+ dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
+ cardinalityA = 17;
+ cardinalityB = 17;
+ expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
+ filter2 = new SimpleBloomFilter(shape, from11);
+ assertEquals(expected, SetOperations.cosineSimilarity(filter1, filter2));
+ assertEquals(expected, SetOperations.cosineSimilarity(filter2, filter1));
+
+ // test no values
+ filter1 = new SimpleBloomFilter(shape);
+ filter2 = new SimpleBloomFilter(shape);
// build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
-
- assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
- assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter3), 0.0001);
- assertEquals(0.0, SetOperations.cosineSimilarity(filter3, filter1), 0.0001);
- }
+ BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
- /**
- * Tests that the intersection size estimate is correctly calculated.
- */
- @Test
- public final void estimateIntersectionSizeTest() {
- // build a filter
- List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- lst = Arrays.asList(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32, 33, 34, 35, 36, 37, 38, 39, 40);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- final long estimate = SetOperations.estimateIntersectionSize(filter1, filter2);
- assertEquals(1, estimate);
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter2));
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter2, filter1));
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter3));
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter3, filter1));
}
/**
- * Tests that the size estimate is correctly calculated.
+ * Tests that the Hamming distance is correctly calculated.
*/
@Test
- public final void estimateSizeTest() {
- // build a filter
- List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher = new StaticHasher(lst.iterator(), shape);
- BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
- assertEquals(1, SetOperations.estimateSize(filter1));
-
- // the data provided above do not generate an estimate that is equivalent to the
- // actual.
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
- hasher = new StaticHasher(lst.iterator(), shape);
- filter1 = new HasherBloomFilter(hasher, shape);
- assertEquals(1, SetOperations.estimateSize(filter1));
-
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
- 26, 27, 28, 29, 30, 31, 32, 33);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(3, SetOperations.estimateSize(filter2));
+ public final void testHammingDistance() {
+ final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+ // identical filters differ in no bit position
+ int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0;
+ assertEquals(hammingDistance, SetOperations.hammingDistance(filter1, filter2));
+ assertEquals(hammingDistance, SetOperations.hammingDistance(filter2, filter1));
+ // overlapping filters: only the indices unique to one side are counted
+ filter2 = new SimpleBloomFilter(shape, from11);
+ hammingDistance = /* [1..17] ^ [11..27] = [1..10][18..27] = */ 20;
+ assertEquals(hammingDistance, SetOperations.hammingDistance(filter1, filter2));
+ assertEquals(hammingDistance, SetOperations.hammingDistance(filter2, filter1));
}
/**
- * Tests that the union size estimate is correctly calculated.
+ * Tests that the Jaccard distance is correctly calculated.
*/
@Test
- public final void estimateUnionSizeTest() {
- // build a filter
- List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- lst = Arrays.asList(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- final long estimate = SetOperations.estimateUnionSize(filter1, filter2);
- assertEquals(3, estimate);
+ public final void testJaccardDistance() {
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+
+ // 1 - jaccardSimilarity -- see testJaccardSimilarity; identical filters have similarity 1
+
+ assertEquals(0.0, SetOperations.jaccardDistance(filter1, filter2));
+ assertEquals(0.0, SetOperations.jaccardDistance(filter2, filter1));
+
+ filter2 = new SimpleBloomFilter(shape, from11);
+ double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
+ int union = /* [1..17] | [11..27] = [1..27] = */ 27;
+ assertEquals(1 - (intersection / union), SetOperations.jaccardDistance(filter1, filter2));
+ assertEquals(1 - (intersection / union), SetOperations.jaccardDistance(filter2, filter1));
+
+ // test no values
+ filter1 = new SimpleBloomFilter(shape);
+ filter2 = new SimpleBloomFilter(shape);
+ BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
+
+ // 1 - jaccardSimilarity -- see testJaccardSimilarity; the similarity is 0 whenever a filter is empty
+ assertEquals(1.0, SetOperations.jaccardDistance(filter1, filter2));
+ assertEquals(1.0, SetOperations.jaccardDistance(filter2, filter1));
+ assertEquals(1.0, SetOperations.jaccardDistance(filter1, filter3));
+ assertEquals(1.0, SetOperations.jaccardDistance(filter3, filter1));
}
/**
- * Tests that the Hamming distance is correctly calculated.
+ * Tests that the Jaccard similarity is correctly calculated.
*/
@Test
- public final void hammingDistanceTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0, SetOperations.hammingDistance(filter1, filter2));
- assertEquals(0, SetOperations.hammingDistance(filter2, filter1));
-
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(17, SetOperations.hammingDistance(filter1, filter2));
- assertEquals(17, SetOperations.hammingDistance(filter2, filter1));
+ public final void testJaccardSimilarity() {
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+
+ double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0;
+ int union = /* [1..17] | [1..17] = [1..17] = */ 17;
+
+ assertEquals(intersection / union, SetOperations.jaccardSimilarity(filter1, filter2));
+ assertEquals(intersection / union, SetOperations.jaccardSimilarity(filter2, filter1));
+
+ filter2 = new SimpleBloomFilter(shape, from11);
+ intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
+ union = /* [1..17] | [11..27] = [1..27] = */ 27;
+ assertEquals(intersection / union, SetOperations.jaccardSimilarity(filter1, filter2));
+ assertEquals(intersection / union, SetOperations.jaccardSimilarity(filter2, filter1));
+
+ // test no values
+ filter1 = new SimpleBloomFilter(shape);
+ filter2 = new SimpleBloomFilter(shape);
+ BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
+ // two empty filters: the similarity is asserted to be 0 rather than the undefined 0 / 0
+ assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2));
+ assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1));
+
+ intersection = /* [] & [1..17] = [] = */ 0.0;
+ union = /* [] | [1..17] = [1..17] = */ 17;
+ assertEquals(intersection / union, SetOperations.jaccardSimilarity(filter1, filter3));
+ assertEquals(intersection / union, SetOperations.jaccardSimilarity(filter3, filter1));
}
- /**
- * Tests that the Jaccard distance is correctly calculated.
- */
@Test
- public final void jaccardDistanceTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(1.0, SetOperations.jaccardDistance(filter1, filter2), 0.0001);
- assertEquals(1.0, SetOperations.jaccardDistance(filter2, filter1), 0.0001);
-
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.32, SetOperations.jaccardDistance(filter1, filter2), 0.001);
- assertEquals(0.32, SetOperations.jaccardDistance(filter2, filter1), 0.001);
+ public final void testOrCardinality() {
+ Shape shape = Shape.fromKM(3, 128); // local shape (k = 3, m = 128) that shadows the class-level field
+ SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(5, SetOperations.orCardinality(filter1, filter2)); // union = {1, 5, 63, 64, 69}
+ assertEquals(5, SetOperations.orCardinality(filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(5, SetOperations.orCardinality(filter1, filter2)); // disjoint inputs: union = {1, 5, 63, 64, 69}
+ assertEquals(5, SetOperations.orCardinality(filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(4, SetOperations.orCardinality(filter1, filter2)); // union = {5, 63, 64, 69}
+ assertEquals(4, SetOperations.orCardinality(filter2, filter1));
+
+ Shape bigShape = Shape.fromKM(3, 192); // filters built from shapes of different sizes (192 vs 128 bits)
+ filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
+ assertEquals(5, SetOperations.orCardinality(filter1, filter2)); // union = {1, 5, 63, 69, 185}
+ assertEquals(5, SetOperations.orCardinality(filter2, filter1));
}
- /**
- * Tests that the Jaccard distance is correctly calculated when one or
- * both filters are empty
- */
@Test
- public final void jaccardDistanceTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
-
- assertEquals(1.0, SetOperations.jaccardDistance(filter1, filter2), 0.0001);
- assertEquals(1.0, SetOperations.jaccardDistance(filter2, filter1), 0.0001);
- assertEquals(0.0, SetOperations.jaccardDistance(filter1, filter3), 0.0001);
- assertEquals(0.0, SetOperations.jaccardDistance(filter3, filter1), 0.0001);
+ public final void testAndCardinality() {
+ Shape shape = Shape.fromKM(3, 128); // local shape (k = 3, m = 128) that shadows the class-level field
+ SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(1, SetOperations.andCardinality(filter1, filter2)); // intersection = {64}
+ assertEquals(1, SetOperations.andCardinality(filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(0, SetOperations.andCardinality(filter1, filter2)); // disjoint inputs: intersection = {}
+ assertEquals(0, SetOperations.andCardinality(filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(1, SetOperations.andCardinality(filter1, filter2)); // intersection = {5}
+ assertEquals(1, SetOperations.andCardinality(filter2, filter1));
+
+ Shape bigShape = Shape.fromKM(3, 192); // filters built from shapes of different sizes (192 vs 128 bits)
+ filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
+ assertEquals(1, SetOperations.andCardinality(filter1, filter2)); // intersection = {63}
+ assertEquals(1, SetOperations.andCardinality(filter2, filter1));
}
- /**
- * Tests that the Jaccard similarity is correctly calculated.
- */
@Test
- public final void jaccardSimilarityTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
-
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.68, SetOperations.jaccardSimilarity(filter1, filter2), 0.001);
- assertEquals(0.68, SetOperations.jaccardSimilarity(filter2, filter1), 0.001);
+ public final void testXorCardinality() {
+ Shape shape = Shape.fromKM(3, 128); // local shape (k = 3, m = 128) that shadows the class-level field
+ SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(4, SetOperations.xorCardinality(filter1, filter2)); // symmetric difference = {1, 5, 63, 69}
+ assertEquals(4, SetOperations.xorCardinality(filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(5, SetOperations.xorCardinality(filter1, filter2)); // disjoint inputs: {1, 5, 63, 64, 69}
+ assertEquals(5, SetOperations.xorCardinality(filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ assertEquals(3, SetOperations.xorCardinality(filter1, filter2)); // symmetric difference = {63, 64, 69}
+ assertEquals(3, SetOperations.xorCardinality(filter2, filter1));
+
+ Shape bigShape = Shape.fromKM(3, 192); // filters built from shapes of different sizes (192 vs 128 bits)
+ filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
+ filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
+ assertEquals(4, SetOperations.xorCardinality(filter1, filter2)); // symmetric difference = {1, 5, 69, 185}
+ assertEquals(4, SetOperations.xorCardinality(filter2, filter1));
}
- /**
- * Tests that the Jaccard similarity is correctly calculated when one or
- * both filters are empty
- */
+
@Test
- public final void jaccardSimilarityTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
-
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
- assertEquals(1.0, SetOperations.jaccardSimilarity(filter1, filter3), 0.0001);
- assertEquals(1.0, SetOperations.jaccardSimilarity(filter3, filter1), 0.0001);
+ public final void testCommutativityOnMismatchedSizes() {
+ BitMapProducer p1 = BitMapProducer.fromBitMapArray(new long[] { 0x3L, 0x5L }); // two bit-map words
+ BitMapProducer p2 = BitMapProducer.fromBitMapArray(new long[] { 0x1L }); // a single bit-map word
+ // each operation must return the same value whichever producer is passed first
+ assertEquals(SetOperations.orCardinality(p1, p2), SetOperations.orCardinality(p2, p1));
+ assertEquals(SetOperations.xorCardinality(p1, p2), SetOperations.xorCardinality(p2, p1));
+ assertEquals(SetOperations.andCardinality(p1, p2), SetOperations.andCardinality(p2, p1));
+ assertEquals(SetOperations.hammingDistance(p1, p2), SetOperations.hammingDistance(p2, p1));
+ assertEquals(SetOperations.cosineDistance(p1, p2), SetOperations.cosineDistance(p2, p1));
+ assertEquals(SetOperations.cosineSimilarity(p1, p2), SetOperations.cosineSimilarity(p2, p1));
+ assertEquals(SetOperations.jaccardDistance(p1, p2), SetOperations.jaccardDistance(p2, p1));
+ assertEquals(SetOperations.jaccardSimilarity(p1, p2), SetOperations.jaccardSimilarity(p2, p1));
}
-
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeTest.java
new file mode 100644
index 0000000000..ecafa2ef42
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeTest.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link Shape} class.
+ */
+public class ShapeTest {
+
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
+ *
+ * n = 5
+ *
+ * p = 0.100375138 (1 in 10)
+ *
+ * m = 24 (3B)
+ *
+ * k = 3
+ */
+
+ private final Shape shape = Shape.fromKM(3, 24);
+
+ /**
+ * Tests {@code equals} and {@code hashCode} of the shape, together with its two accessors.
+ */
+ @Test
+ public void testEquals() {
+ // the fixture is Shape.fromKM(3, 24): 3 hash functions and 24 bits
+ assertEquals(shape, shape);
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(shape.hashCode(), Shape.fromKM(3, 24).hashCode());
+ assertNotEquals(shape, null);
+ assertNotEquals(shape, Shape.fromKM(3, 25));
+ assertNotEquals(shape, Shape.fromKM(4, 24));
+ assertNotEquals(shape, "text");
+ assertNotEquals(shape, Integer.valueOf(3));
+ }
+
+ @Test
+ public void testEstimateN() {
+ for (int i = 0; i < 24; i++) {
+ double c = i;
+ double expected = -(24.0 / 3.0) * Math.log1p(-c / 24.0); // -(m / k) * ln(1 - c / m) with m = 24, k = 3
+ assertEquals(expected, shape.estimateN(i), "Error on " + i);
+ }
+ // all 24 bits set: log1p(-1) is negative infinity, so the expected estimate is positive infinity
+ assertEquals(Double.POSITIVE_INFINITY, shape.estimateN(24));
+ // more bits than the shape holds: log1p of a value below -1 is NaN
+ assertEquals(Double.NaN, shape.estimateN(25));
+ }
+
+ @Test
+ public void testGetProbability() {
+ for (int i = 0; i <= 24; i++) {
+ double expected = Math.pow(-Math.expm1(-3.0 * i / 24), 3); // (1 - e^(-k * n / m))^k with k = 3, m = 24
+ assertEquals(expected, shape.getProbability(i), "error at " + i);
+ }
+ // with no items the probability must be exactly zero (delta 0.0)
+ assertEquals(0.0, shape.getProbability(0), 0.0);
+ // a negative number of items is rejected
+ assertThrows(IllegalArgumentException.class, () -> shape.getProbability(-1));
+ }
+
+ @Test
+ public void testIsSparse() {
+ int functions = 1; // Ignored
+ for (int i = 1; i <= 3; i++) {
+ int bits = i * Long.SIZE;
+ Shape shape = Shape.fromKM(functions, bits);
+ for (int n = 0; n <= bits; n++) {
+ final int c = n;
+ // is sparse when n indices stored as 32-bit integers take no more bits than the
+ // bit maps stored as 64-bit longs, i.e. when n <= 2 * (number of bit maps)
+ Assertions.assertEquals(n * Integer.SIZE <= Math.ceil((double) bits / Long.SIZE) * Long.SIZE,
+ shape.isSparse(n), () -> String.format("n=%d : bits=%d", c, bits));
+ }
+ }
+ }
+
+ @Test
+ public void testToString() {
+ assertEquals("Shape[k=3 m=5]", Shape.fromKM(3, 5).toString()); // pins the exact text: k first, then m
+ }
+
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
+ *
+ * n = 5
+ *
+ * p = 0.100375138 (1 in 10)
+ *
+ * m = 24 (3B)
+ *
+ * k = 3
+ */
+
+ /**
+ * Tests that if the number of items is less than 1 an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void testBadNumberOfItems() {
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNM(0, 24));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNMK(0, 24, 5));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(0, 0.02));
+ }
+
+ /**
+ * Tests that if the number of bits is less than 1 an IllegalArgumentException is thrown by fromKM, fromNM, fromNMK and fromPMK.
+ */
+ @Test
+ public void testBadNumberOfBits() {
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromKM(5, 0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNM(5, 0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNMK(5, 0, 7));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromPMK(0.035, 0, 7));
+ }
+
+ /**
+ * Tests that if the number of hash functions is less than 1 an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void testBadNumberOfHashFunctions() {
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromKM(0, 7));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNMK(5, 26, 0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromPMK(0.35, 26, 0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNM(2, 1)); // NOTE(review): presumably the derived number of hash functions rounds to 0 here -- confirm against Shape.fromNM
+ }
+
+ /**
+ * Tests that an IllegalArgumentException is thrown when the calculated probability is greater than or equal to 1, or when the requested probability is 0, 1, NaN or infinite.
+ */
+ @Test
+ public void testBadProbability() {
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNMK(4000, 8, 1));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(10, 0.0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(10, 1.0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(10, Double.NaN));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(10, Double.POSITIVE_INFINITY));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(10, Double.NEGATIVE_INFINITY));
+ }
+
+ /**
+ * Tests that when the number of items, number of bits and number of hash functions is passed the values are
+ * calculated correctly.
+ */
+ @Test
+ public void testFromNMK() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&m=24&k=4
+ */
+ Shape shape = Shape.fromNMK(5, 24, 4);
+ // m and k are taken as given; the probability for n = 5 is compared with the calculator value
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(4, shape.getNumberOfHashFunctions());
+ assertEquals(0.102194782, shape.getProbability(5), 0.000001);
+ // extreme and zero arguments must all be rejected
+ assertThrows(IllegalArgumentException.class,
+ () -> Shape.fromNMK(Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNMK(5, 5, 0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNMK(5, 0, 5));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNMK(0, 5, 5));
+ }
+
+ /**
+ * Tests that if the number of bits or the number of hash functions is less than 1 an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void testFromKM() {
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromKM(5, 0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromKM(0, 5));
+ }
+
+ /**
+ * Tests that when the number of items and number of bits are passed the other values are calculated correctly.
+ */
+ @Test
+ public void testFromNM() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&m=24
+ */
+ Shape shape = Shape.fromNM(5, 24);
+ // the number of hash functions (3) is derived; the bit count is taken as given
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(0.100375138, shape.getProbability(5), 0.000001);
+ // a zero bit count or a zero item count is rejected
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNM(5, 0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNM(0, 5));
+ }
+
+ /**
+ * Tests that the probability is calculated correctly.
+ */
+ @Test
+ public void testProbability() {
+ Shape shape = Shape.fromNMK(5, 24, 3);
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(0.100375138, shape.getProbability(5), 0.000001); // p for n = 5, m = 24, k = 3, as quoted in this class's hur.st comment
+ }
+
+ /**
+ * Tests the values calculated by {@code Shape.fromPMK} from the probability, number of bits and number of hash
+ * functions, and that invalid arguments are rejected.
+ */
+ @Test
+ public void testFromPMK() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=24&k=3
+ */
+ Shape shape = Shape.fromPMK(0.1, 24, 3);
+ // m and k are taken as given; the probability for n = 5 is compared with the calculator value
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(0.100375138, shape.getProbability(5), 0.000001);
+ // boundary probabilities: just below 1 is rejected, the smallest positive double is accepted
+ assertThrows(IllegalArgumentException.class,
+ () -> Shape.fromPMK(Math.nextDown(1.0), Integer.MAX_VALUE, Integer.MAX_VALUE));
+ shape = Shape.fromPMK(Math.nextUp(0.0), 5, 5);
+ assertEquals(1.0, shape.getProbability(Integer.MAX_VALUE)); // Integer.MAX_VALUE items in 5 bits: asserted to be exactly 1
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromPMK(Math.nextDown(1.0), 5, 5));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromPMK(0.0, 5, 5));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromPMK(0.5, 0, 5));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromPMK(0.5, 5, 0));
+ }
+
+ /**
+ * Tests the values calculated by {@code Shape.fromNP} from the number of items and the probability, and that
+ * invalid arguments are rejected.
+ */
+ @Test
+ public void testFromNP() {
+ /*
+ * inputs are n = 10, p = 1/2000000 (the hur.st link previously cited here, ?n=5&p=.1&m=24&k=3, does not match them)
+ */
+ final double probability = 1.0 / 2000000;
+ Shape shape = Shape.fromNP(10, probability);
+ // both the bit count and the number of hash functions are derived from n and p
+ assertEquals(302, shape.getNumberOfBits());
+ assertEquals(21, shape.getNumberOfHashFunctions());
+ // boundary item counts and probabilities must be rejected
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(Integer.MAX_VALUE, Math.nextDown(1.0)));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(0, probability));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(5, 0.0));
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(Integer.MAX_VALUE, Math.nextUp(0.0)));
+ // Test that if calculated number of bits is greater than Integer.MAX_VALUE an
+ // IllegalArgumentException is thrown.
+ assertThrows(IllegalArgumentException.class, () -> Shape.fromNP(Integer.MAX_VALUE, 0.1));
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java
new file mode 100644
index 0000000000..c5c91f4748
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for the {@link SimpleBloomFilter}, including construction from another Bloom filter.
+ */
+public class SimpleBloomFilterTest extends AbstractBloomFilterTest {
+ @Override
+ protected SimpleBloomFilter createEmptyFilter(final Shape shape) {
+ return new SimpleBloomFilter(shape);
+ }
+
+ @Override
+ protected SimpleBloomFilter createFilter(final Shape shape, final Hasher hasher) {
+ return new SimpleBloomFilter(shape, hasher);
+ }
+
+ @Override
+ protected SimpleBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
+ return new SimpleBloomFilter(shape, producer);
+ }
+
+ @Override
+ protected SimpleBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
+ return new SimpleBloomFilter(shape, producer);
+ }
+
+ private void executeNestedTest(SimpleBloomFilterTest nestedTest) { // runs inherited tests on the nested fixture
+ nestedTest.testAsBitMapArray();
+ nestedTest.testContains();
+ nestedTest.testEstimateIntersection();
+ nestedTest.testEstimateN();
+ nestedTest.testEstimateUnion();
+ nestedTest.testIsFull();
+ nestedTest.testMerge();
+ nestedTest.testMergeInPlace();
+ }
+
+ @Test
+ public void testConstructors() {
+ // Each nested fixture builds its filters by passing another Bloom filter to the SimpleBloomFilter constructor.
+ // copy of Sparse
+ SimpleBloomFilterTest nestedTest = new SimpleBloomFilterTest() {
+
+ @Override
+ protected SimpleBloomFilter createEmptyFilter(Shape shape) {
+ return new SimpleBloomFilter(new SparseBloomFilter(shape));
+ }
+
+ @Override
+ protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
+ return new SimpleBloomFilter(new SparseBloomFilter(shape, hasher));
+ }
+ };
+ executeNestedTest(nestedTest);
+
+ // copy of Simple
+ nestedTest = new SimpleBloomFilterTest() {
+
+ @Override
+ protected SimpleBloomFilter createEmptyFilter(Shape shape) {
+ return new SimpleBloomFilter(new SimpleBloomFilter(shape));
+ }
+
+ @Override
+ protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
+ return new SimpleBloomFilter(new SimpleBloomFilter(shape, hasher));
+ }
+ };
+ executeNestedTest(nestedTest);
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleHasherTest.java
new file mode 100644
index 0000000000..cb52bf80a8
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleHasherTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link SimpleHasher}.
+ */
+public class SimpleHasherTest extends AbstractHasherTest {
+
+ @Override
+ protected Hasher createHasher() {
+ return new SimpleHasher(1, 1);
+ }
+
+ @Override
+ protected Hasher createEmptyHasher() {
+ return NullHasher.INSTANCE;
+ }
+
+ @Override
+ protected int getHasherSize(Hasher hasher) {
+ return 1;
+ }
+
+ private void assertConstructorBuffer(Shape shape, byte[] buffer, Integer[] expected) {
+ SimpleHasher hasher = new SimpleHasher(buffer);
+ List<Integer> lst = new ArrayList<>(); // typed list: indices are boxed by lst::add
+ IndexProducer producer = hasher.indices(shape);
+ producer.forEachIndex(lst::add);
+ assertEquals(expected.length, lst.size());
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals(expected[i], lst.get(i)); // indices must match in order
+ }
+ }
+
+ private void assertIncrement(SimpleHasher hasher, long defaultIncrement) {
+ assertEquals(defaultIncrement, hasher.getDefaultIncrement());
+ int[] values = hasher.indices(Shape.fromKM(2, Integer.MAX_VALUE)).asIndexArray();
+ assertEquals(0, values[0]);
+ assertEquals(Long.remainderUnsigned(defaultIncrement, Integer.MAX_VALUE), values[1]);
+ }
+
+ @Test
+ public void testConstructor() {
+ Shape shape = Shape.fromKM(5, 10);
+ assertConstructorBuffer(shape, new byte[] { 1, 1 }, new Integer[] { 1, 2, 3, 4, 5 });
+ assertConstructorBuffer(shape, new byte[] { 1 }, new Integer[] { 0, 1, 2, 3, 4 });
+ assertConstructorBuffer(shape, new byte[] { 1, 0, 1 }, new Integer[] { 1, 2, 3, 4, 5 });
+ assertConstructorBuffer(shape, new byte[] { 0, 1, 0, 1 }, new Integer[] { 1, 2, 3, 4, 5 });
+ assertConstructorBuffer(shape, new byte[] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1 },
+ new Integer[] { 1, 2, 3, 4, 5 });
+ assertConstructorBuffer(shape, new byte[] { 0, 0, 0, 0, 0, 0, 0, 1, 5, 5, 0, 0, 0, 0, 0, 0, 0, 1, 5, 5 },
+ new Integer[] { 1, 2, 3, 4, 5 });
+ assertConstructorBuffer(shape, new byte[] { 0, 0, 0, 0, 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 1, 5, 5 },
+ new Integer[] { 1, 2, 3, 4, 5 });
+
+ // test empty buffer
+ assertThrows(IllegalArgumentException.class, () -> new SimpleHasher(new byte[0]));
+
+ // test zero incrementer gets default
+ // default increment from SimpleHasher.
+ long defaultIncrement = 0x9e3779b97f4a7c15L;
+ SimpleHasher hasher = new SimpleHasher(0, 0);
+ assertIncrement(hasher, defaultIncrement);
+ assertIncrement(new SimpleHasher(new byte[2]), defaultIncrement);
+
+ // test that changing default increment works
+ // (the anonymous subclasses below override getDefaultIncrement() to return this value)
+ defaultIncrement = 4L;
+ hasher = new SimpleHasher(0, 0) {
+ @Override
+ public long getDefaultIncrement() {
+ return 4L;
+ }
+ };
+ assertIncrement(hasher, defaultIncrement);
+ hasher = new SimpleHasher(new byte[2]) {
+ @Override
+ public long getDefaultIncrement() {
+ return 4L;
+ }
+ };
+
+ assertEquals(defaultIncrement, hasher.getDefaultIncrement());
+ }
+
+ @Test
+ public void testModEdgeCases() {
+ for (long dividend : new long[] { -1, -2, -3, -6378683, -23567468136887892L, Long.MIN_VALUE, 345, 678686,
+ 67868768686878924L, Long.MAX_VALUE }) {
+ for (int divisor : new int[] { 1, 2, 3, 5, 13, Integer.MAX_VALUE }) {
+ assertEquals((int) Long.remainderUnsigned(dividend, divisor), SimpleHasher.mod(dividend, divisor),
+ () -> String.format("failure with dividend=%s and divisor=%s.", dividend, divisor));
+ }
+ }
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java
new file mode 100644
index 0000000000..2b2ba0fb5a
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for the {@link SparseBloomFilter}, including construction from another Bloom filter.
+ */
+public class SparseBloomFilterTest extends AbstractBloomFilterTest {
+ @Override
+ protected SparseBloomFilter createEmptyFilter(final Shape shape) {
+ return new SparseBloomFilter(shape);
+ }
+
+ @Override
+ protected SparseBloomFilter createFilter(final Shape shape, final Hasher hasher) {
+ return new SparseBloomFilter(shape, hasher);
+ }
+
+ @Override
+ protected SparseBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
+ return new SparseBloomFilter(shape, producer);
+ }
+
+ @Override
+ protected SparseBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
+ return new SparseBloomFilter(shape, producer);
+ }
+
+ private void executeNestedTest(SparseBloomFilterTest nestedTest) { // runs inherited tests on the nested fixture
+ nestedTest.testContains();
+ nestedTest.testEstimateIntersection();
+ nestedTest.testEstimateN();
+ nestedTest.testEstimateUnion();
+ nestedTest.testIsFull();
+ nestedTest.testMerge();
+ nestedTest.testMergeInPlace();
+ }
+
+ @Test
+ public void testConstructors() {
+ // Each nested fixture builds its filters by passing another Bloom filter to the SparseBloomFilter constructor.
+ // copy of Sparse
+ SparseBloomFilterTest nestedTest = new SparseBloomFilterTest() {
+
+ @Override
+ protected SparseBloomFilter createEmptyFilter(Shape shape) {
+ return new SparseBloomFilter(new SparseBloomFilter(shape));
+ }
+
+ @Override
+ protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
+ return new SparseBloomFilter(new SparseBloomFilter(shape, hasher));
+ }
+ };
+ executeNestedTest(nestedTest);
+
+ // copy of Simple
+ nestedTest = new SparseBloomFilterTest() {
+
+ @Override
+ protected SparseBloomFilter createEmptyFilter(Shape shape) {
+ return new SparseBloomFilter(new SimpleBloomFilter(shape));
+ }
+
+ @Override
+ protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
+ return new SparseBloomFilter(new SimpleBloomFilter(shape, hasher));
+ }
+ };
+ executeNestedTest(nestedTest);
+ }
+
+ @Test
+ public void testBitMapProducerEdgeCases() {
+ int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 65, 66, 67, 68, 69, 70, 71 };
+ BloomFilter bf = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
+
+ // verify exit early before bitmap boundary: the predicate rejects the first bitmap, so it is called only once
+ int[] passes = new int[1];
+ assertFalse(bf.forEachBitMap(l -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(1, passes[0]);
+
+ // verify exit early at bitmap boundary: the first bitmap is accepted, the next call is rejected
+ bf = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
+ passes[0] = 0;
+ assertFalse(bf.forEachBitMap(l -> {
+ boolean result = passes[0] == 0;
+ if (result) {
+ passes[0]++; // counts accepted bitmaps only
+ }
+ return result;
+ }));
+ assertEquals(1, passes[0]);
+
+ // verify add extra if all values in first bitmap: the predicate is still called for a second bitmap
+ values = new int[] { 1, 2, 3, 4 };
+ bf = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
+ passes[0] = 0;
+ assertTrue(bf.forEachBitMap(l -> {
+ passes[0]++;
+ return true;
+ }));
+ assertEquals(2, passes[0]);
+
+ // verify exit early if all values in first bitmap and predicate returns false
+ // on 2nd block
+ values = new int[] { 1, 2, 3, 4 };
+ bf = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
+ passes[0] = 0;
+ assertFalse(bf.forEachBitMap(l -> {
+ boolean result = passes[0] == 0;
+ if (result) {
+ passes[0]++; // counts accepted bitmaps only
+ }
+ return result;
+ }));
+ assertEquals(1, passes[0]);
+ }
+
+ @Test
+ public void testBloomFilterBasedMergeInPlaceEdgeCases() {
+ BloomFilter bf1 = createEmptyFilter(getTestShape());
+ BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from1);
+ bf1.mergeInPlace(bf2);
+ assertTrue(bf2.forEachBitMapPair(bf1, (x, y) -> x == y)); // after the merge every bitmap pair must match
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherCollectionTest.java
similarity index 63%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherCollectionTest.java
index 9a2078d80c..4aaf9141a0 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherCollectionTest.java
@@ -16,22 +16,15 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
+public class UniqueIndexProducerFromHasherCollectionTest extends AbstractIndexProducerTest {
-/**
- * Tests for the {@link BitSetBloomFilter}.
- */
-public class BitSetBloomFilterTest extends AbstractBloomFilterTest {
@Override
- protected BitSetBloomFilter createEmptyFilter(final Shape shape) {
- return new BitSetBloomFilter(shape);
+ protected IndexProducer createProducer() {
+ return new HasherCollection(new SimpleHasher(0, 1), new SimpleHasher(0, 2)).uniqueIndices(Shape.fromKM(17, 72));
}
@Override
- protected BitSetBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- final BitSetBloomFilter testFilter = new BitSetBloomFilter(shape);
- testFilter.merge( hasher );
- return testFilter;
+ protected IndexProducer createEmptyProducer() {
+ return new HasherCollection().uniqueIndices(Shape.fromKM(17, 72));
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherTest.java
new file mode 100644
index 0000000000..f711a57201
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/UniqueIndexProducerFromHasherTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+public class UniqueIndexProducerFromHasherTest extends AbstractIndexProducerTest {
+ // Supplies the producers under test: unique indices from a SimpleHasher, and an empty producer from NullHasher.
+ @Override
+ protected IndexProducer createProducer() {
+ return new SimpleHasher(0, 1).uniqueIndices(Shape.fromKM(17, 72));
+ }
+
+ @Override
+ protected IndexProducer createEmptyProducer() {
+ return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)); // NOTE(review): indices(), not uniqueIndices() - confirm
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/checkstyle.xml b/src/test/java/org/apache/commons/collections4/bloomfilter/checkstyle.xml
new file mode 100644
index 0000000000..0b79c22dca
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/checkstyle.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
deleted file mode 100644
index 5e925b9755..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.nio.charset.StandardCharsets;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator.OfInt;
-
-import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-/**
- * {@link DynamicHasher.Builder} tests.
- */
-public class DynamicHasherBuilderTest {
-
- private DynamicHasher.Builder builder;
- private final HashFunction hf = new MD5Cyclic();
- private final Shape shape = new Shape(hf, 1, 345, 1);
- private final String testString = HasherBuilderTest.getExtendedString();
-
- /**
- * Tests that hashing a byte array works as expected.
- */
- @Test
- public void buildTest_byteArray() {
- final byte[] bytes = testString.getBytes();
- final DynamicHasher hasher = builder.with(bytes).build();
- final int expected = (int) Math.floorMod((long) hf.apply(bytes, 0), (long) shape.getNumberOfBits());
-
- final OfInt iter = hasher.iterator(shape);
-
- assertTrue(iter.hasNext());
- assertEquals(expected, iter.nextInt());
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that an empty hasher works as expected.
- */
- @Test
- public void buildTest_Empty() {
- final DynamicHasher hasher = builder.build();
-
- final OfInt iter = hasher.iterator(shape);
-
- assertFalse(iter.hasNext());
-
- assertThrows(NoSuchElementException.class, () -> iter.nextInt(), "Should have thrown NoSuchElementException");
- }
-
- /**
- * Tests that hashing a string works as expected.
- */
- @Test
- public void buildTest_String() {
- final byte[] bytes = testString.getBytes(StandardCharsets.UTF_8);
- final DynamicHasher hasher = builder.with(testString, StandardCharsets.UTF_8).build();
- final int expected = (int) Math.floorMod((long) hf.apply(bytes, 0), (long) shape.getNumberOfBits());
-
- final OfInt iter = hasher.iterator(shape);
-
- assertTrue(iter.hasNext());
- assertEquals(expected, iter.nextInt());
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that hashing a string works as expected.
- */
- @Test
- public void buildTest_UnencodedString() {
- final byte[] bytes = testString.getBytes(StandardCharsets.UTF_16LE);
- final DynamicHasher hasher = builder.withUnencoded(testString).build();
- final int expected = (int) Math.floorMod((long) hf.apply(bytes, 0), (long) shape.getNumberOfBits());
-
- final OfInt iter = hasher.iterator(shape);
-
- assertTrue(iter.hasNext());
- assertEquals(expected, iter.nextInt());
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that build resets the builder.
- */
- @Test
- public void buildResetTest() {
- builder.with(new byte[] {123});
- final OfInt iter = builder.build().iterator(shape);
-
- assertTrue(iter.hasNext());
- iter.next();
- assertFalse(iter.hasNext());
-
- // Nothing added since last build so it should be an empty hasher
- final OfInt iter2 = builder.build().iterator(shape);
- assertFalse(iter2.hasNext());
- }
-
- /**
- * Sets up the builder for testing.
- */
- @BeforeEach
- public void setup() {
- builder = new DynamicHasher.Builder(hf);
- }
-
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
deleted file mode 100644
index 10de0b5b30..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.nio.charset.StandardCharsets;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator.OfInt;
-
-import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
-
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the {@link DynamicHasher}.
- */
-public class DynamicHasherTest {
-
- private DynamicHasher.Builder builder;
- private Shape shape;
-
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-
- };
-
- /**
- * Sets up the DynamicHasher.
- */
- @BeforeEach
- public void setup() {
- builder = new DynamicHasher.Builder(new MD5Cyclic());
- shape = new Shape(new MD5Cyclic(), 3, 72, 17);
- }
-
- /**
- * Tests that the expected bits are returned from hashing.
- */
- @Test
- public void testGetBits() {
-
- final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62};
-
- final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
-
- final OfInt iter = hasher.iterator(shape);
-
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that bits from multiple hashes are returned correctly.
- */
- @Test
- public void testGetBits_MultipleHashes() {
- final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69,
- 59, 49, 39, 13, 3, 65, 55, 45, 35, 25};
-
- final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).with("World", StandardCharsets.UTF_8).build();
-
- final OfInt iter = hasher.iterator(shape);
-
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
-
- assertThrows(NoSuchElementException.class, () -> iter.next(), "Should have thrown NoSuchElementException");
- }
-
- /**
- * Tests that retrieving bits for the wrong shape throws an exception.
- */
- @Test
- public void testGetBits_WrongShape() {
-
- final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
-
- assertThrows(IllegalArgumentException.class, () -> hasher.iterator(new Shape(testFunction, 3, 72, 17)), "Should have thrown IllegalArgumentException");
- }
-
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java
deleted file mode 100644
index 479cfa5188..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the HashFunctionIdentity implementation ({@link HashFunctionIdentityImpl})..
- */
-public class HashFunctionIdentityImplTest {
-
- /**
- * Tests a copy constructor of the HashFunctionIdentity.
- */
- @Test
- public void copyConstructorTest() {
- final HashFunctionIdentity identity = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "NAME";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Provider";
- }
-
- @Override
- public long getSignature() {
- return -1L;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-
- };
- final HashFunctionIdentityImpl impl = new HashFunctionIdentityImpl(identity);
- assertEquals("NAME", impl.getName());
- assertEquals("Provider", impl.getProvider());
- assertEquals(Signedness.SIGNED, impl.getSignedness());
- assertEquals(ProcessType.CYCLIC, impl.getProcessType());
- assertEquals(-1L, impl.getSignature());
- }
-
- /**
- * Test the constructor from component values.
- */
- @Test
- public void valuesConstructorTest() {
- final HashFunctionIdentityImpl impl = new HashFunctionIdentityImpl("Provider", "NAME", Signedness.UNSIGNED,
- ProcessType.ITERATIVE, -2L);
- assertEquals("NAME", impl.getName());
- assertEquals("Provider", impl.getProvider());
- assertEquals(Signedness.UNSIGNED, impl.getSignedness());
- assertEquals(ProcessType.ITERATIVE, impl.getProcessType());
- assertEquals(-2L, impl.getSignature());
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidatorTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidatorTest.java
deleted file mode 100644
index e68df55b26..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidatorTest.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests of the {@link HashFunctionValidator}.
- */
-public class HashFunctionValidatorTest {
-
- /**
- * Tests that name is used in the equality check.
- */
- @Test
- public void testName() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl2", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl1, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Tests that name is not affected by case.
- */
- @Test
- public void testNameIsCaseInsensitive() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "IMPL1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl2));
- }
-
- /**
- * Tests that process type is used in the equality check.
- */
- @Test
- public void testProcessType() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.ITERATIVE, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl1, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Tests that provider is not used in the equality check.
- */
- @Test
- public void testProviderIsNotUsedInEqualityCheck() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite2", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertTrue(HashFunctionValidator.areEqual(impl1, impl2));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Tests that signedness is used in the equality check.
- */
- @Test
- public void testSignedness() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl1, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Test the check method throws when the two hash functions are not equal.
- */
- @Test
- public void testCheckThrows() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
- ProcessType.CYCLIC, 300L);
- assertThrows(IllegalArgumentException.class, () -> HashFunctionValidator.checkAreEqual(impl1, impl2));
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java
deleted file mode 100644
index 303034053a..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder;
-import org.apache.commons.lang3.NotImplementedException;
-import org.junit.jupiter.api.Test;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.CharBuffer;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-/**
- * Tests the
- * {@link org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder Hasher.Builder}.
- */
-public class HasherBuilderTest {
-
- /**
- * Simple class to collect byte[] items added to the builder.
- */
- private static class TestBuilder implements Hasher.Builder {
- ArrayList items = new ArrayList<>();
-
- @Override
- public Hasher build() {
- throw new NotImplementedException("Not required");
- }
-
- @Override
- public Builder with(final byte[] item) {
- items.add(item);
- return this;
- }
- }
-
- /**
- * Tests that adding CharSequence items works correctly.
- */
- @Test
- public void withCharSequenceTest() {
- final String ascii = "plain";
- final String extended = getExtendedString();
- for (final String s : new String[] {ascii, extended}) {
- for (final Charset cs : new Charset[] {
- StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8, StandardCharsets.UTF_16
- }) {
- final TestBuilder builder = new TestBuilder();
- builder.with(s, cs);
- assertArrayEquals(s.getBytes(cs), builder.items.get(0));
- }
- }
- }
-
- /**
- * Tests that adding unencoded CharSequence items works correctly.
- */
- @Test
- public void withUnencodedCharSequenceTest() {
- final String ascii = "plain";
- final String extended = getExtendedString();
- for (final String s : new String[] {ascii, extended}) {
- final TestBuilder builder = new TestBuilder();
- builder.withUnencoded(s);
- final byte[] encoded = builder.items.get(0);
- final char[] original = s.toCharArray();
- // Should be twice the length
- assertEquals(original.length * 2, encoded.length);
- // Should be little endian (lower bits first)
- final CharBuffer buffer = ByteBuffer.wrap(encoded)
- .order(ByteOrder.LITTLE_ENDIAN).asCharBuffer();
- for (int i = 0; i < original.length; i++) {
- assertEquals(original[i], buffer.get(i));
- }
- }
- }
-
- /**
- * Gets a string with non-standard characters.
- *
- * @return the extended string
- */
- static String getExtendedString() {
- final char[] data = {'e', 'x', 't', 'e', 'n', 'd', 'e', 'd', ' ',
- // Add some characters that are non standard
- // non-ascii
- 0xCA98,
- // UTF-16 surrogate pair
- 0xD803, 0xDE6D
- // Add other cases here ...
- };
- return String.valueOf(data);
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java
deleted file mode 100644
index a393451a50..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertAll;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotEquals;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-
-import java.util.ArrayList;
-
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the {@link Shape} class.
- */
-public class ShapeTest {
-
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-
- };
-
- /*
- * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
- *
- * n = 5
- *
- * p = 0.100375138 (1 in 10)
- *
- * m = 24 (3B)
- *
- * k = 3
- */
-
- private final Shape shape = new Shape(testFunction, 5, 0.1);
-
- /**
- * Tests that if the number of bits less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_bits_BadNumberOfBitsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 5, 0),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the number of hash functions is less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_bits_BadNumberOfHashFunctionsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 16, 8),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the number of items less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_bits_BadNumberOfItemsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 0, 24),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the number of bits is less than 1 an exception is thrown
- */
- @Test
- public void constructor_items_bits_hash_BadNumberOfBitsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 5, 0, 1),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the number of hash functions is less than 1 an exception is thrown.
- */
- @Test
- public void constructor_items_bits_hash_BadNumberOfHashFunctionsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 5, 24, 0),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the number of items is less than 1 an exception is thrown.
- */
- @Test
- public void constructor_items_bits_hash_BadNumberOfItemsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 0, 24, 1),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the calculated probability is greater than or equal to 1 an IllegalArgumentException is thrown
- */
- @Test
- public void constructor_items_bits_hash_BadProbabilityTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 4000, 8, 1),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that when the number of items, number of bits and number of hash functions is passed the values are
- * calculated correctly.
- */
- @Test
- public void constructor_items_bits_hashTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&m=24&k=4
- */
- final Shape filterConfig = new Shape(testFunction, 5, 24, 4);
-
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(4, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.102194782, filterConfig.getProbability(), 0.000001);
- }
-
- /**
- * Tests that the number of items and number of bits is passed the other values are calculated correctly.
- */
- @Test
- public void constructor_items_bitsTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&m=24
- */
- final Shape filterConfig = new Shape(testFunction, 5, 24);
-
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(3, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
- }
-
- /**
- * Tests that if the number of items is less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_probability_BadNumberOfItemsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 0, 1.0 / 10),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the probability is less than or equal to 0 or more than or equal to 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_probability_BadProbabilityTest() {
- assertAll(
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 10, 0.0),
- "Should have thrown IllegalArgumentException"),
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 10, 1.0),
- "Should have thrown IllegalArgumentException"),
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 10, Double.NaN),
- "Should have thrown IllegalArgumentException")
- );
- }
-
- /**
- * Tests that if calculated number of bits is greater than Integer.MAX_VALUE an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_probability_NumberOfBitsOverflowTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, Integer.MAX_VALUE, 1.0 / 10),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests the the probability is calculated correctly.
- */
- @Test
- public void constructor_items_probability_Test() {
-
- assertEquals(24, shape.getNumberOfBits());
- assertEquals(3, shape.getNumberOfHashFunctions());
- assertEquals(5, shape.getNumberOfItems());
- assertEquals(0.100375138, shape.getProbability(), 0.000001);
- }
-
- /**
- * Tests that the constructor with a null name, number of items and size of filter fails.
- */
- @Test
- public void constructor_nm_noName() {
- assertThrows(NullPointerException.class, () -> new Shape(null, 5, 72),
- "Should throw NullPointerException");
- }
-
- /**
- * Tests that the constructor with a null name, number of items, size of filter, and number of functions fails.
- */
- @Test
- public void constructor_nmk_noName() {
- assertThrows(NullPointerException.class, () -> new Shape(null, 5, 72, 17),
- "Should throw NullPointerException");
- }
-
- /**
- * Tests that the constructor with a null name, number of items, and probability fails.
- */
- @Test
- public void constructor_np_noName() {
- assertThrows(NullPointerException.class, () -> new Shape(null, 5, 0.1),
- "Should throw NullPointerException");
- }
-
- /**
- * Tests that the constructor with a null name, probability, size of filter, and number of functions fails.
- */
- @Test
- public void constructor_pmk_noName() {
- assertThrows(NullPointerException.class, () -> new Shape(null, 0.1, 72, 17),
- "Should throw NullPointerException");
- }
-
- /**
- * Tests that if the number of bits is less than 1 an exception is thrown
- */
- @Test
- public void constructor_probability_bits_hash_BadNumberOfBitsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 0.5, 0, 1),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the number of functions is less than 1 an exception is thrown
- */
- @Test
- public void constructor_probability_bits_hash_BadNumberOfHashFunctionsTest() {
- assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 0.5, 24, 0),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that invalid probability values cause and IllegalArgumentException to be thrown.
- */
- @Test
- public void constructor_probability_bits_hash_BadProbabilityTest() {
- assertAll(
- // probability should not be 0
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 0.0, 24, 1),
- "Should have thrown IllegalArgumentException"),
- // probability should not be = -1
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, -1.0, 24, 1),
- "Should have thrown IllegalArgumentException"),
- // probability should not be < -1
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, -1.5, 24, 1),
- "Should have thrown IllegalArgumentException"),
- // probability should not be = 1
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 1.0, 24, 1),
- "Should have thrown IllegalArgumentException"),
- // probability should not be > 1
- () -> assertThrows(IllegalArgumentException.class, () -> new Shape(testFunction, 2.0, 24, 1),
- "Should have thrown IllegalArgumentException")
- );
- }
-
- /**
- * Tests the calculated values of calling the constructor with the probability, number of bits and number of hash
- * functions.
- */
- @Test
- public void constructor_probability_bits_hashTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&p=.1&m=24&k=3
- */
- final Shape filterConfig = new Shape(testFunction, 0.1, 24, 3);
-
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(3, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
- }
-
- /**
- * Test equality of shape.
- */
- @Test
- public void equalsTest() {
-
- assertEquals(shape, shape);
- assertEquals(shape, new Shape(testFunction, 5, 1.0 / 10));
- assertNotEquals(shape, null);
- assertNotEquals(shape, new Shape(testFunction, 5, 1.0 / 11));
- assertNotEquals(shape, new Shape(testFunction, 4, 1.0 / 10));
- // Number of bits does not change equality,
- // only the number of bits and the number of hash functions
- final int numberOfBits = 10000;
- final int numberOfItems = 15;
- final int numberOfHashFunctions = 4;
- assertEquals(new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions),
- new Shape(testFunction, numberOfItems + 1, numberOfBits, numberOfHashFunctions));
- assertNotEquals(new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions),
- new Shape(testFunction, numberOfItems, numberOfBits + 1, numberOfHashFunctions));
- assertNotEquals(new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions),
- new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions + 1));
-
- final HashFunctionIdentity testFunction2 = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function2";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-
- };
-
- assertNotEquals(shape, new Shape(testFunction2, 4, 1.0 / 10));
- }
-
- /**
- * Test that hashCode satisfies the contract between {@link Object#hashCode()} and
- * {@link Object#equals(Object)}. Equal shapes must have the same hash code.
- */
- @Test
- public void hashCodeTest() {
- // Hash function equality is based on process type, signedness and name (case insensitive)
- final ArrayList list = new ArrayList<>();
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- // Provider changes
- list.add(new HashFunctionIdentityImpl("PROVIDER", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- list.add(new HashFunctionIdentityImpl("Provider2", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- // Name changes
- list.add(new HashFunctionIdentityImpl("Provider", "name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- list.add(new HashFunctionIdentityImpl("Provider", "NAME", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- list.add(new HashFunctionIdentityImpl("Provider", "Other", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- // Signedness changes
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.UNSIGNED, ProcessType.ITERATIVE, 0L));
- // ProcessType changes
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.SIGNED, ProcessType.CYCLIC, 0L));
- // Signature changes
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 1L));
-
- // Create shapes that only differ in the hash function.
- final int numberOfItems = 30;
- final int numberOfBits = 3000;
- final int numberOfHashFunctions = 10;
- final Shape shape1 = new Shape(list.get(0), numberOfItems, numberOfBits, numberOfHashFunctions);
- assertEquals(shape1, shape1);
-
- // Try variations
- for (int i = 1; i < list.size(); i++) {
- final Shape shape2 = new Shape(list.get(i), numberOfItems, numberOfBits, numberOfHashFunctions);
- assertEquals(shape2, shape2);
-
- // Equal shapes must have the same hash code
- if (shape1.equals(shape2)) {
- assertEquals(shape1.hashCode(), shape2.hashCode());
- }
- }
- }
-
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java
deleted file mode 100644
index 7522d36ad4..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.PrimitiveIterator.OfInt;
-
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the {@link StaticHasher}.
- */
-public class StaticHasherTest {
-
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-
- };
-
- private final HashFunctionIdentity testFunctionX = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test FunctionX";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-
- };
-
- private final Shape shape = new Shape(testFunction, 3, 72, 17);
-
- /**
- * Compare 2 static hashers to verify they have the same bits enabled.
- *
- * @param hasher1 the first static hasher.
- * @param hasher2 the second static hasher.
- */
- private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
- final OfInt iter1 = hasher1.iterator(shape);
- final OfInt iter2 = hasher2.iterator(shape);
-
- while (iter1.hasNext()) {
- assertTrue(iter2.hasNext(), "Not enough data in second hasher");
- assertEquals(iter1.nextInt(), iter2.nextInt());
- }
- assertFalse(iter2.hasNext(), "Too much data in second hasher");
- }
-
- /**
- * Tests that passing a hasher other than a StaticHasher to the constructor works as
- * expected.
- */
- @Test
- public void testConstructor_Hasher() {
- final int[] expected = {1, 3, 5, 7, 9};
-
- final Hasher testHasher = new Hasher() {
-
- @Override
- public OfInt iterator(final Shape shape) {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- return Arrays.stream(values).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return testFunction;
- }
- };
-
- final StaticHasher hasher = new StaticHasher(testHasher, shape);
- final OfInt iter = hasher.iterator(shape);
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that passing a hasher other than a StaticHasher and the wrong Shape to the
- * constructor throws an IllegalArgumentException.
- */
- @Test
- public void testConstructor_Hasher_WrongShape() {
- final Hasher testHasher = new Hasher() {
-
- @Override
- public OfInt iterator(final Shape shape) {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- return Arrays.stream(values).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return testFunctionX;
- }
- };
-
- assertThrows(IllegalArgumentException.class, () -> new StaticHasher(testHasher, shape),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Test that the iterator based constructor works correctly and removes duplicates.
- */
- @Test
- public void testConstructor_Iterator() {
-
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- Iterator iter = Arrays.stream(values).iterator();
- final StaticHasher hasher = new StaticHasher(iter, shape);
-
- assertEquals(5, hasher.size());
- assertEquals(shape, hasher.getShape());
- // All function properties are equal
- assertEquals(testFunction.getName(), hasher.getHashFunctionIdentity().getName());
- assertEquals(testFunction.getProcessType(), hasher.getHashFunctionIdentity().getProcessType());
- assertEquals(testFunction.getProvider(), hasher.getHashFunctionIdentity().getProvider());
- assertEquals(testFunction.getSignedness(), hasher.getHashFunctionIdentity().getSignedness());
-
- iter = hasher.iterator(shape);
- int idx = 0;
- while (iter.hasNext()) {
- assertEquals(Integer.valueOf(values[idx]), iter.next(), "Error at idx " + idx);
- idx++;
- }
- assertEquals(5, idx);
- }
-
- /**
- * Tests that if the iterator passed to the constructor contains a value greater than
- * or equal to Shape.numberOfBits() an exception is thrown.
- */
- @Test
- public void testConstructor_Iterator_ValueTooBig() {
-
- final int[] values = {shape.getNumberOfBits(), 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
-
- assertThrows(IllegalArgumentException.class, () -> new StaticHasher(iter, shape),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that if the iterator passed to the constructor contains a value less than 0
- * (zero) an exception is thrown.
- */
- @Test
- public void testConstructor_Iterator_ValueTooSmall() {
-
- final int[] values = {-1, 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
-
- assertThrows(IllegalArgumentException.class, () -> new StaticHasher(iter, shape),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that the constructor that accepts a static hasher properly builds the hasher.
- */
- @Test
- public void testConstructor_StaticHasher() {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
- final StaticHasher hasher = new StaticHasher(iter, shape);
-
- final StaticHasher hasher2 = new StaticHasher(hasher, shape);
- assertEquals(shape, hasher2.getShape());
- assertSameBits(hasher, hasher2);
- }
-
- /**
- * Tests that calling the constructor with a hasher and the wrong shape throws an
- * IllegalArgumentException.
- */
- @Test
- public void testConstructor_StaticHasher_WrongShape() {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
- final StaticHasher hasher = new StaticHasher(iter, new Shape(testFunctionX, 3, 72, 17));
-
- assertThrows(IllegalArgumentException.class, () -> new StaticHasher(hasher, shape),
- "Should have thrown IllegalArgumentException");
- }
-
- /**
- * Tests that iterator returns the proper values.
- */
- @Test
- public void testGetBits() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- assertEquals(17, hasher.size());
- final OfInt iter = hasher.iterator(shape);
- for (int i = 0; i < 17; i++) {
- assertTrue(iter.hasNext());
- assertEquals(i, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that iterator does not return duplicates and orders the indices.
- */
- @Test
- public void testGetBits_DuplicateValues() {
- final int[] input = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69, 59,
- 49, 39, 13, 3, 65, 55, 45, 35, 25};
- final int[] expected = {1, 2, 3, 6, 7, 10, 11, 13, 15, 17, 19, 23, 24, 25, 35, 36, 39, 43, 44, 45, 48, 49, 53, 55, 57,
- 59, 61, 62, 63, 65, 69, 70};
-
- final StaticHasher hasher = new StaticHasher(Arrays.stream(input).iterator(), shape);
-
- final OfInt iter = hasher.iterator(shape);
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that gitBits is called with the wrong shape an exception is thrown.
- */
- @Test
- public void testGetBits_WrongShape() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
-
- assertThrows(IllegalArgumentException.class, () -> hasher.iterator(new Shape(testFunctionX, 3, 72, 17)),
- "Should have thrown IllegalArgumentException");
- }
-
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/AbstractHashFunctionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/AbstractHashFunctionTest.java
deleted file mode 100644
index 5498d699cb..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/AbstractHashFunctionTest.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the signature of a hash function.
- */
-public abstract class AbstractHashFunctionTest {
-
- /**
- * Test that the signature is properly generated.
- */
- @Test
- public void signatureTest() {
- final HashFunction hf = createHashFunction();
- final long expected = hf.apply(HashFunctionIdentity.prepareSignatureBuffer(hf), 0);
- assertEquals(expected, hf.getSignature());
- // Should be repeatable
- final long expected2 = hf.apply(HashFunctionIdentity.prepareSignatureBuffer(hf), 0);
- assertEquals(expected, expected2);
- assertEquals("Apache Commons Collections", hf.getProvider());
- }
-
- /**
- * Creates the hash function.
- *
- * @return the hash function
- */
- protected abstract HashFunction createHashFunction();
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5CyclicTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5CyclicTest.java
deleted file mode 100644
index 9b0d9a83e1..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5CyclicTest.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the MD5 cyclic hash function.
- */
-public class MD5CyclicTest extends AbstractHashFunctionTest {
-
- /**
- * Test that the apply function returns the proper values.
- */
- @Test
- public void applyTest() {
- final MD5Cyclic md5 = new MD5Cyclic();
- final long l1 = 0x8b1a9953c4611296L;
- final long l2 = 0xa827abf8c47804d7L;
- final byte[] buffer = "Hello".getBytes();
-
- long l = md5.apply(buffer, 0);
- assertEquals(l1, l);
- l = md5.apply(buffer, 1);
- assertEquals(l1 + l2, l);
- l = md5.apply(buffer, 2);
- assertEquals(l1 + l2 + l2, l);
- }
-
- @Override
- protected HashFunction createHashFunction() {
- return new MD5Cyclic();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64CyclicTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64CyclicTest.java
deleted file mode 100644
index 9e17c2ec89..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64CyclicTest.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.nio.charset.StandardCharsets;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.junit.jupiter.api.Test;
-
-/**
- * Test that the Murmur3 128 x64 hash function works correctly.
- */
-public class Murmur128x64CyclicTest extends AbstractHashFunctionTest {
-
- /**
- * Test that the apply function returns the proper values.
- */
- @Test
- public void applyTest() {
- final Murmur128x64Cyclic murmur = new Murmur128x64Cyclic();
-
- final long l1 = 0xe7eb60dabb386407L;
- final long l2 = 0xc3ca49f691f73056L;
- final byte[] buffer = "Now is the time for all good men to come to the aid of their country"
- .getBytes(StandardCharsets.UTF_8);
-
- long l = murmur.apply(buffer, 0);
- assertEquals(l1, l);
- l = murmur.apply(buffer, 1);
- assertEquals(l1 + l2, l);
- l = murmur.apply(buffer, 2);
- assertEquals(l1 + l2 + l2, l);
- }
-
- @Override
- protected HashFunction createHashFunction() {
- return new Murmur128x64Cyclic();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86IterativeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86IterativeTest.java
deleted file mode 100644
index bca60c1e4b..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86IterativeTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.nio.charset.StandardCharsets;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.junit.jupiter.api.Test;
-
-/**
- * Test that the Murmur3 32 x86 hash function works correctly.
- */
-public class Murmur32x86IterativeTest extends AbstractHashFunctionTest {
-
- /**
- * Test that the apply function returns the proper values.
- */
- @Test
- public void applyTest() {
- final Murmur32x86Iterative murmur = new Murmur32x86Iterative();
-
- final byte[] buffer = "Now is the time for all good men to come to the aid of their country"
- .getBytes(StandardCharsets.UTF_8);
-
- long l = murmur.apply(buffer, 0);
- assertEquals(82674681, l);
- l = murmur.apply(buffer, 1);
- assertEquals(-1475490736, l);
- l = murmur.apply(buffer, 2);
- assertEquals(-1561435247, l);
- }
-
- @Override
- protected HashFunction createHashFunction() {
- return new Murmur32x86Iterative();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterativeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterativeTest.java
deleted file mode 100644
index 5595efdc77..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterativeTest.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests that the Objects hash works correctly.
- */
-public class ObjectsHashIterativeTest extends AbstractHashFunctionTest {
-
- /**
- * Test that the apply function returns the proper values.
- */
- @Test
- public void applyTest() {
- final ObjectsHashIterative obj = new ObjectsHashIterative();
-
- final byte[] buffer = "Now is the time for all good men to come to the aid of their country"
- .getBytes(StandardCharsets.UTF_8);
-
- long l = obj.apply(buffer, 0);
- long prev = 0;
- assertEquals(Arrays.deepHashCode(new Object[] {prev, buffer}), l);
- for (int i = 1; i <= 5; i++) {
- prev += l;
- l = obj.apply(buffer, i);
- assertEquals(Arrays.deepHashCode(new Object[] {prev, buffer}), l);
- }
- }
-
- @Override
- protected HashFunction createHashFunction() {
- return new ObjectsHashIterative();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/map/AbstractMapTest.java b/src/test/java/org/apache/commons/collections4/map/AbstractMapTest.java
index a9a45197c0..21fca7c321 100644
--- a/src/test/java/org/apache/commons/collections4/map/AbstractMapTest.java
+++ b/src/test/java/org/apache/commons/collections4/map/AbstractMapTest.java
@@ -1015,7 +1015,7 @@ public void testMapRemove() {
}
/**
- * Tests that the {@link Map#values} collection is backed by
+ * Tests that the {@link Map#values} collection is backed by
* the underlying map for clear().
*/
@Test
@@ -1184,7 +1184,7 @@ public void testEntrySetRemove3() {
}
/**
- * Tests that the {@link Map#values} collection is backed by
+ * Tests that the {@link Map#values} collection is backed by
* the underlying map by removing from the values collection
* and testing if the value was removed from the map.
*