From 86f12e0af64ebf488d2a0c28dd68a8cb931b911e Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Sat, 7 Sep 2019 14:36:51 +0200
Subject: [PATCH] Add hash map for long to long mappings.

---
Note: this is a reviewed revision of the patch. The commit id above and the
blob ids in the index lines below still name the original revision.

 primitiveCollections/build.gradle             |   1 +
 .../BenchmarkLongLongMapForEach.java          |  95 +++++
 .../org/lucares/collections/LongFunction.java |   9 +
 .../lucares/collections/LongLongConsumer.java |   9 +
 .../lucares/collections/LongLongHashMap.java  | 374 ++++++++++++++++++
 .../collections/LongLongHashMapTest.java      | 249 ++++++++++++
 6 files changed, 737 insertions(+)
 create mode 100644 primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkLongLongMapForEach.java
 create mode 100644 primitiveCollections/src/main/java/org/lucares/collections/LongFunction.java
 create mode 100644 primitiveCollections/src/main/java/org/lucares/collections/LongLongConsumer.java
 create mode 100644 primitiveCollections/src/main/java/org/lucares/collections/LongLongHashMap.java
 create mode 100644 primitiveCollections/src/test/java/org/lucares/collections/LongLongHashMapTest.java

diff --git a/primitiveCollections/build.gradle b/primitiveCollections/build.gradle
index d446615..03e7feb 100644
--- a/primitiveCollections/build.gradle
+++ b/primitiveCollections/build.gradle
@@ -4,5 +4,6 @@ group='org.lucares'
 version = '0.1.'+new SimpleDateFormat("YYYYMMddHHmmss").format(new Date());
 
 dependencies {
+    jmh 'org.eclipse.collections:eclipse-collections:10.0.0'
 }
 
diff --git a/primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkLongLongMapForEach.java b/primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkLongLongMapForEach.java
new file mode 100644
index 0000000..e1d8bb9
--- /dev/null
+++ b/primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkLongLongMapForEach.java
@@ -0,0 +1,95 @@
+package org.lucares.collections;
+
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.LongStream;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * JMH benchmark that measures iterating over a {@link LongLongHashMap} and, for
+ * comparison, over the Eclipse Collections {@code LongLongHashMap}.
+ */
+@State(Scope.Benchmark)
+@BenchmarkMode(Mode.Throughput)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Fork(1)
+public class BenchmarkLongLongMapForEach {
+
+    @Param({ "1000", "10000", "50000" })
+    private int values;
+
+    private LongLongHashMap unsortedMap = new LongLongHashMap();
+    private LongLongHashMap unsortedMapLowValues = new LongLongHashMap();
+
+    private org.eclipse.collections.impl.map.mutable.primitive.LongLongHashMap eclipseUnsortedMap = new org.eclipse.collections.impl.map.mutable.primitive.LongLongHashMap();
+
+    @Setup
+    public void setup() throws Exception {
+        final int seed = 6789;
+        final Random rand = new Random(seed);
+        LongStream.generate(rand::nextLong).limit(values).forEach(l -> unsortedMap.put(l, 2 * l));
+
+        rand.setSeed(seed);
+        final LongList lowValues = LongList.range(0, values);
+        lowValues.shuffle(rand);
+        lowValues.stream().forEach(l -> unsortedMapLowValues.put(l, 2 * l));
+
+        rand.setSeed(seed);
+        LongStream.generate(rand::nextLong).limit(values).forEach(l -> eclipseUnsortedMap.put(l, 2 * l));
+    }
+
+    @TearDown
+    public void tearDown() {
+        unsortedMap = null;
+        unsortedMapLowValues = null;
+        eclipseUnsortedMap = null;
+    }
+
+    @Benchmark
+    public void testLongLongMapForEach(final Blackhole blackhole) throws Exception {
+        final long[] tmp = new long[] { 0 };
+        unsortedMap.forEach((k, v) -> tmp[0] += k);
+        blackhole.consume(tmp[0]);
+    }
+
+    @Benchmark
+    public void testLongLongMapForEachLowValues(final Blackhole blackhole) throws Exception {
+        final long[] tmp = new long[] { 0 };
+        unsortedMapLowValues.forEach((k, v) -> tmp[0] += k);
+        blackhole.consume(tmp[0]);
+    }
+
+    @Benchmark
+    public void testLongLongMapForEachOrdered(final Blackhole blackhole) throws Exception {
+        final long[] tmp = new long[] { 0 };
+        unsortedMap.forEachOrdered((k, v) -> tmp[0] += k);
+        blackhole.consume(tmp[0]);
+    }
+
+    @Benchmark
+    public void testLongLongMapForEachOrderedLowValues(final Blackhole blackhole) throws Exception {
+        final long[] tmp = new long[] { 0 };
+        unsortedMapLowValues.forEachOrdered((k, v) -> tmp[0] += k);
+        blackhole.consume(tmp[0]);
+    }
+
+    @Benchmark
+    public void testEclipseLongLongMapForEach(final Blackhole blackhole) throws Exception {
+        final long[] tmp = new long[] { 0 };
+        eclipseUnsortedMap.forEachKeyValue((k, v) -> tmp[0] += k);
+        blackhole.consume(tmp[0]);
+    }
+}
diff --git a/primitiveCollections/src/main/java/org/lucares/collections/LongFunction.java b/primitiveCollections/src/main/java/org/lucares/collections/LongFunction.java
new file mode 100644
index 0000000..76de520
--- /dev/null
+++ b/primitiveCollections/src/main/java/org/lucares/collections/LongFunction.java
@@ -0,0 +1,9 @@
+package org.lucares.collections;
+
+/**
+ * Maps a primitive {@code long} to a primitive {@code long} without boxing.
+ */
+@FunctionalInterface
+public interface LongFunction {
+    long apply(long value);
+}
diff --git a/primitiveCollections/src/main/java/org/lucares/collections/LongLongConsumer.java b/primitiveCollections/src/main/java/org/lucares/collections/LongLongConsumer.java
new file mode 100644
index 0000000..19182a2
--- /dev/null
+++ b/primitiveCollections/src/main/java/org/lucares/collections/LongLongConsumer.java
@@ -0,0 +1,9 @@
+package org.lucares.collections;
+
+/**
+ * Accepts a key and a value that are both primitive {@code long}s.
+ */
+@FunctionalInterface
+public interface LongLongConsumer {
+    void accept(long key, long value);
+}
diff --git a/primitiveCollections/src/main/java/org/lucares/collections/LongLongHashMap.java b/primitiveCollections/src/main/java/org/lucares/collections/LongLongHashMap.java
new file mode 100644
index 0000000..466f01c
--- /dev/null
+++ b/primitiveCollections/src/main/java/org/lucares/collections/LongLongHashMap.java
@@ -0,0 +1,374 @@
+package org.lucares.collections;
+
+import java.util.Arrays;
+import java.util.NoSuchElementException;
+
+/**
+ * A hash map where key and value are primitive longs.
+ * <p>
+ * The map uses open addressing with linear probing. The key {@code 0} marks an
+ * empty slot of the table, therefore its mapping is stored in separate fields.
+ */
+public class LongLongHashMap {
+
+    // There is no equivalent to null for primitive values. Therefore we have to add
+    // special handling for one long value. Otherwise we couldn't tell if a key is
+    // in the map or not. We chose 0L, because a new long[] is initially all 0L.
+    private static final long NULL_KEY = 0L;
+
+    private static final long EMPTY_SLOT = 0L;
+
+    /**
+     * The maximum size of an array.
+     */
+    private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
+    private final double fillFactor;
+
+    private long[] keys;
+    private long[] values;
+    private int size = 0;
+
+    // The mapping for NULL_KEY lives outside of the table.
+    private boolean hasZeroKey = false;
+    private long zeroValue = 0L;
+
+    /**
+     * Create a new {@link LongLongHashMap} with the given initial capacity and load
+     * factor.
+     *
+     * @param initialCapacity the initial capacity; at least one slot is allocated
+     * @param loadFactor      the load factor
+     * @throws IllegalArgumentException if the initial capacity is negative or
+     *                                  larger than the maximum array size, or if
+     *                                  the load factor is not a positive number
+     */
+    public LongLongHashMap(final int initialCapacity, final double loadFactor) {
+
+        if (initialCapacity < 0) {
+            throw new IllegalArgumentException("initial capacity must be non-negative");
+        }
+        if (initialCapacity > MAX_ARRAY_SIZE) {
+            throw new IllegalArgumentException("initial capacity must be smaller or equal to " + MAX_ARRAY_SIZE);
+        }
+        if (loadFactor <= 0 || Double.isNaN(loadFactor)) {
+            throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
+        }
+
+        this.fillFactor = loadFactor;
+        // A table without slots cannot be probed (modulo by zero in spread).
+        final int capacity = Math.max(1, initialCapacity);
+        keys = new long[capacity];
+        values = new long[capacity];
+    }
+
+    /**
+     * Create a new {@link LongLongHashMap} with initial capacity 8 and load factor
+     * 0.75.
+     */
+    public LongLongHashMap() {
+        this(8, 0.75);
+    }
+
+    /**
+     * The number of entries in this map.
+     *
+     * @return the size
+     */
+    public int size() {
+        return size;
+    }
+
+    /**
+     * The capacity of this map.
+     *
+     * @return the capacity
+     */
+    int getCapacity() {
+        return keys.length;
+    }
+
+    /**
+     * Add the given key and value to the map. An existing mapping for the key is
+     * overwritten.
+     *
+     * @param key   the key
+     * @param value the value
+     * @throws IllegalStateException if the table is full and cannot grow
+     */
+    public void put(final long key, final long value) {
+
+        if (key == NULL_KEY) {
+            if (!hasZeroKey) {
+                hasZeroKey = true;
+                size++;
+            }
+            zeroValue = value;
+            return;
+        }
+
+        final int position = indexOf(key);
+        if (position >= 0) {
+            values[position] = value;
+            return;
+        }
+
+        ensureRoomForOneMoreEntry();
+        insertAbsentKey(key, value);
+        size++;
+    }
+
+    /**
+     * Returns the value for the given key if it exists. This method throws a
+     * {@link NoSuchElementException} if the key does not exist. Use
+     * {@link #containsKey(long)} to check before calling {@link #get(long)}.
+     *
+     * @param key the key
+     * @return the value if it exists
+     * @throws NoSuchElementException if the value does not exist
+     */
+    public long get(final long key) {
+
+        if (key == NULL_KEY) {
+            if (hasZeroKey) {
+                return zeroValue;
+            }
+            throw new NoSuchElementException();
+        }
+
+        final int position = indexOf(key);
+        if (position < 0) {
+            throw new NoSuchElementException();
+        }
+        return values[position];
+    }
+
+    /**
+     * Check if the map contains the given key.
+     *
+     * @param key the key
+     * @return true iff the map contains the key
+     */
+    public boolean containsKey(final long key) {
+        if (key == NULL_KEY) {
+            return hasZeroKey;
+        }
+        return indexOf(key) >= 0;
+    }
+
+    /**
+     * Remove the given key and its value from the map. Does nothing if the key is
+     * not in the map.
+     *
+     * @param key the key
+     */
+    public void remove(final long key) {
+
+        if (key == NULL_KEY) {
+            if (hasZeroKey) {
+                hasZeroKey = false;
+                zeroValue = 0L;
+                size--;
+            }
+            return;
+        }
+
+        final int position = indexOf(key);
+        if (position >= 0) {
+            removeAt(position);
+            size--;
+        }
+    }
+
+    /**
+     * Computes a mapping for the given key and its current value.
+     * <p>
+     * The mapping for given key is updated by calling {@code function} with the old
+     * value. The return value will be set as new value. If the map does not contain
+     * a mapping for the key, then {@code function} is called with
+     * {@code initialValueIfAbsent}.
+     *
+     * @param key                  the key
+     * @param initialValueIfAbsent value used if there is no current mapping for the
+     *                             key
+     * @param function             called to compute the new value
+     */
+    public void compute(final long key, final long initialValueIfAbsent, final LongFunction function) {
+        final long currentValue = containsKey(key) ? get(key) : initialValueIfAbsent;
+        // put maintains the size, also when the zero key is added
+        put(key, function.apply(currentValue));
+    }
+
+    /**
+     * Calls the {@link LongLongConsumer#accept(long, long)} method for all entries
+     * in this map. The order is based on the hash value and is therefore not
+     * deterministic. Don't rely on the order!
+     *
+     * @param consumer the consumer
+     */
+    public void forEach(final LongLongConsumer consumer) {
+
+        if (hasZeroKey) {
+            consumer.accept(NULL_KEY, zeroValue);
+        }
+
+        for (int i = 0; i < keys.length; i++) {
+            if (keys[i] != EMPTY_SLOT) {
+                consumer.accept(keys[i], values[i]);
+            }
+        }
+    }
+
+    /**
+     * Calls the {@link LongLongConsumer#accept(long, long)} method for all entries
+     * in this map. This method iterates over the keys in ascending order.
+     * <p>
+     * Note: this method is slower than {@link #forEach(LongLongConsumer)}.
+     *
+     * @param consumer the consumer
+     */
+    public void forEachOrdered(final LongLongConsumer consumer) {
+
+        final long[] sortedKeys = new long[entriesInTable()];
+        int count = 0;
+        for (final long key : keys) {
+            if (key != EMPTY_SLOT) {
+                sortedKeys[count++] = key;
+            }
+        }
+        Arrays.parallelSort(sortedKeys);
+
+        // The zero key is not part of the table. In ascending order it comes after
+        // the negative keys and before the positive keys.
+        final int firstPositive = findPosOfFirstPositiveKey(sortedKeys);
+        final int endOfNegatives = firstPositive < 0 ? sortedKeys.length : firstPositive;
+
+        for (int i = 0; i < endOfNegatives; i++) {
+            consumer.accept(sortedKeys[i], get(sortedKeys[i]));
+        }
+        if (hasZeroKey) {
+            consumer.accept(NULL_KEY, zeroValue);
+        }
+        for (int i = endOfNegatives; i < sortedKeys.length; i++) {
+            consumer.accept(sortedKeys[i], get(sortedKeys[i]));
+        }
+    }
+
+    /**
+     * Finds the position of the first key that is greater than zero.
+     *
+     * @param sortedKeys keys in ascending order
+     * @return the index of the first positive key, or -1 if there is none
+     */
+    static int findPosOfFirstPositiveKey(final long[] sortedKeys) {
+
+        int low = 0;
+        int high = sortedKeys.length - 1;
+
+        // binary search for the partition point between keys <= 0 and keys > 0
+        while (low <= high) {
+            final int pos = (low + high) >>> 1;
+            if (sortedKeys[pos] <= EMPTY_SLOT) {
+                low = pos + 1;
+            } else {
+                high = pos - 1;
+            }
+        }
+
+        return low < sortedKeys.length ? low : -1;
+    }
+
+    // number of entries that are stored in the keys/values arrays
+    private int entriesInTable() {
+        return hasZeroKey ? size - 1 : size;
+    }
+
+    // Returns the position of the key in the table, or -1 if it is absent. The
+    // search can stop at the first empty slot, because removeAt never leaves a gap
+    // between the preferred position of a key and its actual position.
+    private int indexOf(final long key) {
+        int position = spread(key);
+        while (keys[position] != EMPTY_SLOT) {
+            if (keys[position] == key) {
+                return position;
+            }
+            position = (position + 1) % keys.length;
+        }
+        return -1;
+    }
+
+    // Stores a key that is known to be absent in the first free slot at or after
+    // its preferred position.
+    private void insertAbsentKey(final long key, final long value) {
+        int position = spread(key);
+        while (keys[position] != EMPTY_SLOT) {
+            position = (position + 1) % keys.length;
+        }
+        keys[position] = key;
+        values[position] = value;
+    }
+
+    // Empties the slot and moves following entries of the same probe sequence
+    // back, so that they stay reachable from their preferred position.
+    private void removeAt(final int position) {
+        int gap = position;
+        int current = (gap + 1) % keys.length;
+        while (keys[current] != EMPTY_SLOT) {
+            final int preferred = spread(keys[current]);
+            // The entry can stay iff its preferred position lies cyclically in
+            // (gap, current]. Otherwise a lookup would hit the gap first.
+            final boolean reachable = gap <= current
+                    ? gap < preferred && preferred <= current
+                    : gap < preferred || preferred <= current;
+            if (!reachable) {
+                keys[gap] = keys[current];
+                values[gap] = values[current];
+                gap = current;
+            }
+            current = (current + 1) % keys.length;
+        }
+        keys[gap] = EMPTY_SLOT;
+        values[gap] = 0L;
+    }
+
+    // Grows the table if one more entry would exceed the load factor or would
+    // occupy the last free slot. Probing relies on at least one empty slot.
+    private void ensureRoomForOneMoreEntry() {
+        final int entriesAfterInsert = entriesInTable() + 1;
+        final boolean tableWouldBeFull = entriesAfterInsert >= keys.length;
+        if (!tableWouldBeFull && entriesAfterInsert <= keys.length * fillFactor) {
+            return;
+        }
+        if (keys.length < MAX_ARRAY_SIZE) {
+            growAndRehash();
+        } else if (tableWouldBeFull) {
+            throw new IllegalStateException("map is full");
+        }
+    }
+
+    private void growAndRehash() {
+        final long[] oldKeys = keys;
+        final long[] oldValues = values;
+
+        // computed as long, because keys.length * 2 overflows for large tables
+        final int newSize = (int) Math.min(oldKeys.length * 2L, MAX_ARRAY_SIZE);
+
+        keys = new long[newSize];
+        values = new long[newSize];
+
+        for (int i = 0; i < oldKeys.length; i++) {
+            final long key = oldKeys[i];
+            if (key != EMPTY_SLOT) {
+                insertAbsentKey(key, oldValues[i]);
+            }
+        }
+    }
+
+    // visible for test
+    int spread(final long key) {
+        // floorMod is never negative. Math.abs(hash) % length is negative for
+        // hash == Integer.MIN_VALUE, e.g. for the key 0x80000000L.
+        return Math.floorMod(Long.hashCode(key), keys.length);
+    }
+
+}
diff --git a/primitiveCollections/src/test/java/org/lucares/collections/LongLongHashMapTest.java b/primitiveCollections/src/test/java/org/lucares/collections/LongLongHashMapTest.java
new file mode 100644
index 0000000..9589236
--- /dev/null
+++ b/primitiveCollections/src/test/java/org/lucares/collections/LongLongHashMapTest.java
@@ -0,0 +1,249 @@
+package org.lucares.collections;
+
+import java.util.Random;
+import java.util.stream.LongStream;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class LongLongHashMapTest {
+    @Test
+    public void testPutRemove() {
+        putGetRemove(1);
+    }
+
+    @Test
+    public void testNullValue() {
+        putGetRemove(0);
+    }
+
+    private void putGetRemove(final long key) {
+        final LongLongHashMap map = new LongLongHashMap();
+
+        final long valueA = 2L;
+        final long valueB = 3L;
+
+        // value does not exist
+        Assertions.assertFalse(map.containsKey(key));
+        Assertions.assertEquals(0, map.size());
+
+        // add value and check it is in the map
+        map.put(key, valueA);
+        Assertions.assertTrue(map.containsKey(key));
+        Assertions.assertEquals(valueA, map.get(key));
+        Assertions.assertEquals(1, map.size());
+
+        // overwrite value
+        map.put(key, valueB);
+        Assertions.assertEquals(valueB, map.get(key));
+        Assertions.assertEquals(1, map.size());
+
+        // remove value and check it is gone
+        map.remove(key);
+        Assertions.assertFalse(map.containsKey(key));
+        Assertions.assertEquals(0, map.size());
+    }
+
+    @Test
+    public void testComputeZeroKey() {
+        final LongLongHashMap map = new LongLongHashMap();
+
+        final long key = 0;
+        map.compute(key, 6, l -> l + 1);
+        Assertions.assertEquals(7, map.get(key), "initialValueIfAbsent is used when there is no mapping for the key");
+        Assertions.assertEquals(1, map.size(), "adding the zero key via compute increases the size");
+
+        map.compute(key, 6, l -> l + 1);
+        Assertions.assertEquals(8, map.get(key), "update function is called when 'zeroKey' is set");
+    }
+
+    @Test
+    public void testCompute() {
+        final LongLongHashMap map = new LongLongHashMap();
+        final long key = 1;
+        map.compute(key, 6, l -> l + 1);
+        Assertions.assertEquals(7, map.get(key), "initialValueIfAbsent is used when there is no mapping for the key");
+
+        map.compute(key, 6, l -> l + 1);
+        Assertions.assertEquals(8, map.get(key), "update function is called when key is set");
+    }
+
+    @Test
+    public void testGrowMap() {
+        final LongLongHashMap map = new LongLongHashMap(4, 0.75);
+
+        final int numEntries = 12;
+        final Random rand = new Random(12345);
+        final LongList entries = LongList.of(LongStream.generate(rand::nextLong).limit(numEntries).toArray());
+
+        entries.stream().forEachOrdered(l -> {
+            map.put(l, l);
+        });
+        entries.stream().forEachOrdered(l -> {
+            Assertions.assertEquals(l, map.get(l));
+        });
+        Assertions.assertEquals(16, map.getCapacity(), "capacity after adding 12 entries must be the smallest number "
+                + "that satisfies initialCapacity * 2^n >= entries/fillFactor");
+    }
+
+    @Test
+    public void testMultipleValuesOnSamePosition() {
+        final int initialCapacity = 20;
+        final LongLongHashMap map = new LongLongHashMap(initialCapacity, 0.75);
+        // find two values that yield the same 'spread' (position in the table)
+        final LongList keysWithSameSpread = findKeysWithSameSpread(map);
+        Assertions.assertTrue(keysWithSameSpread.size() > 5);
+
+        keysWithSameSpread.stream().forEach(l -> map.put(l, l));
+        Assertions.assertEquals(keysWithSameSpread.size(), map.size());
+        keysWithSameSpread.stream().forEach(l -> Assertions.assertEquals(l, map.get(l)));
+    }
+
+    @Test
+    public void testRemoveKeepsCollidingKeysUnique() {
+        final LongLongHashMap map = new LongLongHashMap(20, 0.75);
+        final LongList keysWithSameSpread = findKeysWithSameSpread(map);
+        keysWithSameSpread.stream().forEach(l -> map.put(l, l));
+
+        // removing the first key of a probe sequence must not hide the other keys
+        map.remove(1);
+        keysWithSameSpread.stream().forEach(l -> {
+            if (l != 1) {
+                map.put(l, 2 * l);
+                Assertions.assertEquals(2 * l, map.get(l));
+            }
+        });
+        Assertions.assertFalse(map.containsKey(1));
+        Assertions.assertEquals(keysWithSameSpread.size() - 1, map.size());
+    }
+
+    @Test
+    public void testKeyWithHashIntegerMinValue() {
+        final LongLongHashMap map = new LongLongHashMap();
+        final long key = 0x80000000L;
+        Assertions.assertEquals(Integer.MIN_VALUE, Long.hashCode(key));
+
+        map.put(key, 1);
+        Assertions.assertEquals(1, map.get(key));
+    }
+
+    @Test
+    public void testInitialCapacityZero() {
+        final LongLongHashMap map = new LongLongHashMap(0, 0.75);
+        map.put(1, 2);
+        Assertions.assertEquals(2, map.get(1));
+        Assertions.assertEquals(1, map.size());
+    }
+
+    @Test
+    public void testForEach() {
+        final LongLongHashMap map = new LongLongHashMap();
+        final Random rand = new Random(6789);
+        final LongList entries = LongList.of(LongStream.generate(rand::nextLong).limit(15).toArray());
+
+        entries.stream().forEachOrdered(l -> {
+            map.put(l, 2 * l);
+        });
+
+        map.forEach((k, v) -> {
+            Assertions.assertEquals(k * 2, v, "value is key*2");
+            Assertions.assertTrue(entries.indexOf(k) >= 0, "value " + k + " in entries: " + entries);
+        });
+    }
+
+    @Test
+    public void testForEachOrdered() {
+        final LongLongHashMap map = new LongLongHashMap();
+        final Random rand = new Random(6789);
+        final LongList entries = LongList.of(LongStream.generate(rand::nextLong).limit(15).toArray());
+
+        entries.stream().forEachOrdered(l -> {
+            map.put(l, 2 * l);
+        });
+
+        final LongList actualOrderOfKeys = new LongList();
+        map.forEachOrdered((k, v) -> {
+            Assertions.assertEquals(k * 2, v, "value is key*2");
+            Assertions.assertTrue(entries.indexOf(k) >= 0, "value " + k + " in entries: " + entries);
+            actualOrderOfKeys.add(k);
+        });
+
+        Assertions.assertTrue(actualOrderOfKeys.isSorted(), "keys are sorted");
+    }
+
+    @Test
+    public void testForEachOrderedWithZeroAndNegativeKeys() {
+        final LongLongHashMap map = new LongLongHashMap();
+        map.put(3, 30);
+        map.put(0, 0);
+        map.put(-5, -50);
+
+        final LongList actualOrderOfKeys = new LongList();
+        map.forEachOrdered((k, v) -> {
+            Assertions.assertEquals(k * 10, v, "value is key*10");
+            actualOrderOfKeys.add(k);
+        });
+
+        Assertions.assertEquals(3, actualOrderOfKeys.size());
+        Assertions.assertTrue(actualOrderOfKeys.isSorted(), "keys are sorted");
+    }
+
+    @Test
+    public void testForEachOrderedWithoutPositiveKeys() {
+        final LongLongHashMap map = new LongLongHashMap();
+        map.forEachOrdered((k, v) -> Assertions.fail("the map is empty"));
+
+        map.put(-1, 1);
+        final LongList actualOrderOfKeys = new LongList();
+        map.forEachOrdered((k, v) -> actualOrderOfKeys.add(k));
+        Assertions.assertEquals(1, actualOrderOfKeys.size());
+    }
+
+    @Test
+    public void testFindPositionOfFirstPositiveKey() {
+
+        Assertions.assertEquals(-1, LongLongHashMap.findPosOfFirstPositiveKey(new long[] {}));
+        Assertions.assertEquals(-1, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0 }));
+        Assertions.assertEquals(0, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 1 }));
+        Assertions.assertEquals(1, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 1 }));
+        Assertions.assertEquals(0, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 1, 1 }));
+        Assertions.assertEquals(2, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, 0, 1 }));
+        Assertions.assertEquals(0, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 1, 1, 1, 1, 1, 1, 1, 1, 1 }));
+        Assertions.assertEquals(0, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 1, 1, 1, 1, 1, 1, 1, 1 }));
+        Assertions.assertEquals(4,
+                LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, -1, -1, -1, 1, 1, 1, 1, 1 }));
+        Assertions.assertEquals(4,
+                LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, -1, -1, -1, 1, 1, 1, 1 }));
+        Assertions.assertEquals(3,
+                LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, -1, -1, 1, 1, 1, 1, 1, 1 }));
+        Assertions.assertEquals(3, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, -1, -1, 1, 1, 1, 1, 1 }));
+        Assertions.assertEquals(-1, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 0 }));
+        Assertions.assertEquals(-1, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 0, 0 }));
+        Assertions.assertEquals(4, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 1, 1, 1, 1 }));
+        Assertions.assertEquals(5, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 0, 1, 1, 1 }));
+        Assertions.assertEquals(6, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 0, 0, 1, 1 }));
+        Assertions.assertEquals(4, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 1, 1, 1 }));
+        Assertions.assertEquals(5, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 0, 1, 1 }));
+        Assertions.assertEquals(6, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { 0, 0, 0, 0, 0, 0, 1 }));
+        Assertions.assertEquals(4, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, 0, 0, 0, 1, 1, 1 }));
+        Assertions.assertEquals(5, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, 0, 0, 0, 0, 1, 1 }));
+        Assertions.assertEquals(6, LongLongHashMap.findPosOfFirstPositiveKey(new long[] { -1, 0, 0, 0, 0, 0, 1 }));
+    }
+
+    private LongList findKeysWithSameSpread(final LongLongHashMap map) {
+        final LongList result = new LongList();
+        final int spread = map.spread(1);
+        result.add(1);
+        for (long l = 2; l < 10000; l++) {
+            final int s = map.spread(l);
+            if (s == spread) {
+                result.add(l);
+                if (result.size() > 10) {
+                    break;
+                }
+            }
+        }
+
+        return result;
+    }
+}