Review notes (added during review; not part of the original commit, ignored by `git apply`):
  * The line structure, indentation and generic type arguments of this patch were restored
    from a flattened copy; blank context lines are placed so that every hunk matches its
    @@ header.
  * BenchmarkMultiwayMerge.main: ctype was set to "non-overlapping", which equals neither
    OPTION_RANDOM ("random") nor OPTION_NON_OVERLAP ("non-overlap"), so setup() fell through
    to the identical 0..n lists. It now uses OPTION_NON_OVERLAP.
  * LongList.totalLength: the sum of the list sizes is accumulated in a long, so it cannot
    overflow int and yield a negative average length.

diff --git a/primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkMultiwayMerge.java b/primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkMultiwayMerge.java
index a888a18..162d5e0 100644
--- a/primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkMultiwayMerge.java
+++ b/primitiveCollections/src/jmh/java/org/lucares/collections/BenchmarkMultiwayMerge.java
@@ -21,19 +21,29 @@ import org.openjdk.jmh.annotations.Warmup;
 
 @State(Scope.Benchmark)
 @BenchmarkMode(Mode.Throughput)
-@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
-@Measurement(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
-@Fork(2)
+@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 3, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Fork(1)
 public class BenchmarkMultiwayMerge {
 
-    @Param({ "10000" , "20000" })
-    private int values;
+    private static final String OPTION_RANDOM = "random";
 
-    @Param({ "3","5", "10","20" })
-    private int numLists;
+    private static final String OPTION_NON_OVERLAP = "non-overlap";
 
-    @Param({ "true" })
-    private boolean random;
+    // @Param({ "3","5", "10","20", "1000" })
+    @Param({ "1000" })
+    private int anumLists;
+
+    // @Param({ "10", "1000" , "20000" })
+    @Param({ "1000" })
+    private int bvalues;
+
+    @Param({ OPTION_RANDOM, OPTION_NON_OVERLAP })
+    // @Param({ OPTION_NON_OVERLAP})
+    private String ctype;
+
+    @Param({ "0", "500" })
+    private int dconcatNonOverlap;
 
     private List<LongList> longSorted = null;
 
@@ -41,18 +51,37 @@ public class BenchmarkMultiwayMerge {
     public void setup() throws Exception {
         ThreadLocalRandom rng = ThreadLocalRandom.current();
         longSorted = new ArrayList<>();
-        for (int i = 0; i < numLists; i++) {
-            LongList list = new LongList(values);
-            if (random) {
-                for (int j = 0; j < values; j++) {
-                    list.add(rng.nextLong());
-                }
-                list.sort();
-            } else {
-                LongStream.range(0, values).forEachOrdered(list::add);
+        if (ctype.equalsIgnoreCase(OPTION_NON_OVERLAP)) {
+
+            final LongList list = randomList(bvalues * anumLists, rng);
+            list.sort();
+            for (int i = 0; i < anumLists; i++) {
+                longSorted.add(list.sublist(i * bvalues, (i + 1) * bvalues));
+            }
+
+        } else {
+            for (int i = 0; i < anumLists; i++) {
+                final LongList list;
+                if (ctype.equalsIgnoreCase(OPTION_RANDOM)) {
+                    list = randomList(bvalues, rng);
+                    list.sort();
+                } else {
+                    list = new LongList(bvalues);
+                    LongStream.range(0, bvalues).forEachOrdered(list::add);
+                }
+                longSorted.add(list);
             }
-            longSorted.add(list);
         }
+
+        LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = dconcatNonOverlap;
+    }
+
+    private LongList randomList(int values, ThreadLocalRandom rng) {
+        final LongList list = new LongList(values);
+        for (int j = 0; j < values; j++) {
+            list.add(rng.nextLong());
+        }
+        return list;
     }
 
     @TearDown
@@ -66,7 +95,7 @@ public class BenchmarkMultiwayMerge {
         LongList.union(longSorted);
     }
 
-    @Benchmark
+    // @Benchmark
     public void testUnionSortedLists_TwowayMergeImplementation() throws Exception {
 
         twowayMerge(longSorted);
@@ -81,26 +110,17 @@ public class BenchmarkMultiwayMerge {
 
     public static void main(String[] args) throws Exception {
         System.out.println("\n\n----------------\nstart");
-        // -XX:+PrintCompilation
-        if (args != null) {
+        for (int i = 0; i < 80; i++) {
             BenchmarkMultiwayMerge benchmark = new BenchmarkMultiwayMerge();
-            benchmark.numLists = 10;
-            benchmark.values = 10000;
+            benchmark.anumLists = 1000;
+            benchmark.bvalues = 1000;
+            benchmark.ctype = OPTION_NON_OVERLAP;
+            benchmark.dconcatNonOverlap = 500;
             benchmark.setup();
 
+            long start = System.nanoTime();
             benchmark.testUnionSortedLists_MultiwayMerge();
-        } else {
-            for (int i = 0; i < 8; i++) {
-                BenchmarkMultiwayMerge benchmark = new BenchmarkMultiwayMerge();
-                benchmark.numLists = 10;
-                benchmark.values = 10000;
-                benchmark.setup();
-                System.out.println("\n\n----------------\n" + i);
-                for (int j = 0; j < 1000; j++) {
-                    // benchmark.testUnionSortedLists_MultiwayMerge();
-                    benchmark.testUnionSortedLists_TwowayMergeImplementation();
-                }
-            }
+            System.out.println("total: " + (System.nanoTime() - start) / 1_000_000.0 + " ms");
         }
         System.out.println("done");
     }
diff --git a/primitiveCollections/src/main/java/org/lucares/collections/LongList.java b/primitiveCollections/src/main/java/org/lucares/collections/LongList.java
index 8586471..0c2209c 100644
--- a/primitiveCollections/src/main/java/org/lucares/collections/LongList.java
+++ b/primitiveCollections/src/main/java/org/lucares/collections/LongList.java
@@ -6,8 +6,10 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map.Entry;
 import java.util.Random;
 import java.util.Spliterator.OfLong;
+import java.util.TreeMap;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.stream.LongStream;
 import java.util.stream.StreamSupport;
@@ -31,6 +33,12 @@ public final class LongList implements Serializable, Cloneable {
 
     private static final long[] EMPTY_ARRAY = {};
 
+    /**
+     * If the average length of the lists is longer than this value, then we'll
+     * first try to concatenate non-overlapping lists before the union is computed.
+     */
+    public static int FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 500;
+
     /**
      * The array containing the values. It is transient, so that we can implement
      * our own serialization.
@@ -606,6 +614,14 @@ public final class LongList implements Serializable, Cloneable {
         return data[pos];
     }
 
+    public long first() {
+        return get(0);
+    }
+
+    public long last() {
+        return get(size() - 1);
+    }
+
     /**
      * Unsafe version of {@link #get(long)} that does not check for out of bounds
      * access if assertions are disabled. The caller has to make sure that pos is
@@ -669,7 +685,7 @@ public final class LongList implements Serializable, Cloneable {
         System.arraycopy(data, 0, input, 0, size);
         return input;
     }
-    
+
     long[] getArrayInternal() {
         return data;
     }
@@ -1028,6 +1044,7 @@ public final class LongList implements Serializable, Cloneable {
         try {
             final LongList result = (LongList) super.clone();
             result.data = size == 0 ? EMPTY_ARRAY : Arrays.copyOf(data, size);
+            result.sorted = sorted;
             return result;
         } catch (final CloneNotSupportedException e) {
             throw new IllegalStateException(e);
@@ -1157,7 +1174,7 @@ public final class LongList implements Serializable, Cloneable {
      * TODO check time complexity If all lists are sorted, then the time complexity
      * is O(n+m), where n is the length of the first list and m the length of the
      * second list. If at least one list is not sorted, then the time complexity is
-     * O(m*log(m)), where m is the length of the longer list.
+     * O(m*log(m)), where m is the length of the longest list.
      *
      * @param longLists the lists
      * @return the union of both lists
@@ -1184,28 +1201,173 @@ public final class LongList implements Serializable, Cloneable {
         case 0:
             return new LongList();
         case 1:
-            return longLists.iterator().next().clone();
+            // remove duplicate values
+            return unionInternal(longLists.iterator().next(), LongList.of());
         case 2:
             final Iterator<LongList> it = longLists.iterator();
             final LongList a = it.next();
             final LongList b = it.next();
             return unionInternal(a, b);
         default:
-            final Collection<LongList> sortedLists = subsetOfSortedLists(longLists);
-            final Collection<LongList> unsortedLists = subsetOfUnsortedLists(longLists);
+            final List<LongList> sortedLists = toSortedLists(longLists);
 
-            final LongList unionSorted = MultiwayLongMerger.unionSorted(sortedLists);
+            final double averageLength = totalLength(longLists) / (double) longLists.size();
 
-            final LongList result;
-            if (unsortedLists.isEmpty()) {
-                result = unionSorted;
+            final List<LongList> sortedConcatenatedLists;
+            // benchmarks showed that concatenation is beneficial for longer lists
+            if (averageLength > FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN)
+            {
+                final ListConcatenater listConcatenater = new ListConcatenater(sortedLists);
+                sortedConcatenatedLists = listConcatenater.concatenateNonOverlapping();
             } else {
-                final LongList unionUnsorted = unionUnsorted(unsortedLists);
-                result = unionInternal(unionSorted, unionUnsorted);
+                sortedConcatenatedLists = sortedLists;
             }
 
+            switch (sortedConcatenatedLists.size()) {
+            case 0:
+                return new LongList();
+            case 1:
+                // remove duplicate values
+                return unionInternal(sortedConcatenatedLists.get(0), LongList.of());
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+                // benchmarks have shown that the trivial merge is faster when merging only a
+                // few lists
+                return unionRepeatedTwowayMerge(sortedConcatenatedLists);
+            default:
+                final LongList multiwayMerged = MultiwayLongMerger.unionSorted(sortedConcatenatedLists);
+                return multiwayMerged;
+            }
+        }
+    }
+
+    private static long totalLength(Collection<LongList> longLists) {
+        long totalLength = 0;
+        for (LongList longList : longLists) {
+            totalLength += longList.size();
+        }
+        return totalLength;
+    }
+
+    private static LongList unionRepeatedTwowayMerge(final List<LongList> sortedLongLists) {
+
+        LongList result = sortedLongLists.get(0);
+        for (int i = 1; i < sortedLongLists.size(); i++) {
+            result = LongList.unionSorted(result, sortedLongLists.get(i));
+        }
+
+        return result;
+    }
+
+    private static class ListConcatenater {
+
+        private static class ListLongList {
+            private final List<LongList> list = new ArrayList<>();
+
+            public ListLongList(LongList longList) {
+                list.add(longList);
+            }
+
+            public void add(ListLongList listLongList) {
+                list.addAll(listLongList.list);
+            }
+
+            public LongList toLongList() {
+                switch (list.size()) {
+                case 0:
+                    return new LongList(0);
+                case 1:
+                    return list.get(0);
+                default:
+                    int capacity = Math.toIntExact(list.stream().mapToLong(LongList::size).sum());
+                    final LongList result = new LongList(capacity);
+                    result.addAll(list);
+                    return result;
+                }
+            }
+
+            public long first() {
+                return list.get(0).first();
+            }
+
+            public long last() {
+                return list.get(list.size() - 1).last();
+            }
+        }
+
+        final TreeMap<Long, List<ListLongList>> lowestValueMap = new TreeMap<>();
+        final TreeMap<Long, List<ListLongList>> highestValueMap = new TreeMap<>();
+
+        public ListConcatenater(final Collection<LongList> sortedLongLists) {
+            sortedLongLists.stream().map(ListLongList::new).forEach(this::index);
+        }
+
+        private void index(ListLongList listLongList) {
+            final long lowestValue = listLongList.first();
+            final long highestValue = listLongList.last();
+            lowestValueMap.computeIfAbsent(lowestValue, k -> new ArrayList<>()).add(listLongList);
+            highestValueMap.computeIfAbsent(highestValue, k -> new ArrayList<>()).add(listLongList);
+        }
+
+        private void removeFromIndex(ListLongList listLongList) {
+            lowestValueMap.get(listLongList.first()).remove(listLongList);
+            highestValueMap.get(listLongList.last()).remove(listLongList);
+        }
+
+        public List<LongList> concatenateNonOverlapping() {
+            for (Entry<Long, List<ListLongList>> e : highestValueMap.entrySet()) {
+                final long highestValue = e.getKey();
+                if (highestValue == Long.MAX_VALUE) {
+                    continue;
+                }
+
+                final Iterator<ListLongList> it = e.getValue().iterator();
+                while (it.hasNext()) {
+                    final ListLongList lowList = it.next();
+                    final Entry<Long, List<ListLongList>> ceilingEntry = lowestValueMap.ceilingEntry(highestValue + 1);
+                    if (ceilingEntry != null && !ceilingEntry.getValue().isEmpty()) {
+                        final ListLongList highList = ceilingEntry.getValue().get(0);
+                        removeFromIndex(highList);
+
+                        it.remove(); // prevents concurrent modification that would happen in removeFromIndex()
+                        removeFromIndex(lowList);
+
+                        lowList.add(highList);
+
+                        index(lowList);
+                    }
+                }
+            }
+
+            final List<LongList> result = new ArrayList<>();
+            for (List<ListLongList> l : highestValueMap.values()) {
+                for (ListLongList listLongList : l) {
+                    result.add(listLongList.toLongList());
+                }
+            }
             return result;
         }
+
+    }
+
+    private static List<LongList> toSortedLists(final Collection<LongList> longLists) {
+        final List<LongList> result = new ArrayList<>();
+
+        for (LongList longList : longLists) {
+            if (longList.isEmpty()) {
+                // skip, no need to merge an empty list
+            } else if (longList.isSorted()) {
+                result.add(longList);
+            } else {
+                final LongList copy = longList.clone();
+                copy.sort();
+                result.add(copy);
+            }
+        }
+
+        return result;
     }
 
     private static LongList unionSorted(final LongList a, final LongList b) {
@@ -1270,55 +1432,21 @@ public final class LongList implements Serializable, Cloneable {
 
     private static LongList unionUnsorted(final LongList a, final LongList b) {
         final LongList aSorted = new LongList(a);
-        aSorted.parallelSort();
+        if (!aSorted.isSorted()) {
+            aSorted.parallelSort();
+        }
         final LongList bSorted = new LongList(b);
-        bSorted.parallelSort();
+        if (!bSorted.isSorted()) {
+            bSorted.parallelSort();
+        }
 
         return unionSorted(aSorted, bSorted);
     }
 
-    private static LongList unionUnsorted(Collection<LongList> longLists) {
-
-        final List<LongList> sortedLists = new ArrayList<>();
-        for (LongList longList : longLists) {
-            final LongList copy = new LongList(longList);
-            copy.sort();
-            sortedLists.add(copy);
-        }
-        return MultiwayLongMerger.unionSorted(sortedLists);
-    }
-
     private void checkIfSorted() {
         sorted = true;
         for (int i = 1; i < size && sorted; i++) {
             sorted = data[i - 1] <= data[i];
         }
     }
-
-    private static Collection<LongList> subsetOfSortedLists(Collection<LongList> longLists) {
-
-        final List<LongList> result = new ArrayList<>();
-
-        for (LongList longList : longLists) {
-            if (longList.isSorted()) {
-                result.add(longList);
-            }
-        }
-
-        return result;
-    }
-
-    private static Collection<LongList> subsetOfUnsortedLists(Collection<LongList> longLists) {
-
-        final List<LongList> result = new ArrayList<>();
-
-        for (LongList longList : longLists) {
-            if (!longList.isSorted()) {
-                result.add(longList);
-            }
-        }
-
-        return result;
-    }
-
 }
diff --git a/primitiveCollections/src/main/java/org/lucares/collections/MultiwayLongMerger.java b/primitiveCollections/src/main/java/org/lucares/collections/MultiwayLongMerger.java
index 4bf6301..2cc5561 100644
--- a/primitiveCollections/src/main/java/org/lucares/collections/MultiwayLongMerger.java
+++ b/primitiveCollections/src/main/java/org/lucares/collections/MultiwayLongMerger.java
@@ -250,10 +250,4 @@ class MultiwayLongMerger {
 
         }
     }
-
-    public static void main(String[] args) {
-        for (int i = 0; i < 20; i++) {
-            System.out.println(i + " " + (Long.highestOneBit(i - 1) << 1));
-        }
-    }
 }
diff --git a/primitiveCollections/src/test/java/org/lucares/collections/LongListTest.java b/primitiveCollections/src/test/java/org/lucares/collections/LongListTest.java
index 467a59a..fe41300 100644
--- a/primitiveCollections/src/test/java/org/lucares/collections/LongListTest.java
+++ b/primitiveCollections/src/test/java/org/lucares/collections/LongListTest.java
@@ -5,7 +5,9 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.Random;
 import java.util.concurrent.ConcurrentLinkedQueue;
@@ -1613,32 +1615,132 @@ public class LongListTest {
         Assertions.assertEquals(LongList.of(2), actual);
         Assertions.assertEquals(LongList.union(a, b), LongList.union(b, a));
     }
-    
+
     @Test
     public void testUnionSortedLists_three() {
         final LongList a = LongList.of(1, 2, 3);
         final LongList b = LongList.of(2, 4, 6);
         final LongList c = LongList.of(3, 5, 7);
 
-        final LongList actual = LongList.union(a, b, c); 
-        Assertions.assertEquals(LongList.of(1,2,3,4,5,6,7), actual);
+        final LongList actual = LongList.union(a, b, c);
+        Assertions.assertEquals(LongList.of(1, 2, 3, 4, 5, 6, 7), actual);
         Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
         Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, a, c));
     }
-    
+
     @Test
     public void testUnionSortedLists_four_LongMinValue() {
-        final LongList a = LongList.of(Long.MIN_VALUE, 2, 3,Long.MAX_VALUE);
+        final LongList a = LongList.of(Long.MIN_VALUE, 2, 3, Long.MAX_VALUE);
         final LongList b = LongList.of(2, 4, 6, Long.MAX_VALUE);
         final LongList c = LongList.of(Long.MIN_VALUE, 5, 7);
         final LongList d = LongList.of(Long.MIN_VALUE, Long.MIN_VALUE);
 
-        final LongList actual = LongList.union(a, b, c, d); 
-        Assertions.assertEquals(LongList.of(Long.MIN_VALUE,2,3,4,5,6,7, Long.MAX_VALUE), actual);
+        final LongList actual = LongList.union(a, b, c, d);
+        Assertions.assertEquals(LongList.of(Long.MIN_VALUE, 2, 3, 4, 5, 6, 7, Long.MAX_VALUE), actual);
         Assertions.assertEquals(LongList.union(a, b, c, d), LongList.union(b, c, a, d));
         Assertions.assertEquals(LongList.union(a, b, c, d), LongList.union(d, b, a, c));
     }
 
+    @Test
+    public void testUnionSortedLists_Concatenating_with_empty_list() {
+        // aims to use the ListConcatenater in LongList.union()
+        // that means we need at least three lists and they must all be non-overlapping
+        final LongList a = LongList.of(1, 2, 3);
+        final LongList b = LongList.of();
+        final LongList c = LongList.of(10,11);
+
+        final LongList actual = LongList.union(a, b, c);
+        Assertions.assertEquals(LongList.of(1,2,3,10,11), actual);
+        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
+        Assertions.assertEquals(LongList.union(a, b, c), LongList.union( b, a, c));
+    }
+
+    @Test
+    public void testUnionSortedLists_Concatenating_results_in_one_list() {
+        // aims to use the ListConcatenater in LongList.union()
+        // that means we need at least three lists and they must all be non-overlapping
+        final LongList a = LongList.of(1, 2, 3);
+        final LongList b = LongList.of(4);
+        final LongList c = LongList.of(10,11);
+
+        final LongList actual = LongList.union(a, b, c);
+        Assertions.assertEquals(LongList.of(1,2,3,4,10,11), actual);
+        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
+        Assertions.assertEquals(LongList.union(a, b, c), LongList.union( b, a, c));
+    }
+
+    @Test
+    public void testUnionSortedLists_Concatenating_results_in_two_lists() {
+        // aims to use the ListConcatenater in LongList.union()
+        // that means we need at least three lists
+        final LongList a = LongList.of(1, 2, 3);
+        final LongList b = LongList.of(3, 4);
+        final LongList c = LongList.of(4, 10,11); // can be concatenated to a
+
+        final LongList actual = LongList.union(a, b, c);
+        Assertions.assertEquals(LongList.of(1,2,3,4,10,11), actual);
+        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
+        Assertions.assertEquals(LongList.union(a, b, c), LongList.union( b, a, c));
+    }
+
+    @Test
+    public void testUnionSortedLists_Multiway_merge () {
+        // aims to use the MultiwayLongMerge
+        // that means we need overlapping lists so that ListConcatenater will return at least six lists
+        // this is done by adding 1 to all six lists
+        final LongList a = LongList.of(1, 2, 3);
+        final LongList b = LongList.of(1, 3, 4);
+        final LongList c = LongList.of(1, 4, 10,11);
+        final LongList d = LongList.of(1, 6,9);
+        final LongList e = LongList.of(1, 123, 144);
+        final LongList f = LongList.of(1, 411, 1011,1111);
+
+        final LongList actual = LongList.union(a, b, c,d,e,f);
+        Assertions.assertEquals(LongList.of(1,2,3,4,6,9,10,11,123,144,411,1011,1111), actual);
+        Assertions.assertEquals(LongList.union(a, b, c,d,e,f), LongList.union(b, c, a,d,e,f));
+        Assertions.assertEquals(LongList.union(a, b, c,d,e,f), LongList.union( b, d,f,a,e, c));
+    }
+
+    @Test
+    public void testUnionSortedLists_ten_lists_fifteen_elements_random() {
+        testUnionSortedLists(10, 15, true);
+    }
+
+    @Test
+    public void testUnionSortedLists_ten_lists_fifteen_elements_equal() {
+        testUnionSortedLists(10, 15, false);
+    }
+
+    private void testUnionSortedLists(int numLists, int values, boolean random) {
+        ThreadLocalRandom rng = ThreadLocalRandom.current();
+        List<LongList> longSorted = new ArrayList<>();
+        for (int i = 0; i < numLists; i++) {
+            LongList list = new LongList(values);
+            if (random) {
+                for (int j = 0; j < values; j++) {
+                    list.add(rng.nextLong());
+                }
+                list.sort();
+            } else {
+                LongStream.range(0, values).forEachOrdered(list::add);
+            }
+            longSorted.add(list);
+        }
+
+        final LongList actual = LongList.union(longSorted);
+
+        final LongList concatenatedList = new LongList();
+        concatenatedList.addAll(longSorted);
+        final LongList expected = LongList.union(concatenatedList, LongList.of());
+        Assertions.assertEquals(expected, actual);
+
+        Collections.shuffle(longSorted);
+        final LongList unionShuffled1 = LongList.union(longSorted);
+        Collections.shuffle(longSorted);
+        final LongList unionShuffled2 = LongList.union(longSorted);
+        Assertions.assertEquals(unionShuffled1, unionShuffled2);
+    }
+
     @Test
     public void testUnionUnsortedLists() {
         final LongList a = LongList.of(1, 0, 3, 4);