improve performance of LongList.union

This commit is contained in:
2020-11-15 12:39:07 +01:00
parent 54fbebf0b7
commit 6bd4b9b424
4 changed files with 344 additions and 100 deletions

View File

@@ -21,19 +21,29 @@ import org.openjdk.jmh.annotations.Warmup;
@State(Scope.Benchmark) @State(Scope.Benchmark)
@BenchmarkMode(Mode.Throughput) @BenchmarkMode(Mode.Throughput)
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 3, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Fork(2) @Fork(1)
public class BenchmarkMultiwayMerge { public class BenchmarkMultiwayMerge {
@Param({ "10000" , "20000" }) private static final String OPTION_RANDOM = "random";
private int values;
@Param({ "3","5", "10","20" }) private static final String OPTION_NON_OVERLAP = "non-overlap";
private int numLists;
@Param({ "true" }) // @Param({ "3","5", "10","20", "1000" })
private boolean random; @Param({ "1000" })
private int anumLists;
// @Param({ "10", "1000" , "20000" })
@Param({ "1000" })
private int bvalues;
@Param({ OPTION_RANDOM, OPTION_NON_OVERLAP })
// @Param({ OPTION_NON_OVERLAP})
private String ctype;
@Param({ "0", "500" })
private int dconcatNonOverlap;
private List<LongList> longSorted = null; private List<LongList> longSorted = null;
@@ -41,20 +51,39 @@ public class BenchmarkMultiwayMerge {
public void setup() throws Exception { public void setup() throws Exception {
ThreadLocalRandom rng = ThreadLocalRandom.current(); ThreadLocalRandom rng = ThreadLocalRandom.current();
longSorted = new ArrayList<>(); longSorted = new ArrayList<>();
for (int i = 0; i < numLists; i++) { if (ctype.equalsIgnoreCase(OPTION_NON_OVERLAP)) {
LongList list = new LongList(values);
if (random) { final LongList list = randomList(bvalues * anumLists, rng);
for (int j = 0; j < values; j++) { list.sort();
list.add(rng.nextLong()); for (int i = 0; i < anumLists; i++) {
longSorted.add(list.sublist(i * bvalues, (i + 1) * bvalues));
} }
} else {
for (int i = 0; i < anumLists; i++) {
final LongList list;
if (ctype.equalsIgnoreCase(OPTION_RANDOM)) {
list = randomList(bvalues, rng);
list.sort(); list.sort();
} else { } else {
LongStream.range(0, values).forEachOrdered(list::add); list = new LongList(bvalues);
LongStream.range(0, bvalues).forEachOrdered(list::add);
} }
longSorted.add(list); longSorted.add(list);
} }
} }
LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = dconcatNonOverlap;
}
/**
 * Builds a list filled with pseudo-random longs.
 *
 * The values are appended in the order they are drawn from {@code rng}; this
 * method does not sort the result.
 *
 * @param values number of random values to append (also passed to the
 *               {@code LongList} constructor, presumably as initial capacity)
 * @param rng    source of the random values
 * @return a new list with {@code values} random elements
 */
private LongList randomList(int values, ThreadLocalRandom rng) {
    final LongList randomValues = new LongList(values);
    int remaining = values;
    while (remaining > 0) {
        randomValues.add(rng.nextLong());
        remaining--;
    }
    return randomValues;
}
@TearDown @TearDown
public void tearDown() { public void tearDown() {
longSorted = null; longSorted = null;
@@ -66,7 +95,7 @@ public class BenchmarkMultiwayMerge {
LongList.union(longSorted); LongList.union(longSorted);
} }
@Benchmark // @Benchmark
public void testUnionSortedLists_TwowayMergeImplementation() throws Exception { public void testUnionSortedLists_TwowayMergeImplementation() throws Exception {
twowayMerge(longSorted); twowayMerge(longSorted);
@@ -81,26 +110,17 @@ public class BenchmarkMultiwayMerge {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
System.out.println("\n\n----------------\nstart"); System.out.println("\n\n----------------\nstart");
// -XX:+PrintCompilation
if (args != null) { for (int i = 0; i < 80; i++) {
BenchmarkMultiwayMerge benchmark = new BenchmarkMultiwayMerge(); BenchmarkMultiwayMerge benchmark = new BenchmarkMultiwayMerge();
benchmark.numLists = 10; benchmark.anumLists = 1000;
benchmark.values = 10000; benchmark.bvalues = 1000;
benchmark.ctype = "non-overlapping";
benchmark.dconcatNonOverlap = 500;
benchmark.setup(); benchmark.setup();
long start = System.nanoTime();
benchmark.testUnionSortedLists_MultiwayMerge(); benchmark.testUnionSortedLists_MultiwayMerge();
} else { System.out.println("total: " + (System.nanoTime() - start) / 1_000_000.0 + " ms");
for (int i = 0; i < 8; i++) {
BenchmarkMultiwayMerge benchmark = new BenchmarkMultiwayMerge();
benchmark.numLists = 10;
benchmark.values = 10000;
benchmark.setup();
System.out.println("\n\n----------------\n" + i);
for (int j = 0; j < 1000; j++) {
// benchmark.testUnionSortedLists_MultiwayMerge();
benchmark.testUnionSortedLists_TwowayMergeImplementation();
}
}
} }
System.out.println("done"); System.out.println("done");
} }

View File

@@ -6,8 +6,10 @@ import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map.Entry;
import java.util.Random; import java.util.Random;
import java.util.Spliterator.OfLong; import java.util.Spliterator.OfLong;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.LongStream; import java.util.stream.LongStream;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
@@ -31,6 +33,12 @@ public final class LongList implements Serializable, Cloneable {
private static final long[] EMPTY_ARRAY = {}; private static final long[] EMPTY_ARRAY = {};
/**
 * Threshold for the concatenation step of the multi-list union: if the average
 * length of the lists is strictly greater than this value, then non-overlapping
 * lists are first concatenated before the union is computed.
 * <p>
 * The field is deliberately mutable (not {@code final}) so that benchmarks can
 * tune it; it is global state shared by all callers, so changing it affects
 * every subsequent union.
 */
public static int FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 500;
/** /**
* The array containing the values. It is transient, so that we can implement * The array containing the values. It is transient, so that we can implement
* our own serialization. * our own serialization.
@@ -606,6 +614,14 @@ public final class LongList implements Serializable, Cloneable {
return data[pos]; return data[pos];
} }
/**
 * Returns the element stored at index 0.
 * <p>
 * NOTE(review): delegates to {@code get}, so an empty list presumably fails
 * with that method's out-of-bounds error - confirm.
 *
 * @return the first element of this list
 */
public long first() {
    final int firstIndex = 0;
    return get(firstIndex);
}
/**
 * Returns the element stored at index {@code size() - 1}.
 * <p>
 * NOTE(review): delegates to {@code get}, so an empty list (index -1)
 * presumably fails with that method's out-of-bounds error - confirm.
 *
 * @return the last element of this list
 */
public long last() {
    final int lastIndex = size() - 1;
    return get(lastIndex);
}
/** /**
* Unsafe version of {@link #get(long)} that does not check for out of bounds * Unsafe version of {@link #get(long)} that does not check for out of bounds
* access if assertions are disabled. The caller has to make sure that pos is * access if assertions are disabled. The caller has to make sure that pos is
@@ -1028,6 +1044,7 @@ public final class LongList implements Serializable, Cloneable {
try { try {
final LongList result = (LongList) super.clone(); final LongList result = (LongList) super.clone();
result.data = size == 0 ? EMPTY_ARRAY : Arrays.copyOf(data, size); result.data = size == 0 ? EMPTY_ARRAY : Arrays.copyOf(data, size);
result.sorted = sorted;
return result; return result;
} catch (final CloneNotSupportedException e) { } catch (final CloneNotSupportedException e) {
throw new IllegalStateException(e); throw new IllegalStateException(e);
@@ -1157,7 +1174,7 @@ public final class LongList implements Serializable, Cloneable {
* TODO check time complexity If all lists are sorted, then the time complexity * TODO check time complexity If all lists are sorted, then the time complexity
* is O(n+m), where n is the length of the first list and m the length of the * is O(n+m), where n is the length of the first list and m the length of the
* second list. If at least one list is not sorted, then the time complexity is * second list. If at least one list is not sorted, then the time complexity is
* O(m*log(m)), where m is the length of the longer list. * O(m*log(m)), where m is the length of the longest list.
* *
* @param longLists the lists * @param longLists the lists
* @return the union of both lists * @return the union of both lists
@@ -1184,28 +1201,173 @@ public final class LongList implements Serializable, Cloneable {
case 0: case 0:
return new LongList(); return new LongList();
case 1: case 1:
return longLists.iterator().next().clone(); // remove duplicate values
return unionInternal(longLists.iterator().next(), LongList.of());
case 2: case 2:
final Iterator<LongList> it = longLists.iterator(); final Iterator<LongList> it = longLists.iterator();
final LongList a = it.next(); final LongList a = it.next();
final LongList b = it.next(); final LongList b = it.next();
return unionInternal(a, b); return unionInternal(a, b);
default: default:
final Collection<LongList> sortedLists = subsetOfSortedLists(longLists); final List<LongList> sortedLists = toSortedLists(longLists);
final Collection<LongList> unsortedLists = subsetOfUnsortedLists(longLists);
final LongList unionSorted = MultiwayLongMerger.unionSorted(sortedLists); final double averageLength = totalLength(longLists) / (double) longLists.size();
final LongList result; final List<LongList> sortedConcatenatedLists;
if (unsortedLists.isEmpty()) { // benchmarks showed that concatenation is beneficial for longer lists
result = unionSorted; if (averageLength > FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN)
{
final ListConcatenater listConcatenater = new ListConcatenater(sortedLists);
sortedConcatenatedLists = listConcatenater.concatenateNonOverlapping();
} else { } else {
final LongList unionUnsorted = unionUnsorted(unsortedLists); sortedConcatenatedLists = sortedLists;
result = unionInternal(unionSorted, unionUnsorted); }
switch (sortedConcatenatedLists.size()) {
case 0:
return new LongList();
case 1:
// remove duplicate values
return unionInternal(sortedConcatenatedLists.get(0), LongList.of());
case 2:
case 3:
case 4:
case 5:
// benchmarks have shown that the trivial merge is faster when merging only a
// few lists
return unionRepeatedTwowayMerge(sortedConcatenatedLists);
default:
final LongList multiwayMerged = MultiwayLongMerger.unionSorted(sortedConcatenatedLists);
return multiwayMerged;
}
}
}
/**
 * Sums the sizes of all given lists.
 * <p>
 * The sum is accumulated in a {@code long} and saturated at
 * {@link Integer#MAX_VALUE}. With an {@code int} accumulator a large input
 * (for example a few thousand lists with millions of elements each) would
 * silently wrap around to a negative number and distort the average-length
 * heuristic that is derived from this value.
 *
 * @param longLists the lists whose sizes are added up
 * @return the total number of elements, capped at {@link Integer#MAX_VALUE}
 */
private static int totalLength(Collection<LongList> longLists) {
    long totalLength = 0;
    for (final LongList longList : longLists) {
        totalLength += longList.size();
    }
    // saturate instead of overflowing into a negative value
    return (int) Math.min(totalLength, Integer.MAX_VALUE);
}
private static LongList unionRepeatedTwowayMerge(final List<LongList> sortedLongLists) {
LongList result = sortedLongLists.get(0);
for (int i = 1; i < sortedLongLists.size(); i++) {
result = LongList.unionSorted(result, sortedLongLists.get(i));
} }
return result; return result;
} }
/**
 * Joins sorted lists whose value ranges do not overlap into longer lists, so
 * that fewer lists have to be merged afterwards.
 * <p>
 * Every list is indexed by its lowest and by its highest value. A list
 * {@code low} can be followed by a list {@code high} if
 * {@code low.last() < high.first()}. The first and last element of every list
 * are read while indexing, so all lists handed to the constructor must be
 * non-empty, and the approach only makes sense if each of them is sorted.
 */
private static class ListConcatenater {

    /**
     * A chain of lists that are logically concatenated. The actual copy into a
     * single {@link LongList} is deferred until {@link #toLongList()} is called.
     */
    private static class ListLongList {
        private final List<LongList> list = new ArrayList<>();

        public ListLongList(LongList longList) {
            list.add(longList);
        }

        /** Appends all parts of the other chain to the end of this chain. */
        public void add(ListLongList listLongList) {
            list.addAll(listLongList.list);
        }

        /**
         * Materializes the chain. A chain with a single part returns that part
         * itself (no copy); longer chains are copied into a new list.
         *
         * @throws ArithmeticException if the total size does not fit into an int
         */
        public LongList toLongList() {
            switch (list.size()) {
            case 0:
                return new LongList(0);
            case 1:
                return list.get(0);
            default:
                final int capacity = Math.toIntExact(list.stream().mapToLong(LongList::size).sum());
                final LongList result = new LongList(capacity);
                result.addAll(list);
                return result;
            }
        }

        /** Lowest value of the chain: the first value of its first part. */
        public long first() {
            return list.get(0).first();
        }

        /** Highest value of the chain: the last value of its last part. */
        public long last() {
            return list.get(list.size() - 1).last();
        }
    }

    /** Chains grouped by their lowest value. Empty buckets are removed. */
    final TreeMap<Long, List<ListLongList>> lowestValueMap = new TreeMap<>();

    /**
     * Chains grouped by their highest value. Buckets may become empty but their
     * keys are never removed, because this map is iterated while chains are
     * merged and must not be modified structurally during that iteration.
     */
    final TreeMap<Long, List<ListLongList>> highestValueMap = new TreeMap<>();

    /**
     * @param sortedLongLists non-empty lists; each one becomes a chain of length one
     */
    public ListConcatenater(final Collection<LongList> sortedLongLists) {
        sortedLongLists.stream().map(ListLongList::new).forEach(this::index);
    }

    /** Registers the chain in both indices under its current first/last value. */
    private void index(ListLongList listLongList) {
        final long lowestValue = listLongList.first();
        final long highestValue = listLongList.last();
        lowestValueMap.computeIfAbsent(lowestValue, k -> new ArrayList<>()).add(listLongList);
        highestValueMap.computeIfAbsent(highestValue, k -> new ArrayList<>()).add(listLongList);
    }

    /**
     * Removes the chain from both indices. Must be called before the chain is
     * extended, because the lookup uses its current first/last value.
     */
    private void removeFromIndex(ListLongList listLongList) {
        final long lowestValue = listLongList.first();
        final List<ListLongList> lowestBucket = lowestValueMap.get(lowestValue);
        lowestBucket.remove(listLongList);
        if (lowestBucket.isEmpty()) {
            // Drop the empty bucket. Otherwise ceilingEntry() could later return it
            // and hide a non-empty bucket with a higher key, which would make us
            // miss a possible concatenation. This is safe, because lowestValueMap is
            // only queried, never iterated, while chains are merged.
            lowestValueMap.remove(lowestValue);
        }
        // the (possibly empty) bucket stays in highestValueMap, see field comment
        highestValueMap.get(listLongList.last()).remove(listLongList);
    }

    /**
     * Greedily appends to every chain the chain with the smallest first value
     * that is strictly greater than the chain's last value. Chains are visited
     * in ascending order of their highest value; an extended chain is re-indexed
     * under its new (greater) highest value and is therefore visited again
     * later, which allows chains of more than two lists.
     *
     * @return the remaining lists, ordered by their highest value
     */
    public List<LongList> concatenateNonOverlapping() {
        for (Entry<Long, List<ListLongList>> e : highestValueMap.entrySet()) {
            final long highestValue = e.getKey();
            if (highestValue == Long.MAX_VALUE) {
                // nothing can follow, and highestValue + 1 would overflow
                continue;
            }

            final Iterator<ListLongList> it = e.getValue().iterator();
            while (it.hasNext()) {
                final ListLongList lowList = it.next();

                final Entry<Long, List<ListLongList>> ceilingEntry = lowestValueMap.ceilingEntry(highestValue + 1);
                if (ceilingEntry != null && !ceilingEntry.getValue().isEmpty()) {
                    final ListLongList highList = ceilingEntry.getValue().get(0);
                    removeFromIndex(highList);
                    it.remove(); // prevents concurrent modification that would happen in removeFromIndex()
                    removeFromIndex(lowList);

                    lowList.add(highList);
                    // The key highList.last() already exists in highestValueMap, so
                    // re-indexing does not modify the map that is being iterated.
                    index(lowList);
                }
            }
        }

        final List<LongList> result = new ArrayList<>();
        for (List<ListLongList> l : highestValueMap.values()) {
            for (ListLongList listLongList : l) {
                result.add(listLongList.toLongList());
            }
        }
        return result;
    }
}
private static List<LongList> toSortedLists(final Collection<LongList> longLists) {
final List<LongList> result = new ArrayList<>();
for (LongList longList : longLists) {
if (longList.isEmpty()) {
// skip, no need to merge an empty list
} else if (longList.isSorted()) {
result.add(longList);
} else {
final LongList copy = longList.clone();
copy.sort();
result.add(copy);
}
}
return result;
} }
private static LongList unionSorted(final LongList a, final LongList b) { private static LongList unionSorted(final LongList a, final LongList b) {
@@ -1270,55 +1432,21 @@ public final class LongList implements Serializable, Cloneable {
private static LongList unionUnsorted(final LongList a, final LongList b) { private static LongList unionUnsorted(final LongList a, final LongList b) {
final LongList aSorted = new LongList(a); final LongList aSorted = new LongList(a);
if (!aSorted.isSorted()) {
aSorted.parallelSort(); aSorted.parallelSort();
}
final LongList bSorted = new LongList(b); final LongList bSorted = new LongList(b);
if (!bSorted.isSorted()) {
bSorted.parallelSort(); bSorted.parallelSort();
}
return unionSorted(aSorted, bSorted); return unionSorted(aSorted, bSorted);
} }
private static LongList unionUnsorted(Collection<LongList> longLists) {
final List<LongList> sortedLists = new ArrayList<>();
for (LongList longList : longLists) {
final LongList copy = new LongList(longList);
copy.sort();
sortedLists.add(copy);
}
return MultiwayLongMerger.unionSorted(sortedLists);
}
private void checkIfSorted() { private void checkIfSorted() {
sorted = true; sorted = true;
for (int i = 1; i < size && sorted; i++) { for (int i = 1; i < size && sorted; i++) {
sorted = data[i - 1] <= data[i]; sorted = data[i - 1] <= data[i];
} }
} }
private static Collection<LongList> subsetOfSortedLists(Collection<LongList> longLists) {
final List<LongList> result = new ArrayList<>();
for (LongList longList : longLists) {
if (longList.isSorted()) {
result.add(longList);
}
}
return result;
}
private static Collection<LongList> subsetOfUnsortedLists(Collection<LongList> longLists) {
final List<LongList> result = new ArrayList<>();
for (LongList longList : longLists) {
if (!longList.isSorted()) {
result.add(longList);
}
}
return result;
}
} }

View File

@@ -250,10 +250,4 @@ class MultiwayLongMerger {
} }
} }
public static void main(String[] args) {
for (int i = 0; i < 20; i++) {
System.out.println(i + " " + (Long.highestOneBit(i - 1) << 1));
}
}
} }

View File

@@ -5,7 +5,9 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.ObjectInputStream; import java.io.ObjectInputStream;
import java.io.ObjectOutputStream; import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentLinkedQueue;
@@ -1639,6 +1641,106 @@ public class LongListTest {
Assertions.assertEquals(LongList.union(a, b, c, d), LongList.union(d, b, a, c)); Assertions.assertEquals(LongList.union(a, b, c, d), LongList.union(d, b, a, c));
} }
@Test
public void testUnionSortedLists_Concatenating_with_empty_list() {
    // Aims to use the ListConcatenater in LongList.union(). That requires at least
    // three lists AND an average list length greater than
    // LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN. The lists here are
    // tiny, so with the default threshold the concatenation would be skipped.
    // Therefore the threshold is lowered for the duration of this test.
    // (Assumes the varargs union() delegates to the collection based union.)
    final int originalThreshold = LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN;
    LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 0;
    try {
        final LongList a = LongList.of(1, 2, 3);
        final LongList b = LongList.of();
        final LongList c = LongList.of(10, 11);

        final LongList actual = LongList.union(a, b, c);
        Assertions.assertEquals(LongList.of(1, 2, 3, 10, 11), actual);
        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, a, c));
    } finally {
        // the threshold is global state, restore it for the other tests
        LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = originalThreshold;
    }
}
@Test
public void testUnionSortedLists_Concatenating_results_in_one_list() {
    // Aims to use the ListConcatenater in LongList.union(). That requires at least
    // three lists AND an average list length greater than
    // LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN. The lists here are
    // tiny, so with the default threshold the concatenation would be skipped.
    // Therefore the threshold is lowered for the duration of this test.
    // All three lists are non-overlapping, so they collapse into a single list.
    // (Assumes the varargs union() delegates to the collection based union.)
    final int originalThreshold = LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN;
    LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 0;
    try {
        final LongList a = LongList.of(1, 2, 3);
        final LongList b = LongList.of(4);
        final LongList c = LongList.of(10, 11);

        final LongList actual = LongList.union(a, b, c);
        Assertions.assertEquals(LongList.of(1, 2, 3, 4, 10, 11), actual);
        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, a, c));
    } finally {
        // the threshold is global state, restore it for the other tests
        LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = originalThreshold;
    }
}
@Test
public void testUnionSortedLists_Concatenating_results_in_two_lists() {
    // Aims to use the ListConcatenater in LongList.union(). That requires at least
    // three lists AND an average list length greater than
    // LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN. The lists here are
    // tiny, so with the default threshold the concatenation would be skipped.
    // Therefore the threshold is lowered for the duration of this test.
    // (Assumes the varargs union() delegates to the collection based union.)
    final int originalThreshold = LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN;
    LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 0;
    try {
        final LongList a = LongList.of(1, 2, 3);
        final LongList b = LongList.of(3, 4); // overlaps with a and with c
        final LongList c = LongList.of(4, 10, 11); // can be concatenated to a

        final LongList actual = LongList.union(a, b, c);
        Assertions.assertEquals(LongList.of(1, 2, 3, 4, 10, 11), actual);
        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
        Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, a, c));
    } finally {
        // the threshold is global state, restore it for the other tests
        LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = originalThreshold;
    }
}
@Test
public void testUnionSortedLists_Multiway_merge () {
    // Targets the MultiwayLongMerger branch of LongList.union(), which is taken
    // when more than five sorted lists have to be merged. All six lists start
    // with the value 1, so every pair overlaps and no two of them could be
    // concatenated into one.
    final LongList first = LongList.of(1, 2, 3);
    final LongList second = LongList.of(1, 3, 4);
    final LongList third = LongList.of(1, 4, 10, 11);
    final LongList fourth = LongList.of(1, 6, 9);
    final LongList fifth = LongList.of(1, 123, 144);
    final LongList sixth = LongList.of(1, 411, 1011, 1111);

    // the merged result contains every distinct value exactly once
    final LongList expected = LongList.of(1, 2, 3, 4, 6, 9, 10, 11, 123, 144, 411, 1011, 1111);
    final LongList actual = LongList.union(first, second, third, fourth, fifth, sixth);
    Assertions.assertEquals(expected, actual);

    // the order of the arguments must not influence the result
    Assertions.assertEquals(
            LongList.union(first, second, third, fourth, fifth, sixth),
            LongList.union(second, third, first, fourth, fifth, sixth));
    Assertions.assertEquals(
            LongList.union(first, second, third, fourth, fifth, sixth),
            LongList.union(second, fourth, sixth, first, fifth, third));
}
@Test
public void testUnionSortedLists_ten_lists_fifteen_elements_random() {
    // ten lists, each filled with fifteen random values
    final int numLists = 10;
    final int valuesPerList = 15;
    final boolean randomValues = true;
    testUnionSortedLists(numLists, valuesPerList, randomValues);
}
@Test
public void testUnionSortedLists_ten_lists_fifteen_elements_equal() {
    // ten identical lists, each holding the values 0..14
    final int numLists = 10;
    final int valuesPerList = 15;
    final boolean randomValues = false;
    testUnionSortedLists(numLists, valuesPerList, randomValues);
}
/**
 * Builds {@code numLists} sorted lists with {@code values} elements each and
 * checks the union of all of them.
 * <p>
 * The expected result is obtained by appending all lists to one list and
 * computing the two-list union of it with an empty list. Additionally the
 * union of two independently shuffled orders of the input must be equal.
 *
 * @param numLists number of input lists
 * @param values   number of elements per list
 * @param random   {@code true}: random values that are sorted afterwards;
 *                 {@code false}: every list holds 0..values-1
 */
private void testUnionSortedLists(int numLists, int values, boolean random) {
    final ThreadLocalRandom rng = ThreadLocalRandom.current();
    final List<LongList> inputs = new ArrayList<>();

    int listsToCreate = numLists;
    while (listsToCreate > 0) {
        final LongList current = new LongList(values);
        if (!random) {
            LongStream.range(0, values).forEachOrdered(current::add);
        } else {
            for (int added = 0; added < values; added++) {
                current.add(rng.nextLong());
            }
            current.sort();
        }
        inputs.add(current);
        listsToCreate--;
    }

    final LongList actual = LongList.union(inputs);

    // reference result: append everything, then let the two-list union clean it up
    final LongList allValues = new LongList();
    allValues.addAll(inputs);
    final LongList expected = LongList.union(allValues, LongList.of());
    Assertions.assertEquals(expected, actual);

    // the order of the input lists must not matter
    Collections.shuffle(inputs);
    final LongList firstShuffledUnion = LongList.union(inputs);
    Collections.shuffle(inputs);
    final LongList secondShuffledUnion = LongList.union(inputs);
    Assertions.assertEquals(firstShuffledUnion, secondShuffledUnion);
}
@Test @Test
public void testUnionUnsortedLists() { public void testUnionUnsortedLists() {
final LongList a = LongList.of(1, 0, 3, 4); final LongList a = LongList.of(1, 0, 3, 4);