improve performance of LongList.union
This commit is contained in:
@@ -21,19 +21,29 @@ import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Fork(2)
|
||||
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 3, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Fork(1)
|
||||
public class BenchmarkMultiwayMerge {
|
||||
|
||||
@Param({ "10000" , "20000" })
|
||||
private int values;
|
||||
private static final String OPTION_RANDOM = "random";
|
||||
|
||||
@Param({ "3","5", "10","20" })
|
||||
private int numLists;
|
||||
private static final String OPTION_NON_OVERLAP = "non-overlap";
|
||||
|
||||
@Param({ "true" })
|
||||
private boolean random;
|
||||
// @Param({ "3","5", "10","20", "1000" })
|
||||
@Param({ "1000" })
|
||||
private int anumLists;
|
||||
|
||||
// @Param({ "10", "1000" , "20000" })
|
||||
@Param({ "1000" })
|
||||
private int bvalues;
|
||||
|
||||
@Param({ OPTION_RANDOM, OPTION_NON_OVERLAP })
|
||||
// @Param({ OPTION_NON_OVERLAP})
|
||||
private String ctype;
|
||||
|
||||
@Param({ "0", "500" })
|
||||
private int dconcatNonOverlap;
|
||||
|
||||
private List<LongList> longSorted = null;
|
||||
|
||||
@@ -41,20 +51,39 @@ public class BenchmarkMultiwayMerge {
|
||||
public void setup() throws Exception {
|
||||
ThreadLocalRandom rng = ThreadLocalRandom.current();
|
||||
longSorted = new ArrayList<>();
|
||||
for (int i = 0; i < numLists; i++) {
|
||||
LongList list = new LongList(values);
|
||||
if (random) {
|
||||
for (int j = 0; j < values; j++) {
|
||||
list.add(rng.nextLong());
|
||||
if (ctype.equalsIgnoreCase(OPTION_NON_OVERLAP)) {
|
||||
|
||||
final LongList list = randomList(bvalues * anumLists, rng);
|
||||
list.sort();
|
||||
for (int i = 0; i < anumLists; i++) {
|
||||
longSorted.add(list.sublist(i * bvalues, (i + 1) * bvalues));
|
||||
}
|
||||
|
||||
} else {
|
||||
for (int i = 0; i < anumLists; i++) {
|
||||
final LongList list;
|
||||
if (ctype.equalsIgnoreCase(OPTION_RANDOM)) {
|
||||
list = randomList(bvalues, rng);
|
||||
list.sort();
|
||||
} else {
|
||||
LongStream.range(0, values).forEachOrdered(list::add);
|
||||
list = new LongList(bvalues);
|
||||
LongStream.range(0, bvalues).forEachOrdered(list::add);
|
||||
}
|
||||
longSorted.add(list);
|
||||
}
|
||||
}
|
||||
|
||||
LongList.FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = dconcatNonOverlap;
|
||||
}
|
||||
|
||||
private LongList randomList(int values, ThreadLocalRandom rng) {
|
||||
final LongList list = new LongList(values);
|
||||
for (int j = 0; j < values; j++) {
|
||||
list.add(rng.nextLong());
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
@TearDown
|
||||
public void tearDown() {
|
||||
longSorted = null;
|
||||
@@ -66,7 +95,7 @@ public class BenchmarkMultiwayMerge {
|
||||
LongList.union(longSorted);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// @Benchmark
|
||||
public void testUnionSortedLists_TwowayMergeImplementation() throws Exception {
|
||||
|
||||
twowayMerge(longSorted);
|
||||
@@ -81,26 +110,17 @@ public class BenchmarkMultiwayMerge {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
System.out.println("\n\n----------------\nstart");
|
||||
// -XX:+PrintCompilation
|
||||
|
||||
if (args != null) {
|
||||
for (int i = 0; i < 80; i++) {
|
||||
BenchmarkMultiwayMerge benchmark = new BenchmarkMultiwayMerge();
|
||||
benchmark.numLists = 10;
|
||||
benchmark.values = 10000;
|
||||
benchmark.anumLists = 1000;
|
||||
benchmark.bvalues = 1000;
|
||||
benchmark.ctype = "non-overlapping";
|
||||
benchmark.dconcatNonOverlap = 500;
|
||||
benchmark.setup();
|
||||
long start = System.nanoTime();
|
||||
benchmark.testUnionSortedLists_MultiwayMerge();
|
||||
} else {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
BenchmarkMultiwayMerge benchmark = new BenchmarkMultiwayMerge();
|
||||
benchmark.numLists = 10;
|
||||
benchmark.values = 10000;
|
||||
benchmark.setup();
|
||||
System.out.println("\n\n----------------\n" + i);
|
||||
for (int j = 0; j < 1000; j++) {
|
||||
// benchmark.testUnionSortedLists_MultiwayMerge();
|
||||
benchmark.testUnionSortedLists_TwowayMergeImplementation();
|
||||
}
|
||||
}
|
||||
System.out.println("total: " + (System.nanoTime() - start) / 1_000_000.0 + " ms");
|
||||
}
|
||||
System.out.println("done");
|
||||
}
|
||||
|
||||
@@ -6,8 +6,10 @@ import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Random;
|
||||
import java.util.Spliterator.OfLong;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.stream.LongStream;
|
||||
import java.util.stream.StreamSupport;
|
||||
@@ -31,6 +33,12 @@ public final class LongList implements Serializable, Cloneable {
|
||||
|
||||
private static final long[] EMPTY_ARRAY = {};
|
||||
|
||||
/**
|
||||
* If the average length of the lists is longer than this value, then we'll
|
||||
* first try to concatenate non-overlapping lists before the union is computed.
|
||||
*/
|
||||
public static int FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 500;
|
||||
|
||||
/**
|
||||
* The array containing the values. It is transient, so that we can implement
|
||||
* our own serialization.
|
||||
@@ -606,6 +614,14 @@ public final class LongList implements Serializable, Cloneable {
|
||||
return data[pos];
|
||||
}
|
||||
|
||||
public long first() {
|
||||
return get(0);
|
||||
}
|
||||
|
||||
public long last() {
|
||||
return get(size() - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unsafe version of {@link #get(long)} that does not check for out of bounds
|
||||
* access if assertions are disabled. The caller has to make sure that pos is
|
||||
@@ -1028,6 +1044,7 @@ public final class LongList implements Serializable, Cloneable {
|
||||
try {
|
||||
final LongList result = (LongList) super.clone();
|
||||
result.data = size == 0 ? EMPTY_ARRAY : Arrays.copyOf(data, size);
|
||||
result.sorted = sorted;
|
||||
return result;
|
||||
} catch (final CloneNotSupportedException e) {
|
||||
throw new IllegalStateException(e);
|
||||
@@ -1157,7 +1174,7 @@ public final class LongList implements Serializable, Cloneable {
|
||||
* TODO check time complexity If all lists are sorted, then the time complexity
|
||||
* is O(n+m), where n is the length of the first list and m the length of the
|
||||
* second list. If at least one list is not sorted, then the time complexity is
|
||||
* O(m*log(m)), where m is the length of the longer list.
|
||||
* O(m*log(m)), where m is the length of the longest list.
|
||||
*
|
||||
* @param longLists the lists
|
||||
* @return the union of both lists
|
||||
@@ -1184,28 +1201,173 @@ public final class LongList implements Serializable, Cloneable {
|
||||
case 0:
|
||||
return new LongList();
|
||||
case 1:
|
||||
return longLists.iterator().next().clone();
|
||||
// remove duplicate values
|
||||
return unionInternal(longLists.iterator().next(), LongList.of());
|
||||
case 2:
|
||||
final Iterator<LongList> it = longLists.iterator();
|
||||
final LongList a = it.next();
|
||||
final LongList b = it.next();
|
||||
return unionInternal(a, b);
|
||||
default:
|
||||
final Collection<LongList> sortedLists = subsetOfSortedLists(longLists);
|
||||
final Collection<LongList> unsortedLists = subsetOfUnsortedLists(longLists);
|
||||
final List<LongList> sortedLists = toSortedLists(longLists);
|
||||
|
||||
final LongList unionSorted = MultiwayLongMerger.unionSorted(sortedLists);
|
||||
final double averageLength = totalLength(longLists) / (double) longLists.size();
|
||||
|
||||
final LongList result;
|
||||
if (unsortedLists.isEmpty()) {
|
||||
result = unionSorted;
|
||||
final List<LongList> sortedConcatenatedLists;
|
||||
// benchmarks showed that concatenation is beneficial for longer lists
|
||||
if (averageLength > FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN)
|
||||
{
|
||||
final ListConcatenater listConcatenater = new ListConcatenater(sortedLists);
|
||||
sortedConcatenatedLists = listConcatenater.concatenateNonOverlapping();
|
||||
} else {
|
||||
final LongList unionUnsorted = unionUnsorted(unsortedLists);
|
||||
result = unionInternal(unionSorted, unionUnsorted);
|
||||
sortedConcatenatedLists = sortedLists;
|
||||
}
|
||||
|
||||
switch (sortedConcatenatedLists.size()) {
|
||||
case 0:
|
||||
return new LongList();
|
||||
case 1:
|
||||
// remove duplicate values
|
||||
return unionInternal(sortedConcatenatedLists.get(0), LongList.of());
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
// benchmarks have shown that the trivial merge is faster when merging only a
|
||||
// few lists
|
||||
return unionRepeatedTwowayMerge(sortedConcatenatedLists);
|
||||
default:
|
||||
final LongList multiwayMerged = MultiwayLongMerger.unionSorted(sortedConcatenatedLists);
|
||||
return multiwayMerged;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static int totalLength(Collection<LongList> longLists) {
|
||||
int totalLength = 0;
|
||||
for (LongList longList : longLists) {
|
||||
totalLength += longList.size();
|
||||
}
|
||||
return totalLength;
|
||||
}
|
||||
|
||||
private static LongList unionRepeatedTwowayMerge(final List<LongList> sortedLongLists) {
|
||||
|
||||
LongList result = sortedLongLists.get(0);
|
||||
for (int i = 1; i < sortedLongLists.size(); i++) {
|
||||
result = LongList.unionSorted(result, sortedLongLists.get(i));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static class ListConcatenater {
|
||||
|
||||
private static class ListLongList {
|
||||
private final List<LongList> list = new ArrayList<>();
|
||||
|
||||
public ListLongList(LongList longList) {
|
||||
list.add(longList);
|
||||
}
|
||||
|
||||
public void add(ListLongList listLongList) {
|
||||
list.addAll(listLongList.list);
|
||||
}
|
||||
|
||||
public LongList toLongList() {
|
||||
switch (list.size()) {
|
||||
case 0:
|
||||
return new LongList(0);
|
||||
case 1:
|
||||
return list.get(0);
|
||||
default:
|
||||
int capacity = Math.toIntExact(list.stream().mapToLong(LongList::size).sum());
|
||||
final LongList result = new LongList(capacity);
|
||||
result.addAll(list);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
public long first() {
|
||||
return list.get(0).first();
|
||||
}
|
||||
|
||||
public long last() {
|
||||
return list.get(list.size() - 1).last();
|
||||
}
|
||||
}
|
||||
|
||||
final TreeMap<Long, List<ListLongList>> lowestValueMap = new TreeMap<>();
|
||||
final TreeMap<Long, List<ListLongList>> highestValueMap = new TreeMap<>();
|
||||
|
||||
public ListConcatenater(final Collection<LongList> sortedLongLists) {
|
||||
sortedLongLists.stream().map(ListLongList::new).forEach(this::index);
|
||||
}
|
||||
|
||||
private void index(ListLongList listLongList) {
|
||||
final long lowestValue = listLongList.first();
|
||||
final long highestValue = listLongList.last();
|
||||
lowestValueMap.computeIfAbsent(lowestValue, k -> new ArrayList<>()).add(listLongList);
|
||||
highestValueMap.computeIfAbsent(highestValue, k -> new ArrayList<>()).add(listLongList);
|
||||
}
|
||||
|
||||
private void removeFromIndex(ListLongList listLongList) {
|
||||
lowestValueMap.get(listLongList.first()).remove(listLongList);
|
||||
highestValueMap.get(listLongList.last()).remove(listLongList);
|
||||
}
|
||||
|
||||
public List<LongList> concatenateNonOverlapping() {
|
||||
for (Entry<Long, List<ListLongList>> e : highestValueMap.entrySet()) {
|
||||
final long highestValue = e.getKey();
|
||||
if (highestValue == Long.MAX_VALUE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final Iterator<ListLongList> it = e.getValue().iterator();
|
||||
while (it.hasNext()) {
|
||||
final ListLongList lowList = it.next();
|
||||
final Entry<Long, List<ListLongList>> ceilingEntry = lowestValueMap.ceilingEntry(highestValue + 1);
|
||||
if (ceilingEntry != null && !ceilingEntry.getValue().isEmpty()) {
|
||||
final ListLongList highList = ceilingEntry.getValue().get(0);
|
||||
removeFromIndex(highList);
|
||||
|
||||
it.remove(); // prevents concurrent modification that would happen in removeFromIndex()
|
||||
removeFromIndex(lowList);
|
||||
|
||||
lowList.add(highList);
|
||||
|
||||
index(lowList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
for (List<ListLongList> l : highestValueMap.values()) {
|
||||
for (ListLongList listLongList : l) {
|
||||
result.add(listLongList.toLongList());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static List<LongList> toSortedLists(final Collection<LongList> longLists) {
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (longList.isEmpty()) {
|
||||
// skip, no need to merge an empty list
|
||||
} else if (longList.isSorted()) {
|
||||
result.add(longList);
|
||||
} else {
|
||||
final LongList copy = longList.clone();
|
||||
copy.sort();
|
||||
result.add(copy);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static LongList unionSorted(final LongList a, final LongList b) {
|
||||
@@ -1270,55 +1432,21 @@ public final class LongList implements Serializable, Cloneable {
|
||||
|
||||
private static LongList unionUnsorted(final LongList a, final LongList b) {
|
||||
final LongList aSorted = new LongList(a);
|
||||
if (!aSorted.isSorted()) {
|
||||
aSorted.parallelSort();
|
||||
}
|
||||
final LongList bSorted = new LongList(b);
|
||||
if (!bSorted.isSorted()) {
|
||||
bSorted.parallelSort();
|
||||
}
|
||||
|
||||
return unionSorted(aSorted, bSorted);
|
||||
}
|
||||
|
||||
private static LongList unionUnsorted(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> sortedLists = new ArrayList<>();
|
||||
for (LongList longList : longLists) {
|
||||
final LongList copy = new LongList(longList);
|
||||
copy.sort();
|
||||
sortedLists.add(copy);
|
||||
}
|
||||
return MultiwayLongMerger.unionSorted(sortedLists);
|
||||
}
|
||||
|
||||
private void checkIfSorted() {
|
||||
sorted = true;
|
||||
for (int i = 1; i < size && sorted; i++) {
|
||||
sorted = data[i - 1] <= data[i];
|
||||
}
|
||||
}
|
||||
|
||||
private static Collection<LongList> subsetOfSortedLists(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (longList.isSorted()) {
|
||||
result.add(longList);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static Collection<LongList> subsetOfUnsortedLists(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (!longList.isSorted()) {
|
||||
result.add(longList);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -250,10 +250,4 @@ class MultiwayLongMerger {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
for (int i = 0; i < 20; i++) {
|
||||
System.out.println(i + " " + (Long.highestOneBit(i - 1) << 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,9 @@ import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
@@ -1639,6 +1641,106 @@ public class LongListTest {
|
||||
Assertions.assertEquals(LongList.union(a, b, c, d), LongList.union(d, b, a, c));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_Concatenating_with_empty_list() {
|
||||
// aims to use the ListConcatenater in LongList.union()
|
||||
// that means we need at least three lists and they must all be non-overlapping
|
||||
final LongList a = LongList.of(1, 2, 3);
|
||||
final LongList b = LongList.of();
|
||||
final LongList c = LongList.of(10,11);
|
||||
|
||||
final LongList actual = LongList.union(a, b, c);
|
||||
Assertions.assertEquals(LongList.of(1,2,3,10,11), actual);
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union( b, a, c));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_Concatenating_results_in_one_list() {
|
||||
// aims to use the ListConcatenater in LongList.union()
|
||||
// that means we need at least three lists and they must all be non-overlapping
|
||||
final LongList a = LongList.of(1, 2, 3);
|
||||
final LongList b = LongList.of(4);
|
||||
final LongList c = LongList.of(10,11);
|
||||
|
||||
final LongList actual = LongList.union(a, b, c);
|
||||
Assertions.assertEquals(LongList.of(1,2,3,4,10,11), actual);
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union( b, a, c));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_Concatenating_results_in_two_lists() {
|
||||
// aims to use the ListConcatenater in LongList.union()
|
||||
// that means we need at least three lists
|
||||
final LongList a = LongList.of(1, 2, 3);
|
||||
final LongList b = LongList.of(3, 4);
|
||||
final LongList c = LongList.of(4, 10,11); // can be concatenated to a
|
||||
|
||||
final LongList actual = LongList.union(a, b, c);
|
||||
Assertions.assertEquals(LongList.of(1,2,3,4,10,11), actual);
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union( b, a, c));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_Multiway_merge () {
|
||||
// aims to use the MultiwayLongMerge
|
||||
// that means we need overlapping lists so that ListConcatenater will return at least six lists
|
||||
// this is done by adding 1 to all six lists
|
||||
final LongList a = LongList.of(1, 2, 3);
|
||||
final LongList b = LongList.of(1, 3, 4);
|
||||
final LongList c = LongList.of(1, 4, 10,11);
|
||||
final LongList d = LongList.of(1, 6,9);
|
||||
final LongList e = LongList.of(1, 123, 144);
|
||||
final LongList f = LongList.of(1, 411, 1011,1111);
|
||||
|
||||
final LongList actual = LongList.union(a, b, c,d,e,f);
|
||||
Assertions.assertEquals(LongList.of(1,2,3,4,6,9,10,11,123,144,411,1011,1111), actual);
|
||||
Assertions.assertEquals(LongList.union(a, b, c,d,e,f), LongList.union(b, c, a,d,e,f));
|
||||
Assertions.assertEquals(LongList.union(a, b, c,d,e,f), LongList.union( b, d,f,a,e, c));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_ten_lists_fifteen_elements_random() {
|
||||
testUnionSortedLists(10, 15, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_ten_lists_fifteen_elements_equal() {
|
||||
testUnionSortedLists(10, 15, false);
|
||||
}
|
||||
|
||||
private void testUnionSortedLists(int numLists, int values, boolean random) {
|
||||
ThreadLocalRandom rng = ThreadLocalRandom.current();
|
||||
List<LongList> longSorted = new ArrayList<>();
|
||||
for (int i = 0; i < numLists; i++) {
|
||||
LongList list = new LongList(values);
|
||||
if (random) {
|
||||
for (int j = 0; j < values; j++) {
|
||||
list.add(rng.nextLong());
|
||||
}
|
||||
list.sort();
|
||||
} else {
|
||||
LongStream.range(0, values).forEachOrdered(list::add);
|
||||
}
|
||||
longSorted.add(list);
|
||||
}
|
||||
|
||||
final LongList actual = LongList.union(longSorted);
|
||||
|
||||
final LongList concatenatedList = new LongList();
|
||||
concatenatedList.addAll(longSorted);
|
||||
final LongList expected = LongList.union(concatenatedList, LongList.of());
|
||||
Assertions.assertEquals(expected, actual);
|
||||
|
||||
Collections.shuffle(longSorted);
|
||||
final LongList unionShuffled1 = LongList.union(longSorted);
|
||||
Collections.shuffle(longSorted);
|
||||
final LongList unionShuffled2 = LongList.union(longSorted);
|
||||
Assertions.assertEquals(unionShuffled1, unionShuffled2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionUnsortedLists() {
|
||||
final LongList a = LongList.of(1, 0, 3, 4);
|
||||
|
||||
Reference in New Issue
Block a user