improve performance of LongList.union

This commit is contained in:
2020-11-15 12:39:07 +01:00
parent 54fbebf0b7
commit 6bd4b9b424
4 changed files with 344 additions and 100 deletions

View File

@@ -6,8 +6,10 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Spliterator.OfLong;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.LongStream;
import java.util.stream.StreamSupport;
@@ -31,6 +33,12 @@ public final class LongList implements Serializable, Cloneable {
private static final long[] EMPTY_ARRAY = {};
/**
* If the average length of the lists is longer than this value, then we'll
* first try to concatenate non-overlapping lists before the union is computed.
*/
public static int FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 500;
/**
* The array containing the values. It is transient, so that we can implement
* our own serialization.
@@ -606,6 +614,14 @@ public final class LongList implements Serializable, Cloneable {
return data[pos];
}
/**
 * Returns the value stored at the first position of this list.
 * <p>
 * Delegates to {@code get(0)}; presumably this fails for an empty list because
 * {@code get} is the bounds-checked accessor (to be confirmed against
 * {@code get}).
 *
 * @return the first value
 */
public long first() {
    final int firstIndex = 0;
    return get(firstIndex);
}
/**
 * Returns the value stored at the last position of this list.
 * <p>
 * Delegates to {@code get(size() - 1)}; presumably this fails for an empty
 * list because the requested index is then {@code -1} (to be confirmed against
 * {@code get}).
 *
 * @return the last value
 */
public long last() {
    final int lastIndex = size() - 1;
    return get(lastIndex);
}
/**
* Unsafe version of {@link #get(long)} that does not check for out of bounds
* access if assertions are disabled. The caller has to make sure that pos is
@@ -669,7 +685,7 @@ public final class LongList implements Serializable, Cloneable {
System.arraycopy(data, 0, input, 0, size);
return input;
}
/**
 * Exposes the array that holds the values of this list.
 * <p>
 * The array itself is returned, not a copy, so changes made by the caller are
 * visible in this list.
 *
 * @return the internal value array
 */
long[] getArrayInternal() {
    return this.data;
}
@@ -1028,6 +1044,7 @@ public final class LongList implements Serializable, Cloneable {
try {
final LongList result = (LongList) super.clone();
result.data = size == 0 ? EMPTY_ARRAY : Arrays.copyOf(data, size);
result.sorted = sorted;
return result;
} catch (final CloneNotSupportedException e) {
throw new IllegalStateException(e);
@@ -1157,7 +1174,7 @@ public final class LongList implements Serializable, Cloneable {
* TODO check time complexity If all lists are sorted, then the time complexity
* is O(n+m), where n is the length of the first list and m the length of the
* second list. If at least one list is not sorted, then the time complexity is
* O(m*log(m)), where m is the length of the longer list.
* O(m*log(m)), where m is the length of the longest list.
*
* @param longLists the lists
* @return the union of both lists
@@ -1184,28 +1201,173 @@ public final class LongList implements Serializable, Cloneable {
case 0:
return new LongList();
case 1:
return longLists.iterator().next().clone();
// remove duplicate values
return unionInternal(longLists.iterator().next(), LongList.of());
case 2:
final Iterator<LongList> it = longLists.iterator();
final LongList a = it.next();
final LongList b = it.next();
return unionInternal(a, b);
default:
final Collection<LongList> sortedLists = subsetOfSortedLists(longLists);
final Collection<LongList> unsortedLists = subsetOfUnsortedLists(longLists);
final List<LongList> sortedLists = toSortedLists(longLists);
final LongList unionSorted = MultiwayLongMerger.unionSorted(sortedLists);
final double averageLength = totalLength(longLists) / (double) longLists.size();
final LongList result;
if (unsortedLists.isEmpty()) {
result = unionSorted;
final List<LongList> sortedConcatenatedLists;
// benchmarks showed that concatenation is beneficial for longer lists
if (averageLength > FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN)
{
final ListConcatenater listConcatenater = new ListConcatenater(sortedLists);
sortedConcatenatedLists = listConcatenater.concatenateNonOverlapping();
} else {
final LongList unionUnsorted = unionUnsorted(unsortedLists);
result = unionInternal(unionSorted, unionUnsorted);
sortedConcatenatedLists = sortedLists;
}
switch (sortedConcatenatedLists.size()) {
case 0:
return new LongList();
case 1:
// remove duplicate values
return unionInternal(sortedConcatenatedLists.get(0), LongList.of());
case 2:
case 3:
case 4:
case 5:
// benchmarks have shown that the trivial merge is faster when merging only a
// few lists
return unionRepeatedTwowayMerge(sortedConcatenatedLists);
default:
final LongList multiwayMerged = MultiwayLongMerger.unionSorted(sortedConcatenatedLists);
return multiwayMerged;
}
}
}
/**
 * Sums up the sizes of all given lists.
 * <p>
 * The sum is accumulated in an {@code int} and therefore wraps around if the
 * combined size exceeds {@link Integer#MAX_VALUE}.
 *
 * @param longLists the lists whose sizes are added
 * @return the sum of the sizes of all lists
 */
private static int totalLength(Collection<LongList> longLists) {
    int sum = 0;
    final Iterator<LongList> lists = longLists.iterator();
    while (lists.hasNext()) {
        sum += lists.next().size();
    }
    return sum;
}
/**
 * Computes the union of several lists by folding them, one after the other,
 * into an intermediate result with the two-way {@code unionSorted}.
 * <p>
 * If the list contains exactly one element, that element is returned as is.
 *
 * @param sortedLongLists the lists to merge; must contain at least one list
 * @return the result of merging all lists from left to right
 */
private static LongList unionRepeatedTwowayMerge(final List<LongList> sortedLongLists) {
    LongList merged = sortedLongLists.get(0);
    // everything after the first list is merged into the running result
    for (final LongList next : sortedLongLists.subList(1, sortedLongLists.size())) {
        merged = LongList.unionSorted(merged, next);
    }
    return merged;
}
/**
 * Reduces the number of lists by chaining lists whose value ranges do not
 * overlap: a list may be appended to another list if its first value is
 * greater than the other list's last value.
 * <p>
 * The lists are indexed by their first and by their last value. The lists
 * passed to the constructor are expected to be sorted (the class itself does
 * not verify this).
 */
private static class ListConcatenater {

    /**
     * A chain of lists that will be concatenated, in insertion order, into a
     * single {@link LongList} by {@link #toLongList()}.
     */
    private static class ListLongList {
        private final List<LongList> list = new ArrayList<>();

        public ListLongList(LongList longList) {
            list.add(longList);
        }

        /** Appends all lists of the given chain to the end of this chain. */
        public void add(ListLongList listLongList) {
            list.addAll(listLongList.list);
        }

        /**
         * Materializes the chain. A chain with a single list returns that list
         * itself (no copy); longer chains are copied into a new list.
         */
        public LongList toLongList() {
            switch (list.size()) {
            case 0:
                return new LongList(0);
            case 1:
                return list.get(0);
            default:
                // sum as long, so that an overflow is detected instead of wrapping
                final int capacity = Math.toIntExact(list.stream().mapToLong(LongList::size).sum());
                final LongList result = new LongList(capacity);
                result.addAll(list);
                return result;
            }
        }

        /** First value of the first list in the chain. */
        public long first() {
            return list.get(0).first();
        }

        /** Last value of the last list in the chain. */
        public long last() {
            return list.get(list.size() - 1).last();
        }
    }

    /**
     * Chains by their first value. Invariant: contains no empty buckets, so
     * that a ceiling lookup always yields a usable chain.
     */
    private final TreeMap<Long, List<ListLongList>> lowestValueMap = new TreeMap<>();

    /**
     * Chains by their last value. Buckets may become empty, but keys are never
     * removed, because this map is iterated while chains are re-indexed.
     */
    private final TreeMap<Long, List<ListLongList>> highestValueMap = new TreeMap<>();

    /**
     * Indexes the given lists. Empty lists are skipped, because they have
     * neither a first nor a last value and contribute nothing to a union.
     *
     * @param sortedLongLists the lists to concatenate
     */
    public ListConcatenater(final Collection<LongList> sortedLongLists) {
        sortedLongLists.stream()
                .filter(longList -> !longList.isEmpty())
                .map(ListLongList::new)
                .forEach(this::index);
    }

    private void index(ListLongList listLongList) {
        final long lowestValue = listLongList.first();
        final long highestValue = listLongList.last();
        lowestValueMap.computeIfAbsent(lowestValue, k -> new ArrayList<>()).add(listLongList);
        highestValueMap.computeIfAbsent(highestValue, k -> new ArrayList<>()).add(listLongList);
    }

    private void removeFromIndex(ListLongList listLongList) {
        final long lowestValue = listLongList.first();
        final List<ListLongList> lowestBucket = lowestValueMap.get(lowestValue);
        lowestBucket.remove(listLongList);
        if (lowestBucket.isEmpty()) {
            // Drop the empty bucket. Otherwise ceilingEntry() could return it and
            // hide non-empty buckets with a greater key, which would prevent
            // possible concatenations. lowestValueMap is never iterated, so this
            // structural change is safe.
            lowestValueMap.remove(lowestValue);
        }
        // the key is kept even if the bucket becomes empty, see highestValueMap
        highestValueMap.get(listLongList.last()).remove(listLongList);
    }

    /**
     * Greedily appends, to each chain, a chain that starts after the chain's
     * last value, and returns the resulting lists.
     *
     * @return the concatenated lists, in iteration order of the index by last
     *         value
     */
    public List<LongList> concatenateNonOverlapping() {
        for (Entry<Long, List<ListLongList>> e : highestValueMap.entrySet()) {
            final long highestValue = e.getKey();
            if (highestValue == Long.MAX_VALUE) {
                // nothing can start after MAX_VALUE; also avoids overflow in highestValue + 1
                continue;
            }

            final Iterator<ListLongList> it = e.getValue().iterator();
            while (it.hasNext()) {
                final ListLongList lowList = it.next();
                final Entry<Long, List<ListLongList>> ceilingEntry = lowestValueMap.ceilingEntry(highestValue + 1);
                if (ceilingEntry == null) {
                    // no chain starts after the end of lowList
                    continue;
                }

                // buckets in lowestValueMap are never empty, see removeFromIndex()
                final ListLongList highList = ceilingEntry.getValue().get(0);
                removeFromIndex(highList);
                it.remove(); // prevents concurrent modification that would happen in removeFromIndex()
                removeFromIndex(lowList);

                lowList.add(highList);
                // The new last value is the last value of highList. That key already
                // exists in highestValueMap and is greater than the current key, so
                // the map is not structurally modified and the extended chain is
                // visited again later in this iteration.
                index(lowList);
            }
        }

        final List<LongList> result = new ArrayList<>();
        for (List<ListLongList> l : highestValueMap.values()) {
            for (ListLongList listLongList : l) {
                result.add(listLongList.toLongList());
            }
        }
        return result;
    }
}
/**
 * Builds a list that contains a sorted version of every non-empty input list.
 * <p>
 * Lists that report themselves as sorted are added as they are (no copy).
 * Unsorted lists are cloned and the clone is sorted, so the input lists are
 * not modified. Empty lists are left out.
 *
 * @param longLists the lists to process
 * @return the sorted, non-empty lists in iteration order of the input
 */
private static List<LongList> toSortedLists(final Collection<LongList> longLists) {
    final List<LongList> sortedLists = new ArrayList<>();
    for (final LongList candidate : longLists) {
        if (candidate.isEmpty()) {
            // an empty list contributes nothing to a merge
            continue;
        }
        if (candidate.isSorted()) {
            sortedLists.add(candidate);
            continue;
        }
        final LongList sortedCopy = candidate.clone();
        sortedCopy.sort();
        sortedLists.add(sortedCopy);
    }
    return sortedLists;
}
private static LongList unionSorted(final LongList a, final LongList b) {
@@ -1270,55 +1432,21 @@ public final class LongList implements Serializable, Cloneable {
/**
 * Computes the union of two lists of which at least one may be unsorted.
 * <p>
 * Both lists are copied, so the arguments are not modified. A copy is only
 * sorted if it does not already report itself as sorted; sorting it
 * unconditionally first would make that check pointless and would re-sort
 * lists that are already in order. The sorted copies are then merged with
 * {@code unionSorted}.
 * <p>
 * NOTE(review): this relies on the copy constructor carrying over the
 * sortedness information of the source list; confirm against the constructor.
 *
 * @param a the first list
 * @param b the second list
 * @return the union of both lists
 */
private static LongList unionUnsorted(final LongList a, final LongList b) {
    final LongList aSorted = new LongList(a);
    if (!aSorted.isSorted()) {
        aSorted.parallelSort();
    }

    final LongList bSorted = new LongList(b);
    if (!bSorted.isSorted()) {
        bSorted.parallelSort();
    }

    return unionSorted(aSorted, bSorted);
}
/**
 * Computes the union of an arbitrary number of lists without requiring them
 * to be sorted.
 * <p>
 * Every list is copied and the copy is sorted, so the input lists are not
 * modified. The sorted copies are handed to
 * {@code MultiwayLongMerger.unionSorted}.
 *
 * @param longLists the lists to unite
 * @return the result of the multiway merge of the sorted copies
 */
private static LongList unionUnsorted(Collection<LongList> longLists) {
    final List<LongList> sortedCopies = new ArrayList<>();
    final Iterator<LongList> lists = longLists.iterator();
    while (lists.hasNext()) {
        final LongList sortedCopy = new LongList(lists.next());
        sortedCopy.sort();
        sortedCopies.add(sortedCopy);
    }
    return MultiwayLongMerger.unionSorted(sortedCopies);
}
/**
 * Recomputes the {@code sorted} flag from the current content.
 * <p>
 * The flag is set to {@code true} if the first {@code size} values are in
 * non-descending order (equal neighbours are allowed), and to {@code false} as
 * soon as a value is found that is smaller than its predecessor.
 */
private void checkIfSorted() {
    sorted = true;
    for (int next = 1; next < size; next++) {
        if (data[next - 1] > data[next]) {
            sorted = false;
            break;
        }
    }
}
/**
 * Selects the lists that report themselves as sorted.
 *
 * @param longLists the lists to filter
 * @return a new list with the sorted lists, in iteration order of the input
 */
private static Collection<LongList> subsetOfSortedLists(Collection<LongList> longLists) {
    final List<LongList> sortedOnly = new ArrayList<>();
    final Iterator<LongList> lists = longLists.iterator();
    while (lists.hasNext()) {
        final LongList candidate = lists.next();
        if (!candidate.isSorted()) {
            continue;
        }
        sortedOnly.add(candidate);
    }
    return sortedOnly;
}
/**
 * Selects the lists that do not report themselves as sorted.
 *
 * @param longLists the lists to filter
 * @return a new list with the unsorted lists, in iteration order of the input
 */
private static Collection<LongList> subsetOfUnsortedLists(Collection<LongList> longLists) {
    final List<LongList> unsortedOnly = new ArrayList<>();
    final Iterator<LongList> lists = longLists.iterator();
    while (lists.hasNext()) {
        final LongList candidate = lists.next();
        if (candidate.isSorted()) {
            continue;
        }
        unsortedOnly.add(candidate);
    }
    return unsortedOnly;
}
}

View File

@@ -250,10 +250,4 @@ class MultiwayLongMerger {
}
}
/**
 * Prints, for every {@code i} from 0 to 19, the value of {@code i} followed
 * by a space and {@code Long.highestOneBit(i - 1) << 1}.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    int value = 0;
    while (value < 20) {
        final long shifted = Long.highestOneBit(value - 1) << 1;
        System.out.println(value + " " + shifted);
        value++;
    }
}
}