improve performance of LongList.union
This commit is contained in:
@@ -6,8 +6,10 @@ import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Random;
|
||||
import java.util.Spliterator.OfLong;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.stream.LongStream;
|
||||
import java.util.stream.StreamSupport;
|
||||
@@ -31,6 +33,12 @@ public final class LongList implements Serializable, Cloneable {
|
||||
|
||||
private static final long[] EMPTY_ARRAY = {};
|
||||
|
||||
/**
|
||||
* If the average length of the lists is longer than this value, then we'll
|
||||
* first try to concatenate non-overlapping lists before the union is computed.
|
||||
*/
|
||||
public static int FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN = 500;
|
||||
|
||||
/**
|
||||
* The array containing the values. It is transient, so that we can implement
|
||||
* our own serialization.
|
||||
@@ -606,6 +614,14 @@ public final class LongList implements Serializable, Cloneable {
|
||||
return data[pos];
|
||||
}
|
||||
|
||||
public long first() {
|
||||
return get(0);
|
||||
}
|
||||
|
||||
public long last() {
|
||||
return get(size() - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unsafe version of {@link #get(long)} that does not check for out of bounds
|
||||
* access if assertions are disabled. The caller has to make sure that pos is
|
||||
@@ -669,7 +685,7 @@ public final class LongList implements Serializable, Cloneable {
|
||||
System.arraycopy(data, 0, input, 0, size);
|
||||
return input;
|
||||
}
|
||||
|
||||
|
||||
long[] getArrayInternal() {
|
||||
return data;
|
||||
}
|
||||
@@ -1028,6 +1044,7 @@ public final class LongList implements Serializable, Cloneable {
|
||||
try {
|
||||
final LongList result = (LongList) super.clone();
|
||||
result.data = size == 0 ? EMPTY_ARRAY : Arrays.copyOf(data, size);
|
||||
result.sorted = sorted;
|
||||
return result;
|
||||
} catch (final CloneNotSupportedException e) {
|
||||
throw new IllegalStateException(e);
|
||||
@@ -1157,7 +1174,7 @@ public final class LongList implements Serializable, Cloneable {
|
||||
* TODO check time complexity: If all lists are sorted, then the time complexity
|
||||
* is O(n+m), where n is the length of the first list and m the length of the
|
||||
* second list. If at least one list is not sorted, then the time complexity is
|
||||
* O(m*log(m)), where m is the length of the longer list.
|
||||
* O(m*log(m)), where m is the length of the longest list.
|
||||
*
|
||||
* @param longLists the lists
|
||||
* @return the union of both lists
|
||||
@@ -1184,28 +1201,173 @@ public final class LongList implements Serializable, Cloneable {
|
||||
case 0:
|
||||
return new LongList();
|
||||
case 1:
|
||||
return longLists.iterator().next().clone();
|
||||
// remove duplicate values
|
||||
return unionInternal(longLists.iterator().next(), LongList.of());
|
||||
case 2:
|
||||
final Iterator<LongList> it = longLists.iterator();
|
||||
final LongList a = it.next();
|
||||
final LongList b = it.next();
|
||||
return unionInternal(a, b);
|
||||
default:
|
||||
final Collection<LongList> sortedLists = subsetOfSortedLists(longLists);
|
||||
final Collection<LongList> unsortedLists = subsetOfUnsortedLists(longLists);
|
||||
final List<LongList> sortedLists = toSortedLists(longLists);
|
||||
|
||||
final LongList unionSorted = MultiwayLongMerger.unionSorted(sortedLists);
|
||||
final double averageLength = totalLength(longLists) / (double) longLists.size();
|
||||
|
||||
final LongList result;
|
||||
if (unsortedLists.isEmpty()) {
|
||||
result = unionSorted;
|
||||
final List<LongList> sortedConcatenatedLists;
|
||||
// benchmarks showed that concatenation is beneficial for longer lists
|
||||
if (averageLength > FLAGS_UNION_CONCATENATE_NON_OVERLAPPING_AVG_MIN)
|
||||
{
|
||||
final ListConcatenater listConcatenater = new ListConcatenater(sortedLists);
|
||||
sortedConcatenatedLists = listConcatenater.concatenateNonOverlapping();
|
||||
} else {
|
||||
final LongList unionUnsorted = unionUnsorted(unsortedLists);
|
||||
result = unionInternal(unionSorted, unionUnsorted);
|
||||
sortedConcatenatedLists = sortedLists;
|
||||
}
|
||||
|
||||
switch (sortedConcatenatedLists.size()) {
|
||||
case 0:
|
||||
return new LongList();
|
||||
case 1:
|
||||
// remove duplicate values
|
||||
return unionInternal(sortedConcatenatedLists.get(0), LongList.of());
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
// benchmarks have shown that the trivial merge is faster when merging only a
|
||||
// few lists
|
||||
return unionRepeatedTwowayMerge(sortedConcatenatedLists);
|
||||
default:
|
||||
final LongList multiwayMerged = MultiwayLongMerger.unionSorted(sortedConcatenatedLists);
|
||||
return multiwayMerged;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static int totalLength(Collection<LongList> longLists) {
|
||||
int totalLength = 0;
|
||||
for (LongList longList : longLists) {
|
||||
totalLength += longList.size();
|
||||
}
|
||||
return totalLength;
|
||||
}
|
||||
|
||||
private static LongList unionRepeatedTwowayMerge(final List<LongList> sortedLongLists) {
|
||||
|
||||
LongList result = sortedLongLists.get(0);
|
||||
for (int i = 1; i < sortedLongLists.size(); i++) {
|
||||
result = LongList.unionSorted(result, sortedLongLists.get(i));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static class ListConcatenater {
|
||||
|
||||
private static class ListLongList {
|
||||
private final List<LongList> list = new ArrayList<>();
|
||||
|
||||
public ListLongList(LongList longList) {
|
||||
list.add(longList);
|
||||
}
|
||||
|
||||
public void add(ListLongList listLongList) {
|
||||
list.addAll(listLongList.list);
|
||||
}
|
||||
|
||||
public LongList toLongList() {
|
||||
switch (list.size()) {
|
||||
case 0:
|
||||
return new LongList(0);
|
||||
case 1:
|
||||
return list.get(0);
|
||||
default:
|
||||
int capacity = Math.toIntExact(list.stream().mapToLong(LongList::size).sum());
|
||||
final LongList result = new LongList(capacity);
|
||||
result.addAll(list);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
public long first() {
|
||||
return list.get(0).first();
|
||||
}
|
||||
|
||||
public long last() {
|
||||
return list.get(list.size() - 1).last();
|
||||
}
|
||||
}
|
||||
|
||||
final TreeMap<Long, List<ListLongList>> lowestValueMap = new TreeMap<>();
|
||||
final TreeMap<Long, List<ListLongList>> highestValueMap = new TreeMap<>();
|
||||
|
||||
public ListConcatenater(final Collection<LongList> sortedLongLists) {
|
||||
sortedLongLists.stream().map(ListLongList::new).forEach(this::index);
|
||||
}
|
||||
|
||||
private void index(ListLongList listLongList) {
|
||||
final long lowestValue = listLongList.first();
|
||||
final long highestValue = listLongList.last();
|
||||
lowestValueMap.computeIfAbsent(lowestValue, k -> new ArrayList<>()).add(listLongList);
|
||||
highestValueMap.computeIfAbsent(highestValue, k -> new ArrayList<>()).add(listLongList);
|
||||
}
|
||||
|
||||
private void removeFromIndex(ListLongList listLongList) {
|
||||
lowestValueMap.get(listLongList.first()).remove(listLongList);
|
||||
highestValueMap.get(listLongList.last()).remove(listLongList);
|
||||
}
|
||||
|
||||
public List<LongList> concatenateNonOverlapping() {
|
||||
for (Entry<Long, List<ListLongList>> e : highestValueMap.entrySet()) {
|
||||
final long highestValue = e.getKey();
|
||||
if (highestValue == Long.MAX_VALUE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final Iterator<ListLongList> it = e.getValue().iterator();
|
||||
while (it.hasNext()) {
|
||||
final ListLongList lowList = it.next();
|
||||
final Entry<Long, List<ListLongList>> ceilingEntry = lowestValueMap.ceilingEntry(highestValue + 1);
|
||||
if (ceilingEntry != null && !ceilingEntry.getValue().isEmpty()) {
|
||||
final ListLongList highList = ceilingEntry.getValue().get(0);
|
||||
removeFromIndex(highList);
|
||||
|
||||
it.remove(); // prevents concurrent modification that would happen in removeFromIndex()
|
||||
removeFromIndex(lowList);
|
||||
|
||||
lowList.add(highList);
|
||||
|
||||
index(lowList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
for (List<ListLongList> l : highestValueMap.values()) {
|
||||
for (ListLongList listLongList : l) {
|
||||
result.add(listLongList.toLongList());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static List<LongList> toSortedLists(final Collection<LongList> longLists) {
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (longList.isEmpty()) {
|
||||
// skip, no need to merge an empty list
|
||||
} else if (longList.isSorted()) {
|
||||
result.add(longList);
|
||||
} else {
|
||||
final LongList copy = longList.clone();
|
||||
copy.sort();
|
||||
result.add(copy);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static LongList unionSorted(final LongList a, final LongList b) {
|
||||
@@ -1270,55 +1432,21 @@ public final class LongList implements Serializable, Cloneable {
|
||||
|
||||
private static LongList unionUnsorted(final LongList a, final LongList b) {
|
||||
final LongList aSorted = new LongList(a);
|
||||
aSorted.parallelSort();
|
||||
if (!aSorted.isSorted()) {
|
||||
aSorted.parallelSort();
|
||||
}
|
||||
final LongList bSorted = new LongList(b);
|
||||
bSorted.parallelSort();
|
||||
if (!bSorted.isSorted()) {
|
||||
bSorted.parallelSort();
|
||||
}
|
||||
|
||||
return unionSorted(aSorted, bSorted);
|
||||
}
|
||||
|
||||
private static LongList unionUnsorted(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> sortedLists = new ArrayList<>();
|
||||
for (LongList longList : longLists) {
|
||||
final LongList copy = new LongList(longList);
|
||||
copy.sort();
|
||||
sortedLists.add(copy);
|
||||
}
|
||||
return MultiwayLongMerger.unionSorted(sortedLists);
|
||||
}
|
||||
|
||||
private void checkIfSorted() {
|
||||
sorted = true;
|
||||
for (int i = 1; i < size && sorted; i++) {
|
||||
sorted = data[i - 1] <= data[i];
|
||||
}
|
||||
}
|
||||
|
||||
private static Collection<LongList> subsetOfSortedLists(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (longList.isSorted()) {
|
||||
result.add(longList);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static Collection<LongList> subsetOfUnsortedLists(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (!longList.isSorted()) {
|
||||
result.add(longList);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -250,10 +250,4 @@ class MultiwayLongMerger {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
for (int i = 0; i < 20; i++) {
|
||||
System.out.println(i + " " + (Long.highestOneBit(i - 1) << 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user