multiway merge of multiple sorted lists
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
package org.lucares.collections;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Spliterator.OfLong;
|
||||
@@ -41,7 +43,7 @@ public final class LongList implements Serializable, Cloneable {
|
||||
* Keeps track of whether or not the list is sorted. This allows us to use
|
||||
* binary search for {@link #indexOf(int)} and efficient algorithms for
|
||||
* {@link #intersection(LongList, LongList)} /
|
||||
* {@link #union(LongList, LongList)} / {@link #uniq()} /
|
||||
* {@link #unionInternal(LongList, LongList)} / {@link #uniq()} /
|
||||
* {@link #removeAll(int, int)}. An empty list is sorted.
|
||||
*/
|
||||
private boolean sorted = true;
|
||||
@@ -121,7 +123,7 @@ public final class LongList implements Serializable, Cloneable {
|
||||
*
|
||||
* @param startInclusive the lower bound (inclusive)
|
||||
* @param endInclusive the upper bound (inclusive)
|
||||
* @return the {@link IntList}
|
||||
* @return the {@link LongList}
|
||||
*/
|
||||
public static LongList rangeClosed(final long startInclusive, final long endInclusive) {
|
||||
if (startInclusive > endInclusive) {
|
||||
@@ -137,6 +139,32 @@ public final class LongList implements Serializable, Cloneable {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new list with the values from the given start index to the end of
|
||||
* the list.
|
||||
*
|
||||
* @param startInclusive start index
|
||||
* @return {@code LongList}
|
||||
*/
|
||||
public LongList sublist(final int startInclusive) {
|
||||
return sublist(startInclusive, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new list with the values of the given range.
|
||||
*
|
||||
* @param startInclusive the start index
|
||||
* @param endExclusive the end index (exclusive)
|
||||
* @return {@link LongList}
|
||||
*/
|
||||
public LongList sublist(final int startInclusive, int endExclusive) {
|
||||
final LongList result = new LongList(endExclusive - startInclusive);
|
||||
result.data = Arrays.copyOfRange(data, startInclusive, endExclusive);
|
||||
result.size = result.data.length;
|
||||
result.sorted = sorted;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if this list contains no elements.
|
||||
*
|
||||
@@ -1114,27 +1142,32 @@ public final class LongList implements Serializable, Cloneable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list with all elements that are in list {@code a} or {@code b}
|
||||
* (logical or).
|
||||
* Returns a list with all elements that are in list {@code a} or {@code b} or
|
||||
* ... or {@code n} (logical or).
|
||||
* <p>
|
||||
* The result does not contain duplicate values.
|
||||
* <p>
|
||||
* If both lists were sorted, then the output list will also be sorted. If at
|
||||
* If all lists were sorted, then the output list will also be sorted. If at
|
||||
* least one list is unsorted, then the order is undefined.
|
||||
* <p>
|
||||
* If both lists are sorted, then the time complexity is O(n+m), where n is the
|
||||
* length of the first list and m the length of the second list. If at least one
|
||||
* list is not sorted, then the time complexity is O(m*log(m)), where m is the
|
||||
* length of the longer list.
|
||||
* TODO check time complexity If all lists are sorted, then the time complexity
|
||||
* is O(n+m), where n is the length of the first list and m the length of the
|
||||
* second list. If at least one list is not sorted, then the time complexity is
|
||||
* O(m*log(m)), where m is the length of the longer list.
|
||||
*
|
||||
* @param a the first list
|
||||
* @param b the second list
|
||||
* @param longLists the lists
|
||||
* @return the union of both lists
|
||||
*/
|
||||
public static LongList union(final LongList a, final LongList b) {
|
||||
public static LongList union(final LongList... longLists) {
|
||||
return union(List.of(longLists));
|
||||
}
|
||||
|
||||
private static LongList unionInternal(final LongList a, final LongList b) {
|
||||
final LongList result;
|
||||
|
||||
if (a.isSorted() && b.isSorted()) {
|
||||
if (a.isEmpty() && b.isEmpty()) {
|
||||
result = new LongList();
|
||||
} else if (a.isSorted() && b.isSorted()) {
|
||||
result = unionSorted(a, b);
|
||||
} else {
|
||||
result = unionUnsorted(a, b);
|
||||
@@ -1142,6 +1175,35 @@ public final class LongList implements Serializable, Cloneable {
|
||||
return result;
|
||||
}
|
||||
|
||||
public static LongList union(final Collection<LongList> longLists) {
|
||||
switch (longLists.size()) {
|
||||
case 0:
|
||||
return new LongList();
|
||||
case 1:
|
||||
return longLists.iterator().next().clone();
|
||||
case 2:
|
||||
final Iterator<LongList> it = longLists.iterator();
|
||||
final LongList a = it.next();
|
||||
final LongList b = it.next();
|
||||
return unionInternal(a, b);
|
||||
default:
|
||||
final Collection<LongList> sortedLists = subsetOfSortedLists(longLists);
|
||||
final Collection<LongList> unsortedLists = subsetOfUnsortedLists(longLists);
|
||||
|
||||
final LongList unionSorted = MultiwayLongMerger.unionSorted(sortedLists);
|
||||
|
||||
final LongList result;
|
||||
if (unsortedLists.isEmpty()) {
|
||||
result = unionSorted;
|
||||
} else {
|
||||
final LongList unionUnsorted = unionUnsorted(unsortedLists);
|
||||
result = unionInternal(unionSorted, unionUnsorted);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private static LongList unionSorted(final LongList a, final LongList b) {
|
||||
|
||||
final int aSize = a.size();
|
||||
@@ -1211,6 +1273,17 @@ public final class LongList implements Serializable, Cloneable {
|
||||
return unionSorted(aSorted, bSorted);
|
||||
}
|
||||
|
||||
private static LongList unionUnsorted(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> sortedLists = new ArrayList<>();
|
||||
for (LongList longList : longLists) {
|
||||
final LongList copy = new LongList(longList);
|
||||
copy.sort();
|
||||
sortedLists.add(copy);
|
||||
}
|
||||
return MultiwayLongMerger.unionSorted(sortedLists);
|
||||
}
|
||||
|
||||
private void checkIfSorted() {
|
||||
sorted = true;
|
||||
for (int i = 1; i < size && sorted; i++) {
|
||||
@@ -1218,4 +1291,30 @@ public final class LongList implements Serializable, Cloneable {
|
||||
}
|
||||
}
|
||||
|
||||
private static Collection<LongList> subsetOfSortedLists(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (longList.isSorted()) {
|
||||
result.add(longList);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static Collection<LongList> subsetOfUnsortedLists(Collection<LongList> longLists) {
|
||||
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
|
||||
for (LongList longList : longLists) {
|
||||
if (!longList.isSorted()) {
|
||||
result.add(longList);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,252 @@
|
||||
package org.lucares.collections;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
class MultiwayLongMerger {
|
||||
private static final long UNSET = Long.MIN_VALUE;
|
||||
|
||||
private static class LongQueue {
|
||||
final LongList wrapped;
|
||||
|
||||
int offset = 0;
|
||||
|
||||
public LongQueue(LongList wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
}
|
||||
|
||||
boolean isEmpty() {
|
||||
return offset >= wrapped.size();
|
||||
}
|
||||
|
||||
long pop() {
|
||||
assert offset < wrapped.size();
|
||||
final long result = wrapped.get(offset);
|
||||
offset++;
|
||||
return result;
|
||||
}
|
||||
|
||||
public long peek() {
|
||||
return wrapped.get(offset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return wrapped.sublist(offset).toString();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return wrapped.size()-offset;
|
||||
}
|
||||
}
|
||||
|
||||
static LongList unionSorted(Collection<LongList> longLists) {
|
||||
|
||||
assertAllListsAreSorted(longLists);
|
||||
|
||||
final List<LongQueue> queues = new ArrayList<LongQueue>();
|
||||
boolean hasValueUNSET = initQueues(longLists, queues);
|
||||
|
||||
final LongList result = new LongList();
|
||||
if (hasValueUNSET) {
|
||||
result.add(UNSET);
|
||||
}
|
||||
|
||||
if (!queues.isEmpty()) {
|
||||
mergeQueues(queues, result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void mergeQueues(final List<LongQueue> queues, final LongList result) {
|
||||
final MinValuePriorityQueue selectionTree = new MinValuePriorityQueue(queues);
|
||||
|
||||
long previousValue = UNSET;
|
||||
long val;
|
||||
while ((val = selectionTree.pop()) != UNSET) {
|
||||
if (val != previousValue) {
|
||||
result.add(val);
|
||||
previousValue = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean initQueues(Collection<LongList> longLists, final List<LongQueue> queues) {
|
||||
boolean hasValueUNSET = false;
|
||||
for (LongList longList : longLists) {
|
||||
if (!longList.isEmpty()) {
|
||||
final LongQueue queue = new LongQueue(longList);
|
||||
while (!queue.isEmpty() && queue.peek() == UNSET) {
|
||||
queue.pop();
|
||||
hasValueUNSET = true;
|
||||
}
|
||||
if (!queue.isEmpty()) {
|
||||
queues.add(queue);
|
||||
}
|
||||
}
|
||||
}
|
||||
return hasValueUNSET;
|
||||
}
|
||||
|
||||
private static void assertAllListsAreSorted(Collection<LongList> longLists) {
|
||||
for (LongList longList : longLists) {
|
||||
if (!longList.isSorted()) {
|
||||
throw new IllegalArgumentException("lists must be sorted");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static int nextPowOfTwo(int i) {
|
||||
return Integer.highestOneBit(i - 1) << 1;
|
||||
}
|
||||
|
||||
private static class MinValuePriorityQueue {
|
||||
|
||||
private List<LongQueue> longQueues;
|
||||
|
||||
/*
|
||||
* a classic heap where the nodes are layed out in breath first order. First the
|
||||
* root, then the nodes of level 1, then the nodes of level 2, ...
|
||||
*/
|
||||
private final long[] heap;
|
||||
|
||||
private final int size;
|
||||
|
||||
private final int firstLeafIndex;
|
||||
|
||||
public MinValuePriorityQueue(final Collection<LongQueue> longQueues) {
|
||||
this.longQueues = new ArrayList<>(longQueues);
|
||||
size = longQueues.size();
|
||||
heap = new long[2 * nextPowOfTwo(size) - 1];
|
||||
|
||||
firstLeafIndex = heap.length / 2;
|
||||
|
||||
Arrays.fill(heap, UNSET);
|
||||
init();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the smallest value of the heap. Returns
|
||||
* {@link MultiwayLongMerger#UNSET}={@value MultiwayLongMerger#UNSET} if the
|
||||
* heap is empty.
|
||||
*
|
||||
* @return the smallest value or
|
||||
* {@link MultiwayLongMerger#UNSET}={@value MultiwayLongMerger#UNSET} if
|
||||
* heap is empty
|
||||
*/
|
||||
public long pop() {
|
||||
long result = heap[0];
|
||||
fillWithMinOfChildren(0);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* 7
|
||||
* 3
|
||||
* 8
|
||||
* 1
|
||||
* 9
|
||||
* 4
|
||||
* 10
|
||||
* 0
|
||||
* 11
|
||||
* 5
|
||||
* 12
|
||||
* 2
|
||||
* 13
|
||||
* 6
|
||||
* 14
|
||||
* </pre>
|
||||
*/
|
||||
private void init() {
|
||||
// fill leaf nodes
|
||||
int offset = firstLeafIndex;
|
||||
for (int j = 0; j < size; j++) {
|
||||
final LongQueue q = longQueues.get(j);
|
||||
heap[offset + j] = q.isEmpty() ? UNSET : q.pop();
|
||||
}
|
||||
|
||||
// fill the non-leaf layers (from the leafs up to the root)
|
||||
while (offset > 0) {
|
||||
offset /= 2; //
|
||||
for (int i = offset; i <= offset * 2; i++) {
|
||||
fillWithMinOfChildren(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int leftChildIndex(int i) {
|
||||
return i * 2 + 1;
|
||||
}
|
||||
|
||||
private int rightChildIndex(int i) {
|
||||
return i * 2 + 2;
|
||||
}
|
||||
|
||||
private boolean isLeaf(int i) {
|
||||
return i >= firstLeafIndex;
|
||||
}
|
||||
|
||||
private int leafIndexToListIndex(int i) {
|
||||
assert isLeaf(i) : "index " + i + " is not a leaf";
|
||||
return i - firstLeafIndex;
|
||||
}
|
||||
|
||||
private void fillWithMinOfChildren(int index) {
|
||||
final int leftChildIndex = index * 2 + 1; //leftChildIndex(index);
|
||||
final int rightChildIndex = leftChildIndex+1;//rightChildIndex(index);
|
||||
|
||||
final long valueOfLeftChild = heap[leftChildIndex];
|
||||
final long valueOfRightChild = heap[rightChildIndex];
|
||||
|
||||
final int chosenValue;
|
||||
|
||||
if (valueOfLeftChild == UNSET) {
|
||||
if (valueOfRightChild == UNSET) {
|
||||
heap[index] = UNSET;
|
||||
return;
|
||||
} else {
|
||||
//left < right
|
||||
heap[index] = valueOfRightChild;
|
||||
chosenValue = rightChildIndex;
|
||||
}
|
||||
} else if (valueOfRightChild == UNSET) {
|
||||
// left > right
|
||||
heap[index] = valueOfLeftChild;
|
||||
chosenValue = leftChildIndex;
|
||||
} else {
|
||||
if (valueOfLeftChild < valueOfRightChild) {
|
||||
// left < right
|
||||
heap[index] = valueOfLeftChild;
|
||||
chosenValue = leftChildIndex;
|
||||
} else {
|
||||
// left >= right
|
||||
heap[index] = valueOfRightChild;
|
||||
chosenValue = rightChildIndex;
|
||||
}
|
||||
}
|
||||
|
||||
refillValue(chosenValue);
|
||||
}
|
||||
|
||||
private void refillValue(int index) {
|
||||
if (isLeaf(index)) {
|
||||
final int listIndex = index - firstLeafIndex; //leafIndexToListIndex(index);
|
||||
final LongQueue queue = longQueues.get(listIndex);
|
||||
heap[index] = queue.isEmpty() ? UNSET : queue.pop();
|
||||
return;
|
||||
}
|
||||
fillWithMinOfChildren(index);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
for (int i = 0; i < 20; i++) {
|
||||
System.out.println(i + " " + (Long.highestOneBit(i - 1) << 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1614,6 +1614,31 @@ public class LongListTest {
|
||||
Assertions.assertEquals(LongList.union(a, b), LongList.union(b, a));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_three() {
|
||||
final LongList a = LongList.of(1, 2, 3);
|
||||
final LongList b = LongList.of(2, 4, 6);
|
||||
final LongList c = LongList.of(3, 5, 7);
|
||||
|
||||
final LongList actual = LongList.union(a, b, c);
|
||||
Assertions.assertEquals(LongList.of(1,2,3,4,5,6,7), actual);
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, c, a));
|
||||
Assertions.assertEquals(LongList.union(a, b, c), LongList.union(b, a, c));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionSortedLists_four_LongMinValue() {
|
||||
final LongList a = LongList.of(Long.MIN_VALUE, 2, 3,Long.MAX_VALUE);
|
||||
final LongList b = LongList.of(2, 4, 6, Long.MAX_VALUE);
|
||||
final LongList c = LongList.of(Long.MIN_VALUE, 5, 7);
|
||||
final LongList d = LongList.of(Long.MIN_VALUE, Long.MIN_VALUE);
|
||||
|
||||
final LongList actual = LongList.union(a, b, c, d);
|
||||
Assertions.assertEquals(LongList.of(Long.MIN_VALUE,2,3,4,5,6,7, Long.MAX_VALUE), actual);
|
||||
Assertions.assertEquals(LongList.union(a, b, c, d), LongList.union(b, c, a, d));
|
||||
Assertions.assertEquals(LongList.union(a, b, c, d), LongList.union(d, b, a, c));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnionUnsortedLists() {
|
||||
final LongList a = LongList.of(1, 0, 3, 4);
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
package org.lucares.collections;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class MultiwayLongMergerTest {
|
||||
|
||||
@Test
|
||||
public void testMergeTwoLists() {
|
||||
|
||||
LongList a = LongList.of(1,2,3);
|
||||
LongList b = LongList.of(1,3,5);
|
||||
LongList expected = LongList.of(1,2,3,5);
|
||||
|
||||
|
||||
LongList union = MultiwayLongMerger.unionSorted(Arrays.asList(a,b));
|
||||
Assertions.assertEquals(expected, union);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeThreeLists() {
|
||||
|
||||
LongList a = LongList.of(1,2,3);
|
||||
LongList b = LongList.of(1,3,5);
|
||||
LongList c = LongList.of(2,3,5);
|
||||
LongList expected = LongList.of(1,2,3,5);
|
||||
|
||||
|
||||
LongList union = MultiwayLongMerger.unionSorted(Arrays.asList(a,b,c));
|
||||
Assertions.assertEquals(expected, union);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeListsWithLongMin() {
|
||||
|
||||
LongList a = LongList.of(Long.MIN_VALUE,2,3);
|
||||
LongList b = LongList.of(1,3,5);
|
||||
LongList c = LongList.of(Long.MIN_VALUE,Long.MIN_VALUE);
|
||||
LongList expected = LongList.of(Long.MIN_VALUE,1,2,3,5);
|
||||
|
||||
|
||||
LongList union = MultiwayLongMerger.unionSorted(Arrays.asList(a,b,c));
|
||||
Assertions.assertEquals(expected, union);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeEmptyLists() {
|
||||
|
||||
LongList a = LongList.of();
|
||||
LongList b = LongList.of();
|
||||
LongList expected = LongList.of();
|
||||
|
||||
|
||||
LongList union = MultiwayLongMerger.unionSorted(Arrays.asList(a,b));
|
||||
Assertions.assertEquals(expected, union);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user