performance improvement for queries with wildcards
Computing the union of many LongLists was inefficient because we were using a trivial algorithm that repeatedly merged the accumulated result with the next list. I replaced the algorithm with a multi-way merge. The old algorithm had a runtime of O(n^2 * m), where n is the number of lists and m is the length of the longest list. The new algorithm has a runtime of O(log(n) * n * m).
This commit is contained in:
@@ -11,12 +11,12 @@ import org.lucares.collections.LongList;
|
||||
import org.lucares.pdb.api.DateTimeRange;
|
||||
import org.lucares.pdb.api.Tag;
|
||||
import org.lucares.pdb.blockstorage.LongStreamFile;
|
||||
import org.lucares.pdb.datastore.internal.DataStore;
|
||||
import org.lucares.pdb.datastore.internal.DatePartitioner;
|
||||
import org.lucares.pdb.datastore.internal.ParititionId;
|
||||
import org.lucares.pdb.datastore.internal.PartitionDiskStore;
|
||||
import org.lucares.pdb.datastore.internal.PartitionLongList;
|
||||
import org.lucares.pdb.datastore.internal.PartitionPersistentMap;
|
||||
import org.lucares.pdb.datastore.internal.DataStore;
|
||||
import org.lucares.pdb.datastore.internal.DatePartitioner;
|
||||
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Not;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Or;
|
||||
@@ -170,7 +170,7 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
|
||||
}
|
||||
});
|
||||
|
||||
final LongList mergedDocsIdsForPartition = merge(docIdsForPartition);
|
||||
final LongList mergedDocsIdsForPartition = LongList.union(docIdsForPartition);
|
||||
result.put(partitionId, mergedDocsIdsForPartition);
|
||||
}
|
||||
|
||||
@@ -179,17 +179,6 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
|
||||
return result;
|
||||
}
|
||||
|
||||
private LongList merge(final Collection<LongList> lists) {
|
||||
|
||||
LongList result = new LongList();
|
||||
|
||||
for (final LongList list : lists) {
|
||||
result = LongList.union(result, list);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static LongList concatenateLists(final Collection<LongList> lists) {
|
||||
|
||||
final int totalSize = lists.stream().mapToInt(LongList::size).sum();
|
||||
|
||||
Reference in New Issue
Block a user