performance improvement for queries with wildcards

Computing the union of many LongLists was inefficient, because we were
using a trivial algorithm that unioned the lists one at a time into a
growing intermediate result. I replaced it with a multi-way merge. The
old algorithm had a runtime of O(n^2 * m), where n is the number of
lists and m the length of the longest list. The new algorithm has a
runtime of O(n * m * log(n)).
This commit is contained in:
2020-11-15 13:02:15 +01:00
parent 356810c355
commit 6dc0e3c250
2 changed files with 4 additions and 15 deletions

View File

@@ -11,12 +11,12 @@ import org.lucares.collections.LongList;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.blockstorage.LongStreamFile;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.pdb.datastore.internal.DatePartitioner;
import org.lucares.pdb.datastore.internal.ParititionId;
import org.lucares.pdb.datastore.internal.PartitionDiskStore;
import org.lucares.pdb.datastore.internal.PartitionLongList;
import org.lucares.pdb.datastore.internal.PartitionPersistentMap;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.pdb.datastore.internal.DatePartitioner;
import org.lucares.pdb.datastore.lang.Expression.And;
import org.lucares.pdb.datastore.lang.Expression.Not;
import org.lucares.pdb.datastore.lang.Expression.Or;
@@ -170,7 +170,7 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
}
});
final LongList mergedDocsIdsForPartition = merge(docIdsForPartition);
final LongList mergedDocsIdsForPartition = LongList.union(docIdsForPartition);
result.put(partitionId, mergedDocsIdsForPartition);
}
@@ -179,17 +179,6 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
return result;
}
/**
 * Builds the union of all given lists by folding them pairwise: starting
 * from an empty {@link LongList}, the accumulated result is repeatedly
 * replaced by {@code LongList.union(result, list)} for each input list.
 * <p>
 * NOTE(review): every step passes the whole accumulated result through
 * {@code union} again, so the work presumably grows quadratically with the
 * number of lists (assuming the two-list union is linear in its inputs —
 * confirm against LongList).
 *
 * @param lists the lists to combine; iterated once in collection order
 * @return the union of all lists; a new empty list if {@code lists} is empty
 */
private LongList merge(final Collection<LongList> lists) {
// start from an empty list so an empty collection yields an empty result
LongList result = new LongList();
for (final LongList list : lists) {
// fold the next list into the accumulator; the accumulator is replaced, not mutated here
result = LongList.union(result, list);
}
return result;
}
private static LongList concatenateLists(final Collection<LongList> lists) {
final int totalSize = lists.stream().mapToInt(LongList::size).sum();