performance improvement for queries with wildcards
Computing the union of many LongLists was inefficient because we used a naive algorithm: the lists were folded together with repeated pairwise unions, so the growing intermediate result was scanned again for every additional list. I replaced the algorithm with a multi-way merge. The old algorithm had a runtime of O(n^2 * m), where n is the number of lists and m is the length of the longest list. The new algorithm has a runtime of O(n * m * log(n)).
This commit is contained in:
@@ -36,7 +36,7 @@ ext {
|
|||||||
lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}"
|
lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}"
|
||||||
lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}"
|
lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}"
|
||||||
|
|
||||||
lib_primitive_collections='org.lucares:primitiveCollections:0.1.20191226154550'
|
lib_primitive_collections='org.lucares:primitiveCollections:0.1.20201115124106'
|
||||||
|
|
||||||
lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}"
|
lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}"
|
||||||
lib_spring_boot_test="org.springframework.boot:spring-boot-starter-test:${version_spring}"
|
lib_spring_boot_test="org.springframework.boot:spring-boot-starter-test:${version_spring}"
|
||||||
|
|||||||
@@ -11,12 +11,12 @@ import org.lucares.collections.LongList;
|
|||||||
import org.lucares.pdb.api.DateTimeRange;
|
import org.lucares.pdb.api.DateTimeRange;
|
||||||
import org.lucares.pdb.api.Tag;
|
import org.lucares.pdb.api.Tag;
|
||||||
import org.lucares.pdb.blockstorage.LongStreamFile;
|
import org.lucares.pdb.blockstorage.LongStreamFile;
|
||||||
|
import org.lucares.pdb.datastore.internal.DataStore;
|
||||||
|
import org.lucares.pdb.datastore.internal.DatePartitioner;
|
||||||
import org.lucares.pdb.datastore.internal.ParititionId;
|
import org.lucares.pdb.datastore.internal.ParititionId;
|
||||||
import org.lucares.pdb.datastore.internal.PartitionDiskStore;
|
import org.lucares.pdb.datastore.internal.PartitionDiskStore;
|
||||||
import org.lucares.pdb.datastore.internal.PartitionLongList;
|
import org.lucares.pdb.datastore.internal.PartitionLongList;
|
||||||
import org.lucares.pdb.datastore.internal.PartitionPersistentMap;
|
import org.lucares.pdb.datastore.internal.PartitionPersistentMap;
|
||||||
import org.lucares.pdb.datastore.internal.DataStore;
|
|
||||||
import org.lucares.pdb.datastore.internal.DatePartitioner;
|
|
||||||
import org.lucares.pdb.datastore.lang.Expression.And;
|
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||||
import org.lucares.pdb.datastore.lang.Expression.Not;
|
import org.lucares.pdb.datastore.lang.Expression.Not;
|
||||||
import org.lucares.pdb.datastore.lang.Expression.Or;
|
import org.lucares.pdb.datastore.lang.Expression.Or;
|
||||||
@@ -170,7 +170,7 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
final LongList mergedDocsIdsForPartition = merge(docIdsForPartition);
|
final LongList mergedDocsIdsForPartition = LongList.union(docIdsForPartition);
|
||||||
result.put(partitionId, mergedDocsIdsForPartition);
|
result.put(partitionId, mergedDocsIdsForPartition);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,17 +179,6 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private LongList merge(final Collection<LongList> lists) {
|
|
||||||
|
|
||||||
LongList result = new LongList();
|
|
||||||
|
|
||||||
for (final LongList list : lists) {
|
|
||||||
result = LongList.union(result, list);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static LongList concatenateLists(final Collection<LongList> lists) {
|
private static LongList concatenateLists(final Collection<LongList> lists) {
|
||||||
|
|
||||||
final int totalSize = lists.stream().mapToInt(LongList::size).sum();
|
final int totalSize = lists.stream().mapToInt(LongList::size).sum();
|
||||||
|
|||||||
Reference in New Issue
Block a user