performance improvement for queries with wildcards

Computing the union of many LongLists was inefficient, because we were
using a trivial algorithm. I replaced the algorithm with a multi-way
merge. The old algorithm had a runtime of O(n!*m), where n is the number
of lists and m is the length of the longest list. The new algorithm has a
runtime of O(log(n) * n*m).
This commit is contained in:
2020-11-15 13:02:15 +01:00
parent 356810c355
commit 6dc0e3c250
2 changed files with 4 additions and 15 deletions

View File

@@ -36,7 +36,7 @@ ext {
lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}" lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}"
lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}" lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}"
lib_primitive_collections='org.lucares:primitiveCollections:0.1.20191226154550' lib_primitive_collections='org.lucares:primitiveCollections:0.1.20201115124106'
lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}" lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}"
lib_spring_boot_test="org.springframework.boot:spring-boot-starter-test:${version_spring}" lib_spring_boot_test="org.springframework.boot:spring-boot-starter-test:${version_spring}"

View File

@@ -11,12 +11,12 @@ import org.lucares.collections.LongList;
import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tag;
import org.lucares.pdb.blockstorage.LongStreamFile; import org.lucares.pdb.blockstorage.LongStreamFile;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.pdb.datastore.internal.DatePartitioner;
import org.lucares.pdb.datastore.internal.ParititionId; import org.lucares.pdb.datastore.internal.ParititionId;
import org.lucares.pdb.datastore.internal.PartitionDiskStore; import org.lucares.pdb.datastore.internal.PartitionDiskStore;
import org.lucares.pdb.datastore.internal.PartitionLongList; import org.lucares.pdb.datastore.internal.PartitionLongList;
import org.lucares.pdb.datastore.internal.PartitionPersistentMap; import org.lucares.pdb.datastore.internal.PartitionPersistentMap;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.pdb.datastore.internal.DatePartitioner;
import org.lucares.pdb.datastore.lang.Expression.And; import org.lucares.pdb.datastore.lang.Expression.And;
import org.lucares.pdb.datastore.lang.Expression.Not; import org.lucares.pdb.datastore.lang.Expression.Not;
import org.lucares.pdb.datastore.lang.Expression.Or; import org.lucares.pdb.datastore.lang.Expression.Or;
@@ -170,7 +170,7 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
} }
}); });
final LongList mergedDocsIdsForPartition = merge(docIdsForPartition); final LongList mergedDocsIdsForPartition = LongList.union(docIdsForPartition);
result.put(partitionId, mergedDocsIdsForPartition); result.put(partitionId, mergedDocsIdsForPartition);
} }
@@ -179,17 +179,6 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<PartitionLongLis
return result; return result;
} }
/**
 * Combines all given lists into a single list by folding them pairwise with
 * {@code LongList.union(LongList, LongList)}.
 * <p>
 * The accumulated result is unioned again with every further input list, so
 * the intermediate result is re-processed once per list. The commit message
 * names this repeated work as the reason the method was replaced by a single
 * call to {@code LongList.union(Collection)}.
 *
 * @param lists the lists to combine
 * @return the result of successively unioning every list in {@code lists};
 *         a newly constructed {@code LongList} if {@code lists} has no elements
 */
private LongList merge(final Collection<LongList> lists) {
LongList result = new LongList();
for (final LongList list : lists) {
// fold the next list into everything accumulated so far
result = LongList.union(result, list);
}
return result;
}
private static LongList concatenateLists(final Collection<LongList> lists) { private static LongList concatenateLists(final Collection<LongList> lists) {
final int totalSize = lists.stream().mapToInt(LongList::size).sum(); final int totalSize = lists.stream().mapToInt(LongList::size).sum();