From 6dc0e3c2509a50ca792adc3f3bd18cecb892f748 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sun, 15 Nov 2020 13:02:15 +0100 Subject: [PATCH] performance improvement for queries with wildcards Computing the union of many LongLists was inefficient, because we were using a trivial algorithm. I replaced the algorithm with a multi-way merge. The old algorithm had a runtime of O(n^2 * m) where n is the number of lists and m the length of the longest list. The new algorithm has a runtime of O(log(n) * n*m). --- build.gradle | 2 +- .../lang/ExpressionToDocIdVisitor.java | 17 +++-------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/build.gradle b/build.gradle index c59194f..37f15c4 100644 --- a/build.gradle +++ b/build.gradle @@ -36,7 +36,7 @@ ext { lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}" lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}" - lib_primitive_collections='org.lucares:primitiveCollections:0.1.20191226154550' + lib_primitive_collections='org.lucares:primitiveCollections:0.1.20201115124106' lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}" lib_spring_boot_test="org.springframework.boot:spring-boot-starter-test:${version_spring}" diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java index 4c4187d..88cc192 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java @@ -11,12 +11,12 @@ import org.lucares.collections.LongList; import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.Tag; import org.lucares.pdb.blockstorage.LongStreamFile; +import org.lucares.pdb.datastore.internal.DataStore; +import org.lucares.pdb.datastore.internal.DatePartitioner; import
org.lucares.pdb.datastore.internal.ParititionId; import org.lucares.pdb.datastore.internal.PartitionDiskStore; import org.lucares.pdb.datastore.internal.PartitionLongList; import org.lucares.pdb.datastore.internal.PartitionPersistentMap; -import org.lucares.pdb.datastore.internal.DataStore; -import org.lucares.pdb.datastore.internal.DatePartitioner; import org.lucares.pdb.datastore.lang.Expression.And; import org.lucares.pdb.datastore.lang.Expression.Not; import org.lucares.pdb.datastore.lang.Expression.Or; @@ -170,7 +170,7 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor lists) { - - LongList result = new LongList(); - - for (final LongList list : lists) { - result = LongList.union(result, list); - } - - return result; - } - private static LongList concatenateLists(final Collection lists) { final int totalSize = lists.stream().mapToInt(LongList::size).sum();