From 8579974051b9cc0556b8bbe9528e5a03a64279f6 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sat, 31 Aug 2019 19:30:54 +0200 Subject: [PATCH] performance improvement Queries like "firstname=John and lastname=???" were slightly inefficient. They fetched all firstnames, filtered to those that matched the prefix (e.g. John or Jonathan in this example) and then iterated over all those values and returned the lastnames. Fixed by having two implementations. One for the case that only a few of the values in fieldA match and one for the case that many match. --- .../internal/QueryCompletionIndex.java | 10 +----- .../lang/FindValuesForQueryCompletion.java | 31 ++++++++++++++----- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java index 7d159f1..22269b9 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java @@ -342,18 +342,10 @@ public class QueryCompletionIndex implements AutoCloseable { * * @param dateRange the date range * @param fieldA the other field of the and expression - * @param valueA the value of the other field + * @param valueA {@link GlobMatcher} for the value of the other field * @param fieldB the field we are searching values for * @return values of fieldB */ - public SortedSet find(final DateTimeRange dateRange, final String fieldA, final String valueA, - final String fieldB) { - final Tag tag = new Tag(fieldA, valueA); - Preconditions.checkGreaterOrEqual(tag.getKey(), 0, "The field ''{0}'' is unkown", fieldA); - Preconditions.checkGreaterOrEqual(tag.getValue(), 0, "The value ''{0}'' is unkown", valueA); - return find(dateRange, tag, fieldB); - } - public SortedSet find(final DateTimeRange dateRange, final String fieldA, final GlobMatcher 
valueA, final String fieldB) { diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java index 2f65508..796bc3d 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java @@ -43,23 +43,40 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor visit(final Property property) { final long start = System.nanoTime(); - final TreeSet result = new TreeSet<>(); + final SortedSet result; final String fieldA = property.getField(); final String valueA = property.getValue().getValue(); final boolean hasField = index.hasField(dateTimeRange, fieldA); if (hasField) { + final SortedSet allValuesForField = index.findAllValuesForField(dateTimeRange, fieldA); final SortedSet valuesA = GloblikePattern.filterValues(allValuesForField, valueA, TreeSet::new); - for (final String v : valuesA) { - final SortedSet tmp = index.find(dateTimeRange, fieldA, v, field); - result.addAll(tmp); - } - } + final double valueInFieldAMatchPercentage = valuesA.size() / (double) allValuesForField.size(); + final boolean useMultiFetch = valuesA.size() <= 1 || valueInFieldAMatchPercentage < 0.5; // 50% was + // chosen + // arbitrarily + if (useMultiFetch) { + result = new TreeSet<>(); - METRIC_AND_CARET_LOGGER.debug("{}: {}ms", property, (System.nanoTime() - start) / 1_000_000.0); + for (final String v : valuesA) { + final Tag tagA = new Tag(fieldA, v); + final SortedSet tmp = index.find(dateTimeRange, tagA, field); + result.addAll(tmp); + } + } else { + result = index.find(dateTimeRange, fieldA, new GlobMatcher(valueA), field); + } + + METRIC_AND_CARET_LOGGER.debug("{}: {} and {}=???: {}ms matches in fieldA {} ({}%)", + useMultiFetch ? 
"multi-fetch" : "single-fetch", property, field, + (System.nanoTime() - start) / 1_000_000.0, valuesA.size(), valueInFieldAMatchPercentage * 100); + + } else { + result = new TreeSet<>(); + } return result; }