performance improvement

Queries like "firstname=John and lastname=???" were slightly inefficient. They fetched all firstnames, filtered to those that matched the prefix (e.g. John or Jonathan in this example) and then iterated over all those values and returned the lastnames. Fixed by having two implementations: one for the case that only a few of the values in fieldA match, and one for the case that many match.
2019-08-31 19:30:54 +02:00
parent d8a114dbaf
commit 8579974051
2 changed files with 25 additions and 16 deletions
--- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java
@@ -342,18 +342,10 @@ public class QueryCompletionIndex implements AutoCloseable {
 	 *
 	 * @param dateRange the date range
 	 * @param fieldA    the other field of the and expression
-	 * @param valueA    the value of the other field
+	 * @param valueA    {@link GlobMatcher} for the value of the other field
 	 * @param fieldB    the field we are searching values for
 	 * @return values of fieldB
 	 */
-	public SortedSet<String> find(final DateTimeRange dateRange, final String fieldA, final String valueA,
-			final String fieldB) {
-		final Tag tag = new Tag(fieldA, valueA);
-		Preconditions.checkGreaterOrEqual(tag.getKey(), 0, "The field ''{0}'' is unkown", fieldA);
-		Preconditions.checkGreaterOrEqual(tag.getValue(), 0, "The value ''{0}'' is unkown", valueA);
-		return find(dateRange, tag, fieldB);
-	}
-
 	public SortedSet<String> find(final DateTimeRange dateRange, final String fieldA, final GlobMatcher valueA,
 			final String fieldB) {

--- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java
@@ -43,23 +43,40 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
 		@Override
 		public SortedSet<String> visit(final Property property) {
 			final long start = System.nanoTime();
-			final TreeSet<String> result = new TreeSet<>();
+			final SortedSet<String> result;

 			final String fieldA = property.getField();
 			final String valueA = property.getValue().getValue();

 			final boolean hasField = index.hasField(dateTimeRange, fieldA);
 			if (hasField) {
+
 				final SortedSet<String> allValuesForField = index.findAllValuesForField(dateTimeRange, fieldA);
 				final SortedSet<String> valuesA = GloblikePattern.filterValues(allValuesForField, valueA, TreeSet::new);

-				for (final String v : valuesA) {
-					final SortedSet<String> tmp = index.find(dateTimeRange, fieldA, v, field);
-					result.addAll(tmp);
-				}
-			}
+				final double valueInFieldAMatchPercentage = valuesA.size() / (double) allValuesForField.size();
+				final boolean useMultiFetch = valuesA.size() <= 1 || valueInFieldAMatchPercentage < 0.5; // 50% was
+																											// chosen
+																											// arbitrarily
+				if (useMultiFetch) {
+					result = new TreeSet<>();

-			METRIC_AND_CARET_LOGGER.debug("{}: {}ms", property, (System.nanoTime() - start) / 1_000_000.0);
+					for (final String v : valuesA) {
+						final Tag tagA = new Tag(fieldA, v);
+						final SortedSet<String> tmp = index.find(dateTimeRange, tagA, field);
+						result.addAll(tmp);
+					}
+				} else {
+					result = index.find(dateTimeRange, fieldA, new GlobMatcher(valueA), field);
+				}
+
+				METRIC_AND_CARET_LOGGER.debug("{}: {} and {}=???: {}ms matches in fieldA {} ({}%)",
+						useMultiFetch ? "multi-fetch" : "single-fetch", property, field,
+						(System.nanoTime() - start) / 1_000_000.0, valuesA.size(), valueInFieldAMatchPercentage * 100);
+
+			} else {
+				result = new TreeSet<>();
+			}
 			return result;
 		}