handle globlike patterns in in-expressions
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.lucares.pdb.datastore.lang.GloblikePattern;
|
||||
|
||||
public class GlobMatcher {
|
||||
|
||||
private final Pattern pattern;
|
||||
|
||||
public GlobMatcher(final String globlike) {
|
||||
pattern = GloblikePattern.globlikeToRegex(globlike);
|
||||
}
|
||||
|
||||
public GlobMatcher(final Iterable<String> globlikes) {
|
||||
pattern = GloblikePattern.globlikeToRegex(globlikes);
|
||||
}
|
||||
|
||||
public boolean matches(final String s) {
|
||||
final Matcher matcher = pattern.matcher(s);
|
||||
return matcher.find();
|
||||
}
|
||||
}
|
||||
@@ -337,8 +337,8 @@ public class QueryCompletionIndex implements AutoCloseable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Find values that are yield results when executing the query "fieldA=valueA
|
||||
* and fieldB=???"
|
||||
* Find values for fieldB that yield results when executing the query
|
||||
* "fieldA=valueA and fieldB=???"
|
||||
*
|
||||
* @param dateRange the date range
|
||||
* @param fieldA the other field of the and expression
|
||||
@@ -354,8 +354,28 @@ public class QueryCompletionIndex implements AutoCloseable {
|
||||
return find(dateRange, tag, fieldB);
|
||||
}
|
||||
|
||||
public SortedSet<String> find(final DateTimeRange dateRange, final String fieldA, final GlobMatcher valueA,
|
||||
final String fieldB) {
|
||||
|
||||
final SortedSet<String> result = new TreeSet<>();
|
||||
|
||||
final TwoTags keyPrefix = new TwoTags(fieldB, fieldA, null, null);
|
||||
|
||||
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
||||
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
|
||||
|
||||
final String vA = k.getTagA().getValueAsString();
|
||||
|
||||
if (valueA.matches(vA)) {
|
||||
result.add(k.getTagB().getValueAsString());
|
||||
}
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find values that are yield results when executing the query
|
||||
* Find values for fieldB that yield results when executing the query
|
||||
* "tag.field=tag.value and fieldB=???"
|
||||
*
|
||||
* @param dateRange the date range
|
||||
|
||||
@@ -7,6 +7,7 @@ import java.util.TreeSet;
|
||||
|
||||
import org.lucares.pdb.api.DateTimeRange;
|
||||
import org.lucares.pdb.api.Tag;
|
||||
import org.lucares.pdb.datastore.internal.GlobMatcher;
|
||||
import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
|
||||
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||
import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression;
|
||||
@@ -65,13 +66,12 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
|
||||
/**
 * Determines the candidate values for {@code field} for an in-expression by
 * querying the index with the expression's property and values, and logs the
 * elapsed time at debug level.
 * <p>
 * NOTE(review): this span appears to interleave the removed and the added lines
 * of a diff hunk ({@code result} is declared twice, and both the per-value loop
 * and the single {@code GlobMatcher} lookup are present), so it does not compile
 * as shown - confirm against the real file.
 *
 * @param expression the in-expression supplying the property and its values
 * @return the values returned by the index lookup
 */
@Override
|
||||
public SortedSet<String> visit(final InExpression expression) {
|
||||
// start of the timing that is reported in the debug log below
final long start = System.nanoTime();
|
||||
final SortedSet<String> result = new TreeSet<>();
|
||||
final String property = expression.getProperty();
|
||||
final SortedSet<String> result;
|
||||
final String fieldA = expression.getProperty();
|
||||
final List<String> values = expression.getValues();
|
||||
// one index lookup per value; the individual results are merged into result
for (final String value : values) {
|
||||
final SortedSet<String> candidates = index.find(dateTimeRange, property, value, field);
|
||||
result.addAll(candidates);
|
||||
}
|
||||
|
||||
// single index lookup with one matcher built from all values of the expression
result = index.find(dateTimeRange, fieldA, new GlobMatcher(values), field);
|
||||
|
||||
// elapsed nanoseconds converted to milliseconds
METRIC_AND_CARET_LOGGER.debug("{}: {}ms", expression, (System.nanoTime() - start) / 1_000_000.0);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package org.lucares.pdb.datastore.lang;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@@ -16,21 +18,42 @@ public class GloblikePattern {
|
||||
KEEP_EQUAL
|
||||
}
|
||||
|
||||
static Pattern globlikeToRegex(final String globPattern) {
|
||||
public static Pattern globlikeToRegex(final String globlike) {
|
||||
|
||||
final String valueRegex = "^" + globlikeToPattern(globlike);
|
||||
|
||||
LOGGER.trace(">{}< -> >{}<", globlike, valueRegex);
|
||||
|
||||
return Pattern.compile(valueRegex);
|
||||
}
|
||||
|
||||
public static Pattern globlikeToRegex(final Iterable<String> globlikes) {
|
||||
|
||||
final List<String> regex = new ArrayList<>();
|
||||
|
||||
for (final String globlike : globlikes) {
|
||||
regex.add(globlikeToPattern(globlike));
|
||||
}
|
||||
final StringBuilder fullRegex = new StringBuilder("^(");
|
||||
fullRegex.append(String.join("|", regex));
|
||||
fullRegex.append(")");
|
||||
|
||||
LOGGER.trace(">{}< -> >{}<", globlikes, fullRegex);
|
||||
|
||||
return Pattern.compile(fullRegex.toString());
|
||||
}
|
||||
|
||||
private static String globlikeToPattern(final String globlike) {
|
||||
// a character that cannot be in the globPattern
|
||||
final String dotPlaceholder = "\ue003"; // fourth character in the private use area
|
||||
|
||||
final String valueRegex = "^" + //
|
||||
globPattern//
|
||||
.replace("-", Pattern.quote("-"))//
|
||||
.replace(".", dotPlaceholder)//
|
||||
.replace("*", ".*")//
|
||||
.replace(dotPlaceholder, ".*\\.")//
|
||||
.replaceAll("([A-Z])", "[a-z]*$1");
|
||||
|
||||
LOGGER.trace(">{}< -> >{}<", globPattern, valueRegex);
|
||||
|
||||
return Pattern.compile(valueRegex);
|
||||
final String valueRegex = globlike//
|
||||
.replace("-", Pattern.quote("-"))//
|
||||
.replace(".", dotPlaceholder)//
|
||||
.replace("*", ".*")//
|
||||
.replace(dotPlaceholder, ".*\\.")//
|
||||
.replaceAll("([A-Z])", "[a-z]*$1");
|
||||
return valueRegex;
|
||||
}
|
||||
|
||||
public static <T extends Collection<String>> T filterValues(final Collection<String> availableValues,
|
||||
@@ -54,4 +77,5 @@ public class GloblikePattern {
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user