add CamelCase matching to the query language
This commit is contained in:
@@ -10,16 +10,12 @@ import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.lucares.collections.IntList;
|
||||
import org.lucares.pdb.datastore.Doc;
|
||||
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Not;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Or;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Parentheses;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Property;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Terminal;
|
||||
import org.lucares.utils.CollectionUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -147,16 +143,11 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
|
||||
|
||||
result = merge(allValuesForKey.values());
|
||||
break;
|
||||
} else if (containsWildcard(value)) {
|
||||
|
||||
final Collection<IntList> docIds = filterByWildcard(propertyName, globToRegex(value));
|
||||
} else {
|
||||
final Collection<IntList> docIds = filterByWildcard(propertyName,
|
||||
GloblikePattern.globlikeToRegex(value));
|
||||
final IntList mergedDocIds = merge(docIds);
|
||||
result = IntList.union(result, mergedDocIds);
|
||||
} else {
|
||||
final IntList docIds = keyToValueToDocId.//
|
||||
getOrDefault(propertyName, EMPTY_VALUES).//
|
||||
getOrDefault(value, EMPTY_DOC_IDS);
|
||||
result = IntList.union(result, docIds);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -168,50 +159,13 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
|
||||
return allDocIds.getAllDocIds();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntList visit(final Property expression) {
|
||||
|
||||
final String propertyName = expression.property;
|
||||
final Terminal propertyValue = expression.value;
|
||||
final String stringValue = propertyValue.getValue();
|
||||
final long start = System.nanoTime();
|
||||
final IntList result;
|
||||
if (isMatchAll(stringValue)) {
|
||||
|
||||
final Map<String, IntList> allValuesForKey = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES);
|
||||
|
||||
result = merge(allValuesForKey.values());
|
||||
} else if (containsWildcard(stringValue)) {
|
||||
|
||||
final Collection<IntList> docIds = filterByWildcard(propertyName, globToRegex(stringValue));
|
||||
|
||||
result = merge(docIds);
|
||||
} else {
|
||||
result = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES).getOrDefault(stringValue,
|
||||
EMPTY_DOC_IDS);
|
||||
}
|
||||
LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private Pattern globToRegex(final String globPattern) {
|
||||
|
||||
final String[] tokens = StringUtils.splitPreserveAllTokens(globPattern, "*");
|
||||
|
||||
final List<String> quotedTokens = CollectionUtils.map(tokens, Pattern::quote);
|
||||
final String regex = String.join(".*", quotedTokens);
|
||||
|
||||
return Pattern.compile(regex);
|
||||
}
|
||||
|
||||
private List<IntList> filterByWildcard(final String propertyName, final Pattern valuePattern) {
|
||||
|
||||
final List<IntList> result = new ArrayList<>();
|
||||
|
||||
final Map<String, IntList> valueToDocId = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES);
|
||||
for (final Entry<String, IntList> entry : valueToDocId.entrySet()) {
|
||||
if (valuePattern.matcher(entry.getKey()).matches()) {
|
||||
if (valuePattern.matcher(entry.getKey()).find()) {
|
||||
result.add(entry.getValue());
|
||||
}
|
||||
}
|
||||
@@ -219,10 +173,6 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
|
||||
return result;
|
||||
}
|
||||
|
||||
private boolean containsWildcard(final String stringValue) {
|
||||
return stringValue.contains("*");
|
||||
}
|
||||
|
||||
private IntList merge(final Collection<IntList> lists) {
|
||||
|
||||
IntList result = new IntList();
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
package org.lucares.pdb.datastore.lang;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
class GloblikePattern {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(GloblikePattern.class);
|
||||
|
||||
static Pattern globlikeToRegex(final String globPattern) {
|
||||
// a character that cannot be in the globPattern
|
||||
final String dotPlaceholder = "\ue003"; // fourth character in the private use area
|
||||
|
||||
final String valueRegex = "^" + //
|
||||
globPattern//
|
||||
.replace("-", Pattern.quote("-"))//
|
||||
.replace(".", dotPlaceholder)//
|
||||
.replace("*", ".*")//
|
||||
.replace(dotPlaceholder, ".*\\.")//
|
||||
.replaceAll("([A-Z])", "[a-z]*$1");
|
||||
|
||||
LOGGER.trace(">{}< -> >{}<", globPattern, valueRegex);
|
||||
|
||||
return Pattern.compile(valueRegex);
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,13 @@
|
||||
package org.lucares.pdb.datastore.lang;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.antlr.v4.runtime.ANTLRErrorListener;
|
||||
import org.antlr.v4.runtime.CommonToken;
|
||||
@@ -243,30 +246,13 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
|
||||
|
||||
/**
 * Currently does nothing: the entire body is commented out.
 * <p>
 * The disabled code would have added the values returned by
 * {@code getPropertyValuesByPrefix(prefix)} to {@code proposals} when the
 * context contains the caret. NOTE(review): that call passes a single
 * argument, whereas the {@code getPropertyValuesByPrefix} visible in this
 * file takes a property key and a value prefix, so the code cannot simply be
 * re-enabled.
 */
@Override
|
||||
public void exitPropertyTerminalExpression(final PropertyTerminalExpressionContext ctx) {
|
||||
// if (containsCaret(ctx)) {
|
||||
// final int start = ctx.getStart().getStartIndex();
|
||||
// final int end = ctx.getStop().getStopIndex();
|
||||
// final int ruleIndex = _ctx.getRuleIndex();
|
||||
//
|
||||
// final String prefix = ctx.getText().substring(0, caretPosition -
|
||||
// start);
|
||||
// ctx.getParent().children.get(0).getText();
|
||||
//
|
||||
// proposals.addAll(getPropertyValuesByPrefix(prefix));
|
||||
// }
|
||||
}
|
||||
|
||||
private SortedSet<String> getPropertyValuesByPrefix(final String propertyKey,
|
||||
final String propertyValuePrefix) {
|
||||
final SortedSet<String> availableValuesForKey = dataStore.getAvailableValuesForKey("", propertyKey);
|
||||
|
||||
final SortedSet<String> result = new TreeSet<>();
|
||||
|
||||
for (final String value : availableValuesForKey) {
|
||||
if (value.startsWith(propertyValuePrefix) && !value.equals(propertyValuePrefix)) {
|
||||
result.add(value);
|
||||
}
|
||||
}
|
||||
final SortedSet<String> result = filterValues(availableValuesForKey, propertyValuePrefix);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -325,4 +311,19 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
|
||||
super(input);
|
||||
}
|
||||
|
||||
static SortedSet<String> filterValues(final Collection<String> availableValues, final String valuePattern) {
|
||||
final SortedSet<String> result = new TreeSet<>();
|
||||
|
||||
final Pattern pattern = GloblikePattern.globlikeToRegex(valuePattern);
|
||||
|
||||
for (final String value : availableValues) {
|
||||
final Matcher matcher = pattern.matcher(value);
|
||||
if (matcher.find() && !value.equals(valuePattern)) {
|
||||
result.add(value);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user