add CamelCase matching to the query language

This commit is contained in:
2018-09-12 13:42:23 +02:00
parent 86b8f93752
commit 61f131571a
5 changed files with 141 additions and 72 deletions

View File

@@ -10,16 +10,12 @@ import java.util.Map.Entry;
import java.util.Objects; import java.util.Objects;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.lucares.collections.IntList; import org.lucares.collections.IntList;
import org.lucares.pdb.datastore.Doc; import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.lang.Expression.And; import org.lucares.pdb.datastore.lang.Expression.And;
import org.lucares.pdb.datastore.lang.Expression.Not; import org.lucares.pdb.datastore.lang.Expression.Not;
import org.lucares.pdb.datastore.lang.Expression.Or; import org.lucares.pdb.datastore.lang.Expression.Or;
import org.lucares.pdb.datastore.lang.Expression.Parentheses; import org.lucares.pdb.datastore.lang.Expression.Parentheses;
import org.lucares.pdb.datastore.lang.Expression.Property;
import org.lucares.pdb.datastore.lang.Expression.Terminal;
import org.lucares.utils.CollectionUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -147,16 +143,11 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
result = merge(allValuesForKey.values()); result = merge(allValuesForKey.values());
break; break;
} else if (containsWildcard(value)) { } else {
final Collection<IntList> docIds = filterByWildcard(propertyName,
final Collection<IntList> docIds = filterByWildcard(propertyName, globToRegex(value)); GloblikePattern.globlikeToRegex(value));
final IntList mergedDocIds = merge(docIds); final IntList mergedDocIds = merge(docIds);
result = IntList.union(result, mergedDocIds); result = IntList.union(result, mergedDocIds);
} else {
final IntList docIds = keyToValueToDocId.//
getOrDefault(propertyName, EMPTY_VALUES).//
getOrDefault(value, EMPTY_DOC_IDS);
result = IntList.union(result, docIds);
} }
} }
@@ -168,50 +159,13 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
return allDocIds.getAllDocIds(); return allDocIds.getAllDocIds();
} }
/**
 * Resolves a single {@code property=value} expression to the ids of the
 * matching documents and traces how long the lookup took.
 *
 * @param expression the property expression; its {@code property} field is
 *                   the key and its {@code value} terminal supplies the
 *                   value text
 * @return the doc ids for the value; whatever {@code EMPTY_DOC_IDS} holds
 *         when an exact lookup finds no entry
 */
@Override
public IntList visit(final Property expression) {
    final String key = expression.property;
    final String wanted = expression.value.getValue();

    // only the lookup itself is timed, not the field access above
    final long startNanos = System.nanoTime();
    final IntList docIds = docIdsForValue(key, wanted);
    final double elapsedMs = (System.nanoTime() - startNanos) / 1_000_000.0;

    LOGGER.trace("{} took {} ms results={}", expression, elapsedMs, docIds.size());
    return docIds;
}

/**
 * Picks the lookup strategy for a value: match-all, wildcard, or exact.
 */
private IntList docIdsForValue(final String propertyName, final String value) {
    if (isMatchAll(value)) {
        // every value known for the key contributes its doc ids
        return merge(keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES).values());
    }
    if (containsWildcard(value)) {
        // the wildcard value is translated to a regex and tested against each known value
        return merge(filterByWildcard(propertyName, globToRegex(value)));
    }
    // plain value: direct lookup, falling back to the empty defaults
    return keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES).getOrDefault(value, EMPTY_DOC_IDS);
}
/**
 * Converts a glob whose only wildcard is {@code *} into a regular expression.
 * The text between the wildcards is quoted, so it is matched literally.
 *
 * @param globPattern the glob to translate
 * @return the compiled pattern, with each {@code *} replaced by {@code .*}
 */
private Pattern globToRegex(final String globPattern) {
    // splitPreserveAllTokens (commons-lang) is used so that leading,
    // trailing and adjacent '*' still yield (empty) tokens to join on
    final String[] literalParts = StringUtils.splitPreserveAllTokens(globPattern, "*");
    final List<String> quotedParts = CollectionUtils.map(literalParts, Pattern::quote);
    return Pattern.compile(String.join(".*", quotedParts));
}
private List<IntList> filterByWildcard(final String propertyName, final Pattern valuePattern) { private List<IntList> filterByWildcard(final String propertyName, final Pattern valuePattern) {
final List<IntList> result = new ArrayList<>(); final List<IntList> result = new ArrayList<>();
final Map<String, IntList> valueToDocId = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES); final Map<String, IntList> valueToDocId = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES);
for (final Entry<String, IntList> entry : valueToDocId.entrySet()) { for (final Entry<String, IntList> entry : valueToDocId.entrySet()) {
if (valuePattern.matcher(entry.getKey()).matches()) { if (valuePattern.matcher(entry.getKey()).find()) {
result.add(entry.getValue()); result.add(entry.getValue());
} }
} }
@@ -219,10 +173,6 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
return result; return result;
} }
/**
 * Tells whether the value contains at least one {@code *} character.
 *
 * @param stringValue the value to inspect
 * @return {@code true} if a {@code *} occurs anywhere in the value
 */
private boolean containsWildcard(final String stringValue) {
    return stringValue.indexOf('*') >= 0;
}
private IntList merge(final Collection<IntList> lists) { private IntList merge(final Collection<IntList> lists) {
IntList result = new IntList(); IntList result = new IntList();

View File

@@ -0,0 +1,28 @@
package org.lucares.pdb.datastore.lang;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Translates the glob-like value patterns of the query language into regular
 * expressions.
 * <p>
 * Translation rules, applied character by character:
 * <ul>
 * <li>{@code *} becomes {@code .*} (any sequence of characters)</li>
 * <li>{@code .} becomes {@code .*\.} (skip ahead to a literal dot)</li>
 * <li>an ASCII upper case letter {@code X} becomes {@code [a-z]*X}, so that
 * {@code BYB} matches {@code BigYoungBlue} (CamelCase matching)</li>
 * <li>every other character is matched literally; characters that have a
 * meaning in regular expressions are escaped</li>
 * </ul>
 * The resulting expression is anchored at the start only, so when it is used
 * with {@code Matcher.find()} it acts as a prefix match.
 */
class GloblikePattern {

    private static final Logger LOGGER = LoggerFactory.getLogger(GloblikePattern.class);

    /**
     * Characters that are regex operators outside of a character class and
     * therefore need a backslash to be matched literally. {@code *} and
     * {@code .} are missing on purpose, they have their own translation.
     * {@code -} is missing because it has no special meaning outside of a
     * character class.
     */
    private static final String REGEX_META_CHARACTERS = "\\^$|?+()[]{}";

    /**
     * Builds the regular expression for a glob-like pattern.
     *
     * @param globPattern the pattern as typed in the query, must not be
     *                    {@code null}
     * @return the compiled expression, anchored with {@code ^}
     */
    static Pattern globlikeToRegex(final String globPattern) {
        final StringBuilder regex = new StringBuilder(globPattern.length() * 2 + 1);
        regex.append('^');

        // A single pass guarantees that text inserted by one rule is never
        // rewritten by another rule. The former chain of replace() calls
        // quoted '-' as \Q-\E and then treated the inserted 'Q' and 'E' as
        // CamelCase letters, which corrupted every pattern containing '-'.
        for (int i = 0; i < globPattern.length(); i++) {
            final char c = globPattern.charAt(i);
            if (c == '*') {
                regex.append(".*");
            } else if (c == '.') {
                regex.append(".*\\.");
            } else if (c >= 'A' && c <= 'Z') {
                regex.append("[a-z]*").append(c);
            } else if (REGEX_META_CHARACTERS.indexOf(c) >= 0) {
                // a backslash before a non-alphabetic character is always a literal
                regex.append('\\').append(c);
            } else {
                regex.append(c);
            }
        }

        final String valueRegex = regex.toString();
        LOGGER.trace(">{}< -> >{}<", globPattern, valueRegex);
        return Pattern.compile(valueRegex);
    }
}

View File

@@ -1,10 +1,13 @@
package org.lucares.pdb.datastore.lang; package org.lucares.pdb.datastore.lang;
import java.util.BitSet; import java.util.BitSet;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.antlr.v4.runtime.ANTLRErrorListener; import org.antlr.v4.runtime.ANTLRErrorListener;
import org.antlr.v4.runtime.CommonToken; import org.antlr.v4.runtime.CommonToken;
@@ -243,30 +246,13 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
@Override @Override
public void exitPropertyTerminalExpression(final PropertyTerminalExpressionContext ctx) { public void exitPropertyTerminalExpression(final PropertyTerminalExpressionContext ctx) {
// if (containsCaret(ctx)) {
// final int start = ctx.getStart().getStartIndex();
// final int end = ctx.getStop().getStopIndex();
// final int ruleIndex = _ctx.getRuleIndex();
//
// final String prefix = ctx.getText().substring(0, caretPosition -
// start);
// ctx.getParent().children.get(0).getText();
//
// proposals.addAll(getPropertyValuesByPrefix(prefix));
// }
} }
private SortedSet<String> getPropertyValuesByPrefix(final String propertyKey, private SortedSet<String> getPropertyValuesByPrefix(final String propertyKey,
final String propertyValuePrefix) { final String propertyValuePrefix) {
final SortedSet<String> availableValuesForKey = dataStore.getAvailableValuesForKey("", propertyKey); final SortedSet<String> availableValuesForKey = dataStore.getAvailableValuesForKey("", propertyKey);
final SortedSet<String> result = new TreeSet<>(); final SortedSet<String> result = filterValues(availableValuesForKey, propertyValuePrefix);
for (final String value : availableValuesForKey) {
if (value.startsWith(propertyValuePrefix) && !value.equals(propertyValuePrefix)) {
result.add(value);
}
}
return result; return result;
} }
@@ -325,4 +311,19 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
super(input); super(input);
} }
/**
 * Selects the values that match a glob-like pattern.
 *
 * @param availableValues the candidate values
 * @param valuePattern    the pattern, translated by
 *                        {@code GloblikePattern.globlikeToRegex}
 * @return the sorted matches; a value that is identical to the pattern text
 *         is left out
 */
static SortedSet<String> filterValues(final Collection<String> availableValues, final String valuePattern) {
    final Pattern compiled = GloblikePattern.globlikeToRegex(valuePattern);
    final SortedSet<String> matches = new TreeSet<>();

    for (final String candidate : availableValues) {
        // skip values the pattern does not find, and the value that is
        // literally the pattern text itself
        if (!compiled.matcher(candidate).find() || candidate.equals(valuePattern)) {
            continue;
        }
        matches.add(candidate);
    }
    return matches;
}
} }

View File

@@ -132,6 +132,13 @@ public class ProposerTest {
); );
} }
/**
 * Checks the proposals for a value pattern with a leading wildcard. The
 * caret index 8 equals the length of the query {@code name=*im}.
 */
public void testProposalWithWildcards() throws Exception {
    final Proposal tim = new Proposal("Tim", "name=Tim", true, "name=Tim", 8);
    final Proposal timothy = new Proposal("Timothy", "name=Timothy", true, "name=Timothy", 12);

    assertProposals("name=*im", 8, tim, timothy);
}
private void assertProposals(final String query, final int caretIndex, final Proposal... expected) private void assertProposals(final String query, final int caretIndex, final Proposal... expected)
throws InterruptedException { throws InterruptedException {

View File

@@ -0,0 +1,83 @@
package org.lucares.pdb.datastore.lang;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
 * Data-driven test for {@code QueryCompletionPdbLangParser.filterValues}:
 * each row pairs a value pattern with the values it is expected to select
 * from a fixed list of candidates.
 */
@Test
public class QueryCompletionPdbLangParserTest {

    /**
     * Supplies the rows for {@link #testPatterns}: available values, value
     * pattern, expected matches.
     */
    @DataProvider
    public Object[][] providerPatterns() {
        // The compound sample names appear to follow the English adjective order:
        // opinion-size-age-shape-colour-origin-material-purpose Noun
        final String[] names = { //
                "Tim", //
                "Timothy", //
                "Tanja", //
                "Danja", //
                "Wanja", //
                "BigOldGrey.Jennifer", //
                "BigYoungGreen.Jennifer", //
                "BigYoungBlue.Jenny", //
                "SmallRoundBlue.Peter" //
        };
        final List<String> candidates = new ArrayList<>();
        for (final String name : names) {
            candidates.add(name);
        }

        final List<Object[]> rows = new ArrayList<>();

        // infix does not match
        rows.add(row(candidates, "nj"));

        rows.add(row(candidates, "*nj", "Danja", "Tanja", "Wanja"));

        rows.add(row(candidates, "BYB", "BigYoungBlue.Jenny"));

        rows.add(row(candidates, "B*Gr", "BigOldGrey.Jennifer", "BigYoungGreen.Jennifer"));

        rows.add(row(candidates, ".Jen", "BigOldGrey.Jennifer", "BigYoungGreen.Jennifer", "BigYoungBlue.Jenny"));

        rows.add(row(candidates, "BY.Jen", "BigYoungBlue.Jenny", "BigYoungGreen.Jennifer"));

        return rows.toArray(new Object[0][]);
    }

    /**
     * Runs one row: filters the available values with the pattern and
     * compares the outcome with the expected set.
     */
    @Test(dataProvider = "providerPatterns")
    public void testPatterns(final List<String> availableValues, final String valuePattern,
            final SortedSet<String> expectedValues) {
        final SortedSet<String> filtered = QueryCompletionPdbLangParser.filterValues(availableValues, valuePattern);
        Assert.assertEquals(filtered, expectedValues);
    }

    /**
     * Builds one data provider row; the expected matches are collected into
     * a sorted set.
     */
    private static Object[] row(final List<String> availableValues, final String valuePattern,
            final String... expectedMatches) {
        final SortedSet<String> expected = new TreeSet<>();
        for (final String match : expectedMatches) {
            expected.add(match);
        }
        return new Object[] { availableValues, valuePattern, expected };
    }
}