From ea5e16fad52a1a0ab43a5c4b6fcf87c149945105 Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Sat, 18 Aug 2018 10:31:49 +0200
Subject: [PATCH] expressions now support in-queries

---
 .../org/lucares/pdb/datastore/lang/PdbLang.g4 | 11 ++-
 .../pdb/datastore/lang/Expression.java        | 95 +++++++++++++++++++
 .../lang/ExpressionToDocIdVisitor.java        | 42 ++++++++-
 .../pdb/datastore/lang/ExpressionVisitor.java |  4 +
 .../lang/QueryCompletionPdbLangParser.java    | 49 ++++++++++-
 .../pdb/datastore/lang/QueryLanguage.java     | 37 ++++++++
 .../pdb/datastore/internal/DataStoreTest.java |  7 ++
 .../pdb/datastore/internal/ProposerTest.java  | 24 +++++-
 8 files changed, 259 insertions(+), 10 deletions(-)

diff --git a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4
index 067b7b8..1f1d359 100644
--- a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4
+++ b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4
@@ -10,10 +10,16 @@ expression
     : LPAREN expression RPAREN #parenExpression
     | NOT expression #notExpression
     | prop=identifier eq=equal value=propValue #propertyExpression
+    | prop=identifier in=inExpr LPAREN listOfProperties=listOfPropValues RPAREN #inExpression
     | left=expression AND right=expression #binaryAndExpression
     | left=expression OR right=expression #binaryOrExpression
     ;
-    
+
+listOfPropValues
+    : value=propValue
+    | leftValue=propValue COMMA listOfProperties=listOfPropValues
+    ;
+
 identifier
     : IDENTIFIER #identifierExpression
     ;
@@ -22,13 +28,16 @@ propValue
     ;
 
 equal : EQUAL ;
+inExpr : IN ;
 
 AND : 'and' ;
 OR : 'or' ;
 NOT : '!';
 EQUAL : '=' ;
+IN : 'in' ;
 LPAREN : '(' ;
 RPAREN : ')' ;
+COMMA : ',' ;
 IDENTIFIER
     : JavaLetter JavaLetterOrDigit*
     ;
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java
index 1299b2d..55f3551 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java
@@ -1,5 +1,10 @@
 package org.lucares.pdb.datastore.lang;
 
+import java.util.ArrayList;
+import java.util.List;
+
+import org.lucares.utils.CollectionUtils;
+
 abstract public class Expression {
 
     public <T> T visit(final ExpressionVisitor<T> visitor) {
@@ -434,4 +439,94 @@ abstract public class Expression {
         }
     }
 
+    /**
+     * The value list of an in-expression, e.g. {@code (eagle, pigeon)}; values keep their query order.
+     */
+    static class ListOfPropertyValues extends Expression {
+        private final List<Terminal> propertyValues = new ArrayList<>();
+
+        public ListOfPropertyValues(final Terminal propertyValue) {
+            propertyValues.add(propertyValue);
+        }
+
+        /**
+         * Puts {@code propertyValue} in front of the values of {@code listOfPropertyValues}. The grammar rule
+         * {@code propValue COMMA listOfPropValues} is right-recursive, so the new value is the leftmost one.
+         */
+        public ListOfPropertyValues(final Terminal propertyValue, final ListOfPropertyValues listOfPropertyValues) {
+            propertyValues.add(propertyValue);
+            propertyValues.addAll(listOfPropertyValues.propertyValues);
+        }
+
+        public List<String> getValues() {
+            return CollectionUtils.map(propertyValues, Terminal::getValue);
+        }
+
+        @Override
+        public String toString() {
+            return "(" + String.join(", ", getValues()) + ")";
+        }
+    }
+
+    /**
+     * Expression node for {@code property in (value1, value2, ...)}.
+     */
+    static class InExpression extends Expression {
+        private final String property;
+        private final List<String> values;
+
+        public InExpression(final String property, final List<String> values) {
+            this.property = property;
+            this.values = values;
+        }
+
+        @Override
+        public String toString() {
+            return property + " in (" + String.join(", ", values) + ")";
+        }
+
+        @Override
+        public <T> T visit(final ExpressionVisitor<T> visitor) {
+            return visitor.visit(this);
+        }
+
+        public String getProperty() {
+            return property;
+        }
+
+        public List<String> getValues() {
+            return values;
+        }
+
+        @Override
+        public int hashCode() {
+            final int prime = 31;
+            int result = 1;
+            result = prime * result + ((property == null) ? 0 : property.hashCode());
+            result = prime * result + ((values == null) ? 0 : values.hashCode());
+            return result;
+        }
+
+        @Override
+        public boolean equals(final Object obj) {
+            if (this == obj)
+                return true;
+            if (obj == null)
+                return false;
+            if (getClass() != obj.getClass())
+                return false;
+            final InExpression other = (InExpression) obj;
+            if (property == null) {
+                if (other.property != null)
+                    return false;
+            } else if (!property.equals(other.property))
+                return false;
+            if (values == null) {
+                if (other.values != null)
+                    return false;
+            } else if (!values.equals(other.values))
+                return false;
+            return true;
+        }
+    }
 }
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java
index 96dc08e..f57248c 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java
@@ -72,10 +72,10 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
         final IntList rightFiles = right.visit(this);
 
         final long start = System.nanoTime();
-        final IntList result = IntList.intersection(leftFiles,rightFiles);
+        final IntList result = IntList.intersection(leftFiles, rightFiles);
         LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size());
         assert result.isSorted();
-        
+
         return result;
     }
 
@@ -126,7 +126,41 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
     @Override
     public IntList visit(final Expression.MatchAll expression) {
         final long start = System.nanoTime();
-        IntList result = getAllDocIds();
+        final IntList result = getAllDocIds();
+        LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size());
+        return result;
+    }
+
+    @Override
+    public IntList visit(final Expression.InExpression expression) {
+        final long start = System.nanoTime();
+
+        final String propertyName = expression.getProperty();
+        final List<String> values = expression.getValues();
+
+        IntList result = new IntList();
+
+        for (final String value : values) {
+            if (isMatchAll(value)) {
+
+                final Map<String, IntList> allValuesForKey = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES);
+
+                // the match-all value selects every doc id of the property, so the other values are not evaluated
+                result = merge(allValuesForKey.values());
+                break;
+            } else if (containsWildcard(value)) {
+
+                final Collection<IntList> docIds = filterByWildcard(propertyName, globToRegex(value));
+                final IntList mergedDocIds = merge(docIds);
+                result = IntList.union(result, mergedDocIds);
+            } else {
+                final IntList docIds = keyToValueToDocId.//
+                        getOrDefault(propertyName, EMPTY_VALUES).//
+                        getOrDefault(value, EMPTY_DOC_IDS);
+                result = IntList.union(result, docIds);
+            }
+        }
+
         LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size());
         return result;
     }
@@ -185,7 +219,7 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<IntList> {
 
         return result;
     }
-    
+
     private boolean containsWildcard(final String stringValue) {
         return stringValue.contains("*");
     }
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java
index 20e1e50..b4ff5d1 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java
@@ -25,6 +25,10 @@ public abstract class ExpressionVisitor<T> {
         throw new UnsupportedOperationException();
     }
 
+    public T visit(final Expression.InExpression expression) {
+        throw new UnsupportedOperationException();
+    }
+
     public T visit(final Expression.Parentheses parentheses) {
         throw new UnsupportedOperationException();
     }
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java
index b9beb10..dd31de1 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java
@@ -54,7 +54,24 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
             final int end = node.getSymbol().getStopIndex();
 
             if (_ctx instanceof PropertyTerminalExpressionContext) {
-                final String propertyKey = _ctx.getParent().children.get(0).getText();
+
+                final String postfixAfterInsertedTerminal;
+                final String propertyKey;
+                if (_ctx.getParent() instanceof ListOfPropValuesContext) {
+                    // for in-expressions, e.g. key in (val)
+                    ParserRuleContext parent = _ctx.getParent();
+                    while (parent instanceof ListOfPropValuesContext) {
+                        parent = parent.getParent();
+                    }
+
+                    propertyKey = parent.children.get(0).getText();
+                    postfixAfterInsertedTerminal = "";
+                } else {
+                    // for property-expressions, e.g. key = val
+                    propertyKey = _ctx.getParent().children.get(0).getText();
+                    postfixAfterInsertedTerminal = " ";
+                }
+
                 String propertyValuePrefix = node.getText().substring(0, caretPosition - start);
                 propertyValuePrefix = propertyValuePrefix.replace(Proposer.PREFIX_MARKER, "");
                 final SortedSet<String> proposedValues = getPropertyValuesByPrefix(propertyKey,
@@ -64,10 +81,12 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
                 proposedValues.stream()//
                         .map(v -> {
                             final StringBuilder newQuery = new StringBuilder(query);
-                            newQuery.replace(start, end + 1, v + " "); // insert the terminal into the query
+                            newQuery.replace(start, end + 1, v + postfixAfterInsertedTerminal); // insert the
+                                                                                                // terminal into the
+                                                                                                // query
 
                             return new Proposal(v, newQuery.toString(), false, newQuery.toString(),
-                                    start + v.length() + 1);
+                                    start + v.length() + postfixAfterInsertedTerminal.length());
                         }).map(p -> {
                             int count = 0;
                             try {
@@ -255,10 +274,26 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
         public void enterEqual(final EqualContext ctx) {
         }
 
+        @Override
+        public void enterInExpr(final InExprContext ctx) {
+        }
+
+        @Override
+        public void enterInExpression(final InExpressionContext ctx) {
+        }
+
         @Override
         public void exitEqual(final EqualContext ctx) {
         }
 
+        @Override
+        public void exitInExpr(final InExprContext ctx) {
+        }
+
+        @Override
+        public void exitInExpression(final InExpressionContext ctx) {
+        }
+
         private boolean isEOF(final TerminalNode node) {
             return node.getSymbol().getType() < 0;
         }
@@ -283,6 +318,14 @@ public class QueryCompletionPdbLangParser extends PdbLangParser {
         public void reportContextSensitivity(final Parser recognizer, final DFA dfa, final int startIndex,
                 final int stopIndex, final int prediction, final ATNConfigSet configs) {
         }
+
+        @Override
+        public void enterListOfPropValues(final ListOfPropValuesContext ctx) {
+        }
+
+        @Override
+        public void exitListOfPropValues(final ListOfPropValuesContext ctx) {
+        }
     }
 
     public QueryCompletionPdbLangParser(final TokenStream input) {
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java
index 48d955e..f88e712 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java
@@ -9,6 +9,8 @@ import org.antlr.v4.runtime.tree.ParseTree;
 import org.antlr.v4.runtime.tree.ParseTreeListener;
 import org.antlr.v4.runtime.tree.ParseTreeWalker;
 import org.lucares.pdb.datastore.lang.Expression.AndTemporary;
+import org.lucares.pdb.datastore.lang.Expression.InExpression;
+import org.lucares.pdb.datastore.lang.Expression.ListOfPropertyValues;
 import org.lucares.pdb.datastore.lang.Expression.Not;
 import org.lucares.pdb.datastore.lang.Expression.OrTemporary;
 import org.lucares.pdb.datastore.lang.Expression.Property;
@@ -17,6 +19,8 @@ import org.lucares.pdb.datastore.lang.Expression.Terminal;
 import org.lucares.pdb.datastore.lang.PdbLangParser.BinaryAndExpressionContext;
 import org.lucares.pdb.datastore.lang.PdbLangParser.BinaryOrExpressionContext;
 import org.lucares.pdb.datastore.lang.PdbLangParser.IdentifierExpressionContext;
+import org.lucares.pdb.datastore.lang.PdbLangParser.InExpressionContext;
+import org.lucares.pdb.datastore.lang.PdbLangParser.ListOfPropValuesContext;
 import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext;
 import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyExpressionContext;
 import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyTerminalExpressionContext;
@@ -105,6 +109,39 @@ public class QueryLanguage {
 
                 stack.push(operation.toExpression(left, right));
             }
+
+            @Override
+            public void exitListOfPropValues(final ListOfPropValuesContext ctx) {
+                final Expression topStackElement = stack.pop();
+
+                if (topStackElement instanceof ListOfPropertyValues) {
+                    // there are at least two property values in the query
+                    // e.g. in the expression "bird in (eagle, pigeon)"
+                    final ListOfPropertyValues existingList = (ListOfPropertyValues) topStackElement;
+                    final Terminal nextPropertyValue = (Terminal) stack.pop();
+
+                    final ListOfPropertyValues newListOfPropertyValues = new ListOfPropertyValues(nextPropertyValue,
+                            existingList);
+                    stack.push(newListOfPropertyValues);
+                } else {
+                    // this is the first or the only value in this list of property values
+                    // e.g. in the expression "bird in (eagle)"
+                    final Terminal propertyValue = (Terminal) topStackElement;
+
+                    final ListOfPropertyValues newListOfPropertyValues = new ListOfPropertyValues(propertyValue);
+                    stack.push(newListOfPropertyValues);
+                }
+            }
+
+            @Override
+            public void exitInExpression(final InExpressionContext ctx) {
+
+                final ListOfPropertyValues propertyValues = (ListOfPropertyValues) stack.pop();
+                final Terminal propertyName = (Terminal) stack.pop();
+
+                final InExpression inExpression = new InExpression(propertyName.getValue(), propertyValues.getValues());
+                stack.push(inExpression);
+            }
         };
 
         // Specify our entry point
diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java
index 76b7e38..1a97d12 100644
--- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java
+++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java
@@ -99,6 +99,13 @@ public class DataStoreTest {
         assertSearch("dog=lab*dor", labradorJenny, labradorTim);
         assertSearch("dog=*lab*dor*", labradorJenny, labradorTim);
 
+        // 'in' queries
+        assertSearch("bird in (eagle, pigeon, flamingo)", eagleTim, pigeonJennifer, flamingoJennifer);
+        assertSearch("dog in (labrador) and name in (Tim, Jennifer)", labradorTim);
+        assertSearch("name in (Jenn*)", pigeonJennifer, flamingoJennifer, labradorJenny);
+        assertSearch("name in (*) and dog=labrador", labradorJenny, labradorTim);
+        assertSearch("name in (XYZ, *) and dog=labrador", labradorJenny, labradorTim);
+
     }
 
     public void testGetByTags() throws IOException {
diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java
index 5bc37a8..aa91014 100644
--- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java
+++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java
@@ -12,6 +12,7 @@ import java.util.Map;
 import org.lucares.pdb.api.Tags;
 import org.lucares.pdb.datastore.PdbDB;
 import org.lucares.pdb.datastore.Proposal;
+import org.lucares.utils.CollectionUtils;
 import org.lucares.utils.file.FileUtils;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
@@ -104,6 +105,25 @@ public class ProposerTest {
          */
     }
 
+    public void testInExpressions() throws Exception {
+        assertProposals("name in (Timothy,)", 17, //
+                new Proposal("Jennifer", "name in (Timothy,Jennifer)", true, "name in (Timothy,Jennifer)", 25), //
+                new Proposal("Jenny", "name in (Timothy,Jenny)", true, "name in (Timothy,Jenny)", 22), //
+                new Proposal("Tim", "name in (Timothy,Tim)", true, "name in (Timothy,Tim)", 20), //
+                new Proposal("Timothy", "name in (Timothy,Timothy)", true, "name in (Timothy,Timothy)", 24)//
+        );
+
+        assertProposals("name in (Timothy, J)", 19, //
+                new Proposal("Jennifer", "name in (Timothy, Jennifer)", true, "name in (Timothy, Jennifer)", 26), //
+                new Proposal("Jenny", "name in (Timothy, Jenny)", true, "name in (Timothy, Jenny)", 23));
+
+        assertProposals("name in (Tim)", 12, //
+                new Proposal("Timothy", "name in (Timothy)", true, "name in (Timothy)", 16));
+
+        /*
+         */
+    }
+
     public void testProposalOnEmptyValuePrefix() throws Exception {
         assertProposals("name=", 5, //
                 new Proposal("Jennifer", "name=Jennifer ", true, "name=Jennifer ", 14), //
@@ -129,8 +149,8 @@ public class ProposerTest {
         Collections.sort(expectedList);
 
         System.out.println("\n\n--- " + query + " ---");
-        System.out.println("actual : " + actual);
-        System.out.println("expected: " + expectedList);
+        System.out.println("actual : " + String.join("\n", CollectionUtils.map(actual, Proposal::toString)));
+        System.out.println("expected: " + String.join("\n", CollectionUtils.map(expectedList, Proposal::toString)));
         Assert.assertEquals(actual, expectedList);
     }
 }