From bcb2e6ca833a5baa8bde405fbdf2299094946f2e Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Mon, 17 Apr 2017 16:25:14 +0200 Subject: [PATCH] add query completion We are using ANTLR listeners to find out where in the query the cursor is. Then we generate a list of keys/values that might fit at that position. With that information we can generate new queries and sort them by the number of results they yield. --- .../org/lucares/pdb/datastore/lang/PdbLang.g4 | 6 +- .../org/lucares/pdb/datastore/Proposal.java | 8 +- .../pdb/datastore/internal/DataStore.java | 18 +- .../pdb/datastore/internal/Proposer.java | 20 +- .../pdb/datastore/lang/ProposerParser.java | 39 +++ .../lang/QueryCompletionPdbLangParser.java | 276 ++++++++++++++++++ .../pdb/datastore/lang/QueryLanguage.java | 14 + .../pdb/datastore/internal/ProposerTest.java | 41 ++- .../src/main/resources/resources/js/search.js | 2 +- .../lucares/performance/db/PerformanceDb.java | 5 +- 10 files changed, 396 insertions(+), 33 deletions(-) create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/lang/ProposerParser.java create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java diff --git a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 index b8c1176..35add51 100644 --- a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 +++ b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 @@ -18,7 +18,7 @@ identifier : IDENTIFIER #identifierExpression ; propValue - : identifier + : IDENTIFIER #propertyTerminalExpression ; equal : EQUAL ; @@ -29,12 +29,10 @@ NOT : '!'; EQUAL : '=' ; LPAREN : '(' ; RPAREN : ')' ; -WS : [ \r\t\u000C\n]+ -> skip; - - IDENTIFIER : JavaLetter JavaLetterOrDigit* ; +WS : [ \r\t\u000C\n]+ -> skip; fragment diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/Proposal.java b/data-store/src/main/java/org/lucares/pdb/datastore/Proposal.java index b437cce..4e45207 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/Proposal.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/Proposal.java @@ -14,6 +14,12 @@ public class Proposal implements Comparable { this.results = results; } + public Proposal(final Proposal proposal, final long results) { + this.proposedTag = proposal.proposedTag; + this.proposedQuery = proposal.proposedQuery; + this.results = results; + } + public String getProposedTag() { return proposedTag; } @@ -28,7 +34,7 @@ public class Proposal implements Comparable { @Override public String toString() { - return "Proposal [proposedTag=" + proposedTag + ", proposedQuery=" + proposedQuery + ", results=" + results + return "Proposal [proposedTag:" + proposedTag + ", proposedQuery:" + proposedQuery + ", results=" + results + "]"; } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index cb7a05c..5b02124 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -8,6 +8,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.regex.Matcher; @@ -175,12 +176,19 @@ public class DataStore { public SortedSet getAvailableValuesForKey(final String query, final String key) { final SortedSet result = new TreeSet<>(); - final List docs = search(query); - for (final Doc doc : docs) { - final String valueForKey = doc.getTags().getValue(key); + if (query.isEmpty()) { - if (valueForKey != null) { - result.add(valueForKey); + final Set values = keyToValueToDocId.getOrDefault(key, Collections.emptyMap()).keySet(); + result.addAll(values); + + } else { + final List docs = search(query); + for (final Doc doc : docs) { + final String valueForKey = doc.getTags().getValue(key); + + if (valueForKey != null) { + result.add(valueForKey); + } } } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/Proposer.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/Proposer.java index 2bf83a9..ff99da5 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/Proposer.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/Proposer.java @@ -1,12 +1,13 @@ package org.lucares.pdb.datastore.internal; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.SortedSet; +import java.util.TreeSet; import org.lucares.pdb.datastore.Proposal; +import org.lucares.pdb.datastore.lang.ProposerParser; import org.lucares.utils.CollectionUtils; public class Proposer { @@ -18,19 +19,19 @@ public class Proposer { } public List propose(final String query, final int caretIndex) { - final List result; + final SortedSet result; if (query.isEmpty()) { result = proposeForAllKeys(); } else { - throw new UnsupportedOperationException(); + result = ProposerParser.parse(query, dataStore, caretIndex); } - return result; + return CollectionUtils.filter(result, p -> p.getResults() >= 0); } - private List proposeForAllKeys() { - final List result; + private SortedSet proposeForAllKeys() { + final SortedSet result; final List fields = dataStore.getAvailableFields(); final Map fieldToQuery = CollectionUtils.createMapFromKeys(fields, f -> f + "=*"); @@ -39,9 +40,9 @@ public class Proposer { return result; } - private List computeProposalsForQueries(final Map keyToQuery) { + private SortedSet computeProposalsForQueries(final Map keyToQuery) { - final List result = new ArrayList<>(keyToQuery.size()); + final SortedSet result = new TreeSet<>(); for (final Entry e : keyToQuery.entrySet()) { final String key = e.getKey(); final String query = e.getValue(); @@ -51,7 +52,6 @@ public class Proposer { result.add(proposal); } - Collections.sort(result); return result; } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ProposerParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ProposerParser.java new file mode 100644 index 0000000..7e4131f --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ProposerParser.java @@ -0,0 +1,39 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.SortedSet; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.lucares.pdb.datastore.Proposal; +import org.lucares.pdb.datastore.internal.DataStore; +import org.lucares.pdb.datastore.lang.QueryCompletionPdbLangParser.Listener; + +public class ProposerParser { + + public static SortedSet parse(final String query, final DataStore dataStore, final int caretIndex) { + + final ProposerParser lang = new ProposerParser(); + return lang.parseInternal(query, dataStore, caretIndex); + } + + private SortedSet parseInternal(final String query, final DataStore dataStore, final int caretIndex) { + + final CharStream in = CharStreams.fromString(query); + + final PdbLangLexer lexer = new PdbLangLexer(in); + + final CommonTokenStream tokens = new CommonTokenStream(lexer); + + final QueryCompletionPdbLangParser parser = new QueryCompletionPdbLangParser(tokens); + parser.setTrace(false); + + final Listener listener = parser.new Listener(query, dataStore, caretIndex); + parser.addErrorListener(listener); + parser.addParseListener(listener); + + parser.start(); + + return listener.getProposals(); + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java new file mode 100644 index 0000000..b14b185 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionPdbLangParser.java @@ -0,0 +1,276 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.BitSet; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.antlr.v4.runtime.ANTLRErrorListener; +import org.antlr.v4.runtime.CommonToken; +import org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.atn.ATNConfigSet; +import org.antlr.v4.runtime.dfa.DFA; +import org.antlr.v4.runtime.tree.ErrorNode; +import org.antlr.v4.runtime.tree.TerminalNode; +import org.lucares.pdb.datastore.Proposal; +import org.lucares.pdb.datastore.internal.DataStore; +import org.lucares.utils.CollectionUtils; + +public class QueryCompletionPdbLangParser extends PdbLangParser { + + public class Listener implements PdbLangListener, ANTLRErrorListener { + + private final int caretPosition; + private final DataStore dataStore; + private final SortedSet proposals = new TreeSet<>(); + private final String query; + + public Listener(final String query, final DataStore dataStore, final int caretPosition) { + this.query = query; + this.dataStore = dataStore; + this.caretPosition = caretPosition; + } + + public SortedSet getProposals() { + return proposals; + } + + @Override + public void visitTerminal(final TerminalNode node) { + if (containsCaret(node) && !isEOF(node)) { + final int start = node.getSymbol().getStartIndex(); + final int end = node.getSymbol().getStopIndex(); + + if (_ctx instanceof PropertyTerminalExpressionContext) { + final String propertyKey = _ctx.getParent().children.get(0).getText(); + final String propertyValuePrefix = node.getText().substring(0, caretPosition - start); + final SortedSet proposedValues = getPropertyValuesByPrefix(propertyKey, + propertyValuePrefix); + + proposedValues.stream()// + .map(v -> { + final StringBuilder newQuery = new StringBuilder(query); + newQuery.replace(start, end + 1, v + " "); + + return new Proposal(v, newQuery.toString(), -1); + }).map(p -> { + + final int count = dataStore.count(p.getProposedQuery()); + return new Proposal(p, count); + }).forEach(proposals::add); + + } else if (_ctx instanceof IdentifierExpressionContext) { + final String propertyKeyPrefix = node.getText().substring(0, caretPosition - start); + + final StringBuilder newQueryPattern = new StringBuilder(query); + newQueryPattern.replace(start, end + 1, "%s"); + + addProposalsForKeys(propertyKeyPrefix, newQueryPattern.toString()); + } + } + } + + @Override + public void syntaxError(final Recognizer recognizer, final Object offendingSymbol, final int line, + final int charPositionInLine, final String msg, final RecognitionException e) { + if (!isEOF(offendingSymbol) && offendingSymbol instanceof CommonToken) { + + final CommonToken token = (CommonToken) offendingSymbol; + final String text = token.getText(); + + if ("and".startsWith(text)) { + final StringBuilder newQuery = new StringBuilder(query); + newQuery.replace(charPositionInLine, charPositionInLine + text.length(), " and "); + + proposals.add(new Proposal(" and ", newQuery.toString(), 1)); + } + if ("or".startsWith(text)) { + final StringBuilder newQuery = new StringBuilder(query); + newQuery.replace(charPositionInLine, charPositionInLine + text.length(), " or "); + + proposals.add(new Proposal(" or ", newQuery.toString(), 1)); + } + } + } + + private void addProposalsForKeys(final String propertyKeyPrefix, final String newQueryPattern) { + + final List availableKeys = dataStore.getAvailableFields(); + final List matchingKeys = CollectionUtils.filter(availableKeys, + s -> s.startsWith(propertyKeyPrefix)); + + matchingKeys.stream()// + .map(key -> { + + return new Proposal(key, String.format(newQueryPattern, key + "=* "), -1); + }).map(p -> { + + final String proposedQuery = p.getProposedQuery(); + final int count = count(proposedQuery); + return new Proposal(p, count); + }).forEach(proposals::add); + } + + private int count(final String proposedQuery) { + + try { + return dataStore.count(proposedQuery); + } catch (final SyntaxException e) { + return -1; + } + } + + private boolean isEOF(final Object offendingSymbol) { + + if (offendingSymbol instanceof CommonToken) { + return ((CommonToken) offendingSymbol).getType() < 0; + } + + return false; + } + + @Override + public void visitErrorNode(final ErrorNode node) { + } + + @Override + public void enterEveryRule(final ParserRuleContext ctx) { + } + + @Override + public void exitEveryRule(final ParserRuleContext ctx) { + } + + @Override + public void enterStart(final StartContext ctx) { + } + + @Override + public void exitStart(final StartContext ctx) { + } + + @Override + public void enterBinaryOrExpression(final BinaryOrExpressionContext ctx) { + } + + @Override + public void exitBinaryOrExpression(final BinaryOrExpressionContext ctx) { + } + + @Override + public void enterBinaryAndExpression(final BinaryAndExpressionContext ctx) { + } + + @Override + public void exitBinaryAndExpression(final BinaryAndExpressionContext ctx) { + } + + @Override + public void enterNotExpression(final NotExpressionContext ctx) { + } + + @Override + public void exitNotExpression(final NotExpressionContext ctx) { + } + + @Override + public void enterParenExpression(final ParenExpressionContext ctx) { + } + + @Override + public void exitParenExpression(final ParenExpressionContext ctx) { + } + + @Override + public void enterPropertyExpression(final PropertyExpressionContext ctx) { + } + + @Override + public void exitPropertyExpression(final PropertyExpressionContext ctx) { + } + + @Override + public void enterIdentifierExpression(final IdentifierExpressionContext ctx) { + } + + @Override + public void exitIdentifierExpression(final IdentifierExpressionContext ctx) { + } + + @Override + public void enterPropertyTerminalExpression(final PropertyTerminalExpressionContext ctx) { + } + + @Override + public void exitPropertyTerminalExpression(final PropertyTerminalExpressionContext ctx) { + // if (containsCaret(ctx)) { + // final int start = ctx.getStart().getStartIndex(); + // final int end = ctx.getStop().getStopIndex(); + // final int ruleIndex = _ctx.getRuleIndex(); + // + // final String prefix = ctx.getText().substring(0, caretPosition - + // start); + // ctx.getParent().children.get(0).getText(); + // + // proposals.addAll(getPropertyValuesByPrefix(prefix)); + // } + } + + private SortedSet getPropertyValuesByPrefix(final String propertyKey, + final String propertyValuePrefix) { + final SortedSet availableValuesForKey = dataStore.getAvailableValuesForKey("", propertyKey); + + final SortedSet result = new TreeSet<>(); + + for (final String value : availableValuesForKey) { + if (value.startsWith(propertyValuePrefix)) { + result.add(value); + } + } + + return result; + } + + @Override + public void enterEqual(final EqualContext ctx) { + } + + @Override + public void exitEqual(final EqualContext ctx) { + } + + private boolean isEOF(final TerminalNode node) { + return node.getSymbol().getType() < 0; + } + + private boolean containsCaret(final TerminalNode node) { + final int start = node.getSymbol().getStartIndex(); + final int end = node.getSymbol().getStopIndex(); + return start <= caretPosition && end + 1 >= caretPosition; + } + + @Override + public void reportAmbiguity(final Parser recognizer, final DFA dfa, final int startIndex, final int stopIndex, + final boolean exact, final BitSet ambigAlts, final ATNConfigSet configs) { + } + + @Override + public void reportAttemptingFullContext(final Parser recognizer, final DFA dfa, final int startIndex, + final int stopIndex, final BitSet conflictingAlts, final ATNConfigSet configs) { + } + + @Override + public void reportContextSensitivity(final Parser recognizer, final DFA dfa, final int startIndex, + final int stopIndex, final int prediction, final ATNConfigSet configs) { + } + } + + public QueryCompletionPdbLangParser(final TokenStream input) { + super(input); + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java index f75d1e39..48d955e 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java @@ -19,6 +19,7 @@ import org.lucares.pdb.datastore.lang.PdbLangParser.BinaryOrExpressionContext; import org.lucares.pdb.datastore.lang.PdbLangParser.IdentifierExpressionContext; import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext; import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyExpressionContext; +import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyTerminalExpressionContext; public class QueryLanguage { @@ -55,6 +56,19 @@ public class QueryLanguage { stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex)); } + @Override + public void exitPropertyTerminalExpression(final PropertyTerminalExpressionContext ctx) { + if (ctx.getText().length() > 255) { + throw new SyntaxException(ctx, "token too long"); + } + + final int line = ctx.getStart().getLine(); + final int startIndex = ctx.getStart().getStartIndex(); + final int stopIndex = ctx.getStart().getStopIndex(); + + stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex)); + } + @Override public void exitPropertyExpression(final PropertyExpressionContext ctx) { // System.out.println("property expression"); diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java index 4975fca..f614c2b 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java @@ -14,8 +14,8 @@ import org.lucares.pdb.datastore.PdbDB; import org.lucares.pdb.datastore.Proposal; import org.lucares.utils.file.FileUtils; import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @Test @@ -25,19 +25,20 @@ public class ProposerTest { private PdbDB db; private Map tagsToPath; - @BeforeMethod - public void beforeMethod() throws IOException { + @BeforeClass + public void beforeClass() throws Exception { dataDirectory = Files.createTempDirectory("pdb"); + initDatabase(); } - @AfterMethod - public void afterMethod() throws IOException { + @AfterClass + public void afterClass() throws IOException { FileUtils.delete(dataDirectory); db = null; tagsToPath = null; } - public void testProposals() throws Exception { + private void initDatabase() throws Exception { tagsToPath = new LinkedHashMap<>(); final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim"); final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer"); @@ -57,6 +58,9 @@ public class ProposerTest { final Path newFile = db.createNewFile(tags); tagsToPath.put(tags, newFile); } + } + + public void testEmptyQuery() throws Exception { assertProposals("", 0, // new Proposal("name", "name=*", 5), // @@ -65,6 +69,26 @@ public class ProposerTest { ); } + public void testPrefixOfKey() throws Exception { + assertProposals("bi", 2, // + new Proposal("bird", "bird=* ", 3) // + ); + assertProposals("bird", 4, // + new Proposal("bird", "bird=* ", 3) // + ); + } + + public void testPrefixOfValue() throws Exception { + assertProposals("name =Je", 8, // + new Proposal("Jennifer", "name =Jennifer ", 2), // + new Proposal("Jenny", "name =Jenny ", 1) // + ); + + assertProposals("bird=eagle and n", 16, // + new Proposal("name", "bird=eagle and name=* ", 1) // + ); + } + private void assertProposals(final String query, final int caretIndex, final Proposal... expected) throws InterruptedException { @@ -72,7 +96,8 @@ public class ProposerTest { final List expectedList = Arrays.asList(expected); Collections.sort(expectedList); - System.out.println("actual: " + actual); + System.out.println("\n\n--- " + query + " ---"); + System.out.println("actual : " + actual); System.out.println("expected: " + expectedList); Assert.assertEquals(expectedList, actual); } diff --git a/pdb-ui/src/main/resources/resources/js/search.js b/pdb-ui/src/main/resources/resources/js/search.js index a899aa5..cebfbb7 100644 --- a/pdb-ui/src/main/resources/resources/js/search.js +++ b/pdb-ui/src/main/resources/resources/js/search.js @@ -12,7 +12,7 @@ $(document).ready(function(){ HttpMethod: "GET", Delay: 300, _QueryArg: function() { - var caretIndex = document.getElementById('search-input').selectionStart; + var caretIndex = document.getElementById('search-input').selectionStart + 1; return 'caretIndex=' + caretIndex + '&query'; }, _Pre: function() { diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java index 3bc92d2..56e1165 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java @@ -5,7 +5,6 @@ import java.nio.file.Path; import java.time.OffsetDateTime; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Optional; @@ -190,9 +189,7 @@ public class PerformanceDb implements AutoCloseable { public List autocomplete(final String query, final int caretIndex) { - // TODO implement proposals - // return db.proposeTagForQuery(query, caretIndex); - return Collections.emptyList(); + return db.propose(query, caretIndex); } public List getFields() {