From 27b83234ccc1162af8cdca25d83aa873ec358f2f Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Sat, 9 Feb 2019 15:21:35 +0100
Subject: [PATCH] group proposals as if they were hierarchical

We interpret dots ('.') as hierarchy delimiters in the values.
That way we can reduce the number of proposed values and show only those
for the next level.
---
 .../pdb/datastore/lang/CandidateGrouper.java  | 84 +++++++++++++++++++
 .../pdb/datastore/lang/NewProposerParser.java | 10 ++-
 .../datastore/lang/CandidateGrouperTest.java  | 75 +++++++++++++++++
 3 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java
 create mode 100644 data-store/src/test/java/org/lucares/pdb/datastore/lang/CandidateGrouperTest.java

diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java
new file mode 100644
index 0000000..1fbd915
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java
@@ -0,0 +1,84 @@
+package org.lucares.pdb.datastore.lang;
+
+import java.util.Collection;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Reduces completion candidates to the next hierarchy level, treating dots
+ * ('.') in the values as hierarchy delimiters.
+ */
+public class CandidateGrouper {
+
+    /** Characters that end the backwards scan for the value left of the caret. */
+    private static final String DELIMITERS = " (),=!";
+
+    /**
+     * Cuts every value after the hierarchy level the caret is on and returns
+     * the distinct results in natural order.
+     * <p>
+     * The level is the number of dots between the caret marker and the closest
+     * delimiter to its left. A value that has more levels is cut after the dot
+     * that closes that level, so the trailing dot signals that deeper levels
+     * exist. A value with fewer levels is returned unchanged.
+     *
+     * @param values               the candidate values
+     * @param queryWithCaretMarker the query containing
+     *                             {@link NewProposerParser#CARET_MARKER}
+     * @return the sorted, de-duplicated shortened values
+     */
+    public SortedSet<String> group(final Collection<String> values, final String queryWithCaretMarker) {
+
+        final SortedSet<String> result = new TreeSet<>();
+        final int numDotsInValue = countDotsInValue(queryWithCaretMarker);
+
+        for (final String value : values) {
+            result.add(cutAfterLevel(value, numDotsInValue));
+        }
+
+        return result;
+    }
+
+    /**
+     * Returns {@code value} up to and including its {@code (level + 1)}-th
+     * dot, or the whole value if it contains fewer dots.
+     * <p>
+     * Scanning for dots instead of splitting and calling {@code subList}
+     * avoids an IndexOutOfBoundsException for values with fewer segments than
+     * {@code level + 1}, e.g. "aa." ({@code String.split} drops the trailing
+     * empty segment) or "aa" when the caret is behind "aa.bb.".
+     */
+    private static String cutAfterLevel(final String value, final int level) {
+        int dotIndex = -1;
+        for (int i = 0; i <= level; i++) {
+            dotIndex = value.indexOf('.', dotIndex + 1);
+            if (dotIndex < 0) {
+                return value;
+            }
+        }
+        return value.substring(0, dotIndex + 1);
+    }
+
+    /**
+     * Counts the dots between the caret marker and the closest delimiter to
+     * its left. Returns 0 if the query does not contain the caret marker.
+     */
+    private int countDotsInValue(final String queryWithCaretMarker) {
+
+        int count = 0;
+        int index = queryWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER) - 1;
+
+        while (index >= 0) {
+            final char c = queryWithCaretMarker.charAt(index);
+            if (DELIMITERS.indexOf(c) >= 0) {
+                break;
+            }
+            if (c == '.') {
+                count++;
+            }
+            index--;
+        }
+
+        return count;
+    }
+}
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java
index f9d8eb2..a87b956 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java
@@ -167,8 +167,11 @@ public class NewProposerParser {
             final SortedSet candidateValues = normalizedExpression
                     .visit(new FindValuesForQueryCompletion(queryCompletionIndex));
 
+            final SortedSet<String> candidateValuesCutAtDots = cutAtDots(candidateValues, queryWithCaretMarker);
+
             // translate the candidate values to proposals
-            final List proposals = generateProposals(queryWithCaretMarker, expression, candidateValues);
+            final List proposals = generateProposals(queryWithCaretMarker, expression,
+                    candidateValuesCutAtDots);
 
             return proposals;
         } catch (final SyntaxException e) {
@@ -178,6 +181,11 @@ public class NewProposerParser {
         }
     }
 
+    private SortedSet<String> cutAtDots(final SortedSet<String> candidateValues, final String queryWithCaretMarker) {
+        final CandidateGrouper grouper = new CandidateGrouper();
+        return grouper.group(candidateValues, queryWithCaretMarker);
+    }
+
     private List generateProposals(final String queryWithCaretMarker, final Expression expression,
             final SortedSet candidateValues) {
         final List proposals = new ArrayList<>();
diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/lang/CandidateGrouperTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/lang/CandidateGrouperTest.java
new file mode 100644
index 0000000..9f7b8cc
--- /dev/null
+++ b/data-store/src/test/java/org/lucares/pdb/datastore/lang/CandidateGrouperTest.java
@@ -0,0 +1,75 @@
+package org.lucares.pdb.datastore.lang;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+@Test
+public class CandidateGrouperTest {
+
+    @DataProvider
+    public Object[][] providerGroup() {
+        final List<Object[]> result = new ArrayList<>();
+
+        result.add(new Object[] { //
+                Set.of("aa.xx.AA.XX", "aa.yy.BB", "aa.xx.BB", "aa.xx.AA.YY"), //
+                "name = |", //
+                Set.of("aa.") });
+        result.add(new Object[] { //
+                Set.of("aa.xx.AA.XX", "aa.yy.BB", "aa.xx.BB", "aa.xx.AA.YY"), //
+                "name = a|", //
+                Set.of("aa.") });
+        result.add(new Object[] { //
+                Set.of("aa.xx.AA.XX", "aa.yy.BB", "aa.xx.BB", "aa.xx.AA.YY"), //
+                "name = aa|", //
+                Set.of("aa.") });
+        result.add(new Object[] { //
+                Set.of("aa.xx.AA.XX", "aa.yy.BB", "aa.xx.BB", "aa.xx.AA.YY"), //
+                "name = aa.|", //
+                Set.of("aa.xx.", "aa.yy.") });
+        result.add(new Object[] { //
+                Set.of("aa.xx.AA.XX", "aa.xx.BB", "aa.xx.AA.YY"), //
+                "name = aa.x|", //
+                Set.of("aa.xx.") });
+        result.add(new Object[] { //
+                Set.of("aa.xx.AA.XX", "aa.xx.BB", "aa.xx.AA.YY"), //
+                "name = aa.xx.|", //
+                Set.of("aa.xx.AA.", "aa.xx.BB") });
+        result.add(new Object[] { //
+                Set.of("aa.xx.AA.XX", "aa.xx.AA.YY"), //
+                "name = aa.xx.AA.|", //
+                Set.of("aa.xx.AA.XX", "aa.xx.AA.YY") });
+        result.add(new Object[] { //
+                Set.of("XX.YY.ZZ", "XX.YY"), //
+                "name = XX.Y|", //
+                Set.of("XX.YY.", "XX.YY") });
+        // a value that ends with a dot must not break the grouping
+        result.add(new Object[] { //
+                Set.of("aa.", "aa.xx"), //
+                "name = aa.|", //
+                Set.of("aa.", "aa.xx") });
+        // values with fewer levels than the caret position are kept as they are
+        result.add(new Object[] { //
+                Set.of("aa", "aa.bb.cc"), //
+                "name = aa.bb.|", //
+                Set.of("aa", "aa.bb.cc") });
+
+        return result.toArray(new Object[0][]);
+    }
+
+    @Test(dataProvider = "providerGroup")
+    public void testGroup(final Set<String> values, final String queryWithCaretMarker, final Set<String> expected) {
+        final CandidateGrouper grouper = new CandidateGrouper();
+
+        final String query = queryWithCaretMarker.replace("|", NewProposerParser.CARET_MARKER);
+
+        final SortedSet<String> actual = grouper.group(values, query);
+
+        Assert.assertEquals(actual, expected);
+    }
+}