introduce clustering for query completion indices

This commit is contained in:
2019-03-16 10:19:28 +01:00
parent fb9f8592ac
commit b5e2d0a217
14 changed files with 197 additions and 136 deletions

View File

@@ -14,6 +14,10 @@ public class ClusterId {
this.clusterId = clusterId; this.clusterId = clusterId;
} }
public static ClusterId of(final String clusterId) {
return new ClusterId(clusterId);
}
/** /**
* @return the id, e.g. a time like 201902 (cluster for entries of February * @return the id, e.g. a time like 201902 (cluster for entries of February
* 2019) * 2019)

View File

@@ -45,10 +45,18 @@ public class ClusteredPersistentMap<K, V> implements AutoCloseable {
}; };
} }
private PersistentMap<K, V> getExistingPersistentMap(final ClusterId clusterId) {
return maps.computeIfAbsent(clusterId, supplier);
}
private PersistentMap<K, V> getPersistentMapCreateIfNotExists(final ClusterId clusterId) {
return maps.computeIfAbsent(clusterId, creator);
}
public V getValue(final ClusterId clusterId, final K key) { public V getValue(final ClusterId clusterId, final K key) {
try { try {
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, supplier); final PersistentMap<K, V> map = getExistingPersistentMap(clusterId);
return map != null ? map.getValue(key) : null; return map != null ? map.getValue(key) : null;
} catch (final IOException e) { } catch (final IOException e) {
throw new ReadRuntimeException(e); throw new ReadRuntimeException(e);
@@ -61,7 +69,7 @@ public class ClusteredPersistentMap<K, V> implements AutoCloseable {
final List<ClusterId> clusterIds = clusterIdSource.toClusterIds(); final List<ClusterId> clusterIds = clusterIdSource.toClusterIds();
for (final ClusterId clusterId : clusterIds) { for (final ClusterId clusterId : clusterIds) {
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, creator); final PersistentMap<K, V> map = getPersistentMapCreateIfNotExists(clusterId);
if (map != null) { if (map != null) {
final V value = map.getValue(key); final V value = map.getValue(key);
if (value != null) { if (value != null) {
@@ -79,7 +87,7 @@ public class ClusteredPersistentMap<K, V> implements AutoCloseable {
public V putValue(final ClusterId clusterId, final K key, final V value) { public V putValue(final ClusterId clusterId, final K key, final V value) {
try { try {
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, creator); final PersistentMap<K, V> map = getPersistentMapCreateIfNotExists(clusterId);
return map.putValue(key, value); return map.putValue(key, value);
} catch (final IOException e) { } catch (final IOException e) {
throw new ReadRuntimeException(e); throw new ReadRuntimeException(e);
@@ -88,7 +96,7 @@ public class ClusteredPersistentMap<K, V> implements AutoCloseable {
public void visitValues(final ClusterId clusterId, final K keyPrefix, final Visitor<K, V> visitor) { public void visitValues(final ClusterId clusterId, final K keyPrefix, final Visitor<K, V> visitor) {
try { try {
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, creator); final PersistentMap<K, V> map = getExistingPersistentMap(clusterId);
if (map != null) { if (map != null) {
map.visitValues(keyPrefix, visitor); map.visitValues(keyPrefix, visitor);
} }
@@ -102,7 +110,7 @@ public class ClusteredPersistentMap<K, V> implements AutoCloseable {
final List<ClusterId> clusterIds = clusterIdSource.toClusterIds(); final List<ClusterId> clusterIds = clusterIdSource.toClusterIds();
for (final ClusterId clusterId : clusterIds) { for (final ClusterId clusterId : clusterIds) {
final PersistentMap<K, V> map = maps.get(clusterId); final PersistentMap<K, V> map = getExistingPersistentMap(clusterId);
if (map != null) { if (map != null) {
map.visitValues(keyPrefix, visitor); map.visitValues(keyPrefix, visitor);
} }

View File

@@ -18,6 +18,7 @@ import java.util.function.Consumer;
import org.lucares.collections.LongList; import org.lucares.collections.LongList;
import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Query;
import org.lucares.pdb.api.QueryWithCaretMarker;
import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.api.RuntimeIOException;
import org.lucares.pdb.api.StringCompressor; import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tag;
@@ -159,7 +160,7 @@ public class DataStore implements AutoCloseable {
// index the tags, so that we can efficiently find all possible values for a // index the tags, so that we can efficiently find all possible values for a
// field in a query // field in a query
queryCompletionIndex.addTags(tags); queryCompletionIndex.addTags(clusterId, tags);
return newFilesRootBlockOffset; return newFilesRootBlockOffset;
} catch (final IOException e) { } catch (final IOException e) {
@@ -343,10 +344,10 @@ public class DataStore implements AutoCloseable {
}); });
} }
public List<Proposal> propose(final String query, final int caretIndex) { public List<Proposal> propose(final QueryWithCaretMarker query) {
final NewProposerParser newProposerParser = new NewProposerParser(queryCompletionIndex); final NewProposerParser newProposerParser = new NewProposerParser(queryCompletionIndex);
final List<Proposal> proposals = newProposerParser.propose(query, caretIndex); final List<Proposal> proposals = newProposerParser.propose(query);
LOGGER.debug("Proposals for query {}: {}", query, proposals); LOGGER.debug("Proposals for query {}: {}", query, proposals);
return proposals; return proposals;
} }

View File

@@ -7,7 +7,7 @@ import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import org.lucares.collections.LongList; import org.lucares.collections.LongList;
import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
import org.lucares.pdb.map.Empty; import org.lucares.pdb.map.Empty;
@@ -208,22 +208,22 @@ public class QueryCompletionIndex implements AutoCloseable {
} }
} }
private final PersistentMap<TwoTags, Empty> tagToTagIndex; private final ClusteredPersistentMap<TwoTags, Empty> tagToTagIndex;
private final PersistentMap<Tag, Empty> fieldToValueIndex; private final ClusteredPersistentMap<Tag, Empty> fieldToValueIndex;
private final PersistentMap<String, Empty> fieldIndex; private final ClusteredPersistentMap<String, Empty> fieldIndex;
public QueryCompletionIndex(final Path basePath) throws IOException { public QueryCompletionIndex(final Path basePath) throws IOException {
final Path tagToTagIndexFile = basePath.resolve("queryCompletionTagToTagIndex.bs"); tagToTagIndex = new ClusteredPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(),
tagToTagIndex = new PersistentMap<>(tagToTagIndexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER); PersistentMap.EMPTY_ENCODER);
final Path fieldToValueIndexFile = basePath.resolve("queryCompletionFieldToValueIndex.bs"); fieldToValueIndex = new ClusteredPersistentMap<>(basePath, "queryCompletionFieldToValueIndex.bs",
fieldToValueIndex = new PersistentMap<>(fieldToValueIndexFile, new EncoderTag(), PersistentMap.EMPTY_ENCODER); new EncoderTag(), PersistentMap.EMPTY_ENCODER);
final Path fieldIndexFile = basePath.resolve("queryCompletionFieldIndex.bs"); fieldIndex = new ClusteredPersistentMap<>(basePath, "queryCompletionFieldIndex.bs", new EncoderField(),
fieldIndex = new PersistentMap<>(fieldIndexFile, new EncoderField(), PersistentMap.EMPTY_ENCODER); PersistentMap.EMPTY_ENCODER);
} }
public void addTags(final Tags tags) throws IOException { public void addTags(final ClusterId clusterId, final Tags tags) throws IOException {
final List<Tag> listOfTagsA = tags.toTags(); final List<Tag> listOfTagsA = tags.toTags();
final List<Tag> listOfTagsB = tags.toTags(); final List<Tag> listOfTagsB = tags.toTags();
@@ -231,14 +231,14 @@ public class QueryCompletionIndex implements AutoCloseable {
for (final Tag tagA : listOfTagsA) { for (final Tag tagA : listOfTagsA) {
for (final Tag tagB : listOfTagsB) { for (final Tag tagB : listOfTagsB) {
final TwoTags key = new TwoTags(tagA, tagB); final TwoTags key = new TwoTags(tagA, tagB);
tagToTagIndex.putValue(key, Empty.INSTANCE); tagToTagIndex.putValue(clusterId, key, Empty.INSTANCE);
} }
} }
// create indices of all tags and all fields // create indices of all tags and all fields
for (final Tag tag : listOfTagsA) { for (final Tag tag : listOfTagsA) {
fieldToValueIndex.putValue(tag, Empty.INSTANCE); fieldToValueIndex.putValue(clusterId, tag, Empty.INSTANCE);
fieldIndex.putValue(tag.getKeyAsString(), Empty.INSTANCE); fieldIndex.putValue(clusterId, tag.getKeyAsString(), Empty.INSTANCE);
} }
} }
@@ -247,76 +247,67 @@ public class QueryCompletionIndex implements AutoCloseable {
tagToTagIndex.close(); tagToTagIndex.close();
} }
public SortedSet<String> find(final String property, final String value, final String field) { public SortedSet<String> find(final DateTimeRange dateRange, final String property, final String value,
final String field) {
final Tag tag = new Tag(property, value); final Tag tag = new Tag(property, value);
return find(tag, field); return find(dateRange, tag, field);
} }
public SortedSet<String> find(final Tag tag, final String field) { public SortedSet<String> find(final DateTimeRange dateRange, final Tag tag, final String field) {
try {
final SortedSet<String> result = new TreeSet<>(); final SortedSet<String> result = new TreeSet<>();
final int tagBKey = Tags.STRING_COMPRESSOR.put(field); final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
// EncoderTwoTags // EncoderTwoTags
final TwoTags keyPrefix = new TwoTags(tag, tagB); final TwoTags keyPrefix = new TwoTags(tag, tagB);
tagToTagIndex.visitValues(keyPrefix, (k, v) -> {
final ClusterIdSource clusterIdSource = new DateCluster(dateRange);
tagToTagIndex.visitValues(clusterIdSource, keyPrefix, (k, v) -> {
result.add(k.getTagB().getValueAsString());
});
return result;
}
public SortedSet<String> findAllValuesForField(final DateTimeRange dateRange, final String field) {
final SortedSet<String> result = new TreeSet<>();
final int tagKey = Tags.STRING_COMPRESSOR.put(field);
final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See
final ClusterIdSource clusterIdSource = new DateCluster(dateRange);
fieldToValueIndex.visitValues(clusterIdSource, keyPrefix, (k, v) -> {
result.add(k.getValueAsString());
});
return result;
}
public SortedSet<String> findAllValuesNotForField(final DateTimeRange dateRange, final Tag tag,
final String field) {
final SortedSet<String> result = new TreeSet<>();
final TwoTags keyPrefix = new TwoTags(field, tag.getKeyAsString(), null, null);
final int negatedValueA = tag.getValue();
final ClusterIdSource clusterIdSource = new DateCluster(dateRange);
tagToTagIndex.visitValues(clusterIdSource, keyPrefix, (k, v) -> {
final int valueA = k.getTagA().getValue();
if (valueA != negatedValueA) {
result.add(k.getTagB().getValueAsString()); result.add(k.getTagB().getValueAsString());
}); }
});
return result; return result;
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
} }
public SortedSet<String> findAllValuesForField(final String field) { public SortedSet<String> findAllFields(final DateTimeRange dateRange) {
try { final SortedSet<String> result = new TreeSet<>();
final SortedSet<String> result = new TreeSet<>(); final ClusterIdSource clusterIdSource = new DateCluster(dateRange);
final int tagKey = Tags.STRING_COMPRESSOR.put(field); fieldIndex.visitValues(clusterIdSource, "", (k, v) -> {
final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See result.add(k);
});
fieldToValueIndex.visitValues(keyPrefix, (k, v) -> { return result;
result.add(k.getValueAsString());
});
return result;
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
} }
public SortedSet<String> findAllValuesNotForField(final Tag tag, final String field) {
try {
final SortedSet<String> result = new TreeSet<>();
final TwoTags keyPrefix = new TwoTags(field, tag.getKeyAsString(), null, null);
final int negatedValueA = tag.getValue();
tagToTagIndex.visitValues(keyPrefix, (k, v) -> {
final int valueA = k.getTagA().getValue();
if (valueA != negatedValueA) {
result.add(k.getTagB().getValueAsString());
}
});
return result;
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
}
public SortedSet<String> findAllFields() {
try {
final SortedSet<String> result = new TreeSet<>();
fieldIndex.visitValues("", (k, v) -> {
result.add(k);
});
return result;
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
}
} }

View File

@@ -5,6 +5,7 @@ import java.util.List;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tag;
import org.lucares.pdb.datastore.internal.QueryCompletionIndex; import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
import org.lucares.pdb.datastore.lang.Expression.And; import org.lucares.pdb.datastore.lang.Expression.And;
@@ -29,8 +30,11 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
private static final class AndCaretExpressionVisitor extends ExpressionVisitor<SortedSet<String>> { private static final class AndCaretExpressionVisitor extends ExpressionVisitor<SortedSet<String>> {
private final QueryCompletionIndex index; private final QueryCompletionIndex index;
private final String field; private final String field;
private final DateTimeRange dateTimeRange;
public AndCaretExpressionVisitor(final QueryCompletionIndex queryCompletionIndex, final String field) { public AndCaretExpressionVisitor(final DateTimeRange dateTimeRange,
final QueryCompletionIndex queryCompletionIndex, final String field) {
this.dateTimeRange = dateTimeRange;
index = queryCompletionIndex; index = queryCompletionIndex;
this.field = field; this.field = field;
} }
@@ -41,7 +45,7 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
final String fieldA = property.getProperty(); final String fieldA = property.getProperty();
final String valueA = property.getValue().getValue(); final String valueA = property.getValue().getValue();
SortedSet<String> result = index.find(fieldA, valueA, field); final SortedSet<String> result = index.find(dateTimeRange, fieldA, valueA, field);
METRIC_AND_CARET_LOGGER.debug("{}: {}ms", property, (System.nanoTime() - start) / 1_000_000.0); METRIC_AND_CARET_LOGGER.debug("{}: {}ms", property, (System.nanoTime() - start) / 1_000_000.0);
return result; return result;
} }
@@ -53,7 +57,7 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
final String property = expression.getProperty(); final String property = expression.getProperty();
final List<String> values = expression.getValues(); final List<String> values = expression.getValues();
for (final String value : values) { for (final String value : values) {
final SortedSet<String> candidates = index.find(property, value, field); final SortedSet<String> candidates = index.find(dateTimeRange, property, value, field);
result.addAll(candidates); result.addAll(candidates);
} }
METRIC_AND_CARET_LOGGER.debug("{}: {}ms", expression, (System.nanoTime() - start) / 1_000_000.0); METRIC_AND_CARET_LOGGER.debug("{}: {}ms", expression, (System.nanoTime() - start) / 1_000_000.0);
@@ -119,8 +123,8 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
final Property property = (Property) expression.getExpression(); final Property property = (Property) expression.getExpression();
final Tag tag = new Tag(property.getProperty(), property.getValueAsString()); final Tag tag = new Tag(property.getProperty(), property.getValueAsString());
final SortedSet<String> valuesNotForField = index.findAllValuesNotForField(tag, field); final SortedSet<String> valuesNotForField = index.findAllValuesNotForField(dateTimeRange, tag, field);
final SortedSet<String> valuesForField = index.find(tag, field); final SortedSet<String> valuesForField = index.find(dateTimeRange, tag, field);
final SortedSet<String> valuesOnlyAvailableInField = CollectionUtils.removeAll(valuesForField, final SortedSet<String> valuesOnlyAvailableInField = CollectionUtils.removeAll(valuesForField,
valuesNotForField, TreeSet::new); valuesNotForField, TreeSet::new);
@@ -134,7 +138,11 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
private final QueryCompletionIndex queryCompletionIndex; private final QueryCompletionIndex queryCompletionIndex;
public FindValuesForQueryCompletion(final QueryCompletionIndex queryCompletionIndex) { private final DateTimeRange dateRange;
public FindValuesForQueryCompletion(final DateTimeRange dateRange,
final QueryCompletionIndex queryCompletionIndex) {
this.dateRange = dateRange;
this.queryCompletionIndex = queryCompletionIndex; this.queryCompletionIndex = queryCompletionIndex;
} }
@@ -145,7 +153,7 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
final String field = property.getProperty(); final String field = property.getProperty();
final String value = property.getValue().getValue(); final String value = property.getValue().getValue();
final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(field); final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(dateRange, field);
final String valuePrefix = value.substring(0, value.indexOf(NewProposerParser.CARET_MARKER)); final String valuePrefix = value.substring(0, value.indexOf(NewProposerParser.CARET_MARKER));
@@ -167,7 +175,7 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
final Expression rightHandExpression = expression.getExpression(); final Expression rightHandExpression = expression.getExpression();
final SortedSet<String> candidateValues = rightHandExpression final SortedSet<String> candidateValues = rightHandExpression
.visit(new AndCaretExpressionVisitor(queryCompletionIndex, field)); .visit(new AndCaretExpressionVisitor(dateRange, queryCompletionIndex, field));
final TreeSet<String> result = GloblikePattern.filterValues(candidateValues, valuePrefix, TreeSet::new); final TreeSet<String> result = GloblikePattern.filterValues(candidateValues, valuePrefix, TreeSet::new);
@@ -185,15 +193,15 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
final String valuePattern = valueWithCaretMarker.substring(0, final String valuePattern = valueWithCaretMarker.substring(0,
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));
final SortedSet<String> allValuesForField = queryCompletionIndex final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(dateRange,
.findAllValuesForField(caretExpression.getProperty()); caretExpression.getProperty());
final SortedSet<String> valuesForFieldMatchingCaretExpression = GloblikePattern.filterValues(allValuesForField, final SortedSet<String> valuesForFieldMatchingCaretExpression = GloblikePattern.filterValues(allValuesForField,
valuePattern, TreeSet::new); valuePattern, TreeSet::new);
final Expression rightHandExpression = expression.getExpression(); final Expression rightHandExpression = expression.getExpression();
final SortedSet<String> rightHandValues = rightHandExpression final SortedSet<String> rightHandValues = rightHandExpression
.visit(new AndCaretExpressionVisitor(queryCompletionIndex, field)); .visit(new AndCaretExpressionVisitor(dateRange, queryCompletionIndex, field));
if (rightHandValues.size() == 1) { if (rightHandValues.size() == 1) {
// there is only one alternative and that one must not be chosen // there is only one alternative and that one must not be chosen
@@ -213,7 +221,7 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<St
if (innerExpression instanceof Property) { if (innerExpression instanceof Property) {
final long start = System.nanoTime(); final long start = System.nanoTime();
field = ((Property) innerExpression).getProperty(); field = ((Property) innerExpression).getProperty();
final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(field); final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(dateRange, field);
final String valueWithCaretMarker = ((Property) innerExpression).getValue().getValue(); final String valueWithCaretMarker = ((Property) innerExpression).getValue().getValue();
final String valuePrefix = valueWithCaretMarker.substring(0, final String valuePrefix = valueWithCaretMarker.substring(0,
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));

View File

@@ -9,20 +9,21 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.QueryConstants;
import org.lucares.pdb.api.QueryWithCaretMarker;
import org.lucares.pdb.datastore.Proposal; import org.lucares.pdb.datastore.Proposal;
import org.lucares.pdb.datastore.internal.QueryCompletionIndex; import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
import org.lucares.utils.CollectionUtils; import org.lucares.utils.CollectionUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
public class NewProposerParser { public class NewProposerParser implements QueryConstants {
private static final Logger LOGGER = LoggerFactory.getLogger(NewProposerParser.class); private static final Logger LOGGER = LoggerFactory.getLogger(NewProposerParser.class);
private final static Logger METRICS_LOGGER_PROPOSE = LoggerFactory.getLogger("org.lucares.metrics.propose"); private final static Logger METRICS_LOGGER_PROPOSE = LoggerFactory.getLogger("org.lucares.metrics.propose");
public final static String CARET_MARKER = "\ue001"; // character in the private use area
/* /*
* Regex matching a java identifier without a caret marker. We define it as a * Regex matching a java identifier without a caret marker. We define it as a
* blacklist, because this is easier. The regex is only used <em>after</em> the * blacklist, because this is easier. The regex is only used <em>after</em> the
@@ -36,16 +37,16 @@ public class NewProposerParser {
this.queryCompletionIndex = queryCompletionIndex; this.queryCompletionIndex = queryCompletionIndex;
} }
public List<Proposal> propose(final String query, final int caretIndex) { public List<Proposal> propose(final QueryWithCaretMarker query) {
final long start = System.nanoTime(); final long start = System.nanoTime();
List<Proposal> proposals; List<Proposal> proposals;
if (StringUtils.isBlank(query)) { if (StringUtils.isBlank(query.getQuery())) {
proposals = proposeForAllKeys(); proposals = proposeForAllKeys(query.getDateRange());
} else { } else {
final List<Proposal> foundProposals = proposalsForValues(query, caretIndex); final List<Proposal> foundProposals = proposalsForValues(query);
if (foundProposals.isEmpty()) { if (foundProposals.isEmpty()) {
proposals = proposalsForNonValues(query, caretIndex); proposals = proposalsForNonValues(query);
} else { } else {
proposals = foundProposals; proposals = foundProposals;
} }
@@ -56,7 +57,7 @@ public class NewProposerParser {
return proposals; return proposals;
} }
private List<Proposal> proposalsForNonValues(final String query, final int caretIndex) { private List<Proposal> proposalsForNonValues(final QueryWithCaretMarker query) {
final List<Proposal> proposals = new ArrayList<>(); final List<Proposal> proposals = new ArrayList<>();
/* /*
@@ -66,9 +67,7 @@ public class NewProposerParser {
* location in the query (not at the caret position). * location in the query (not at the caret position).
*/ */
final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); final List<String> tokens = QueryLanguage.getTokens(query.getQueryWithCaretMarker());
final List<String> tokens = QueryLanguage.getTokens(queryWithCaretMarker);
final int indexTokenWithCaret = CollectionUtils.indexOf(tokens, t -> t.contains(CARET_MARKER)); final int indexTokenWithCaret = CollectionUtils.indexOf(tokens, t -> t.contains(CARET_MARKER));
if (indexTokenWithCaret > 0) { if (indexTokenWithCaret > 0) {
@@ -78,7 +77,7 @@ public class NewProposerParser {
case "and": case "and":
case "or": case "or":
case "!": case "!":
proposals.addAll(proposeForAllKeys(queryWithCaretMarker)); proposals.addAll(proposeForAllKeys(query));
break; break;
case ")": case ")":
@@ -87,24 +86,25 @@ public class NewProposerParser {
break; break;
} }
} else if (indexTokenWithCaret == 0) { } else if (indexTokenWithCaret == 0) {
proposals.addAll(proposeForAllKeys(queryWithCaretMarker)); proposals.addAll(proposeForAllKeys(query));
} }
return proposals; return proposals;
} }
private Collection<? extends Proposal> proposeForAllKeys(final String queryWithCaretMarker) { private Collection<? extends Proposal> proposeForAllKeys(final QueryWithCaretMarker query) {
final List<Proposal> proposals = new ArrayList<>(); final List<Proposal> proposals = new ArrayList<>();
final String wordPrefix = wordPrefix(queryWithCaretMarker); final String wordPrefix = wordPrefix(query.getQueryWithCaretMarker());
if (wordPrefix != null) { if (wordPrefix != null) {
final SortedSet<String> allFields = queryCompletionIndex.findAllFields(); final SortedSet<String> allFields = queryCompletionIndex.findAllFields(query.getDateRange());
for (final String field : allFields) { for (final String field : allFields) {
if (!field.startsWith(wordPrefix)) { if (!field.startsWith(wordPrefix)) {
continue; continue;
} }
final String queryWithCaretMarker = query.getQueryWithCaretMarker();
final String proposedQuery = queryWithCaretMarker final String proposedQuery = queryWithCaretMarker
.replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, field + "=* "); .replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, field + "=* ");
final String newQueryWithCaretMarker = queryWithCaretMarker final String newQueryWithCaretMarker = queryWithCaretMarker
@@ -131,10 +131,10 @@ public class NewProposerParser {
return null; return null;
} }
private List<Proposal> proposeForAllKeys() { private List<Proposal> proposeForAllKeys(final DateTimeRange dateRange) {
final List<Proposal> proposals = new ArrayList<>(); final List<Proposal> proposals = new ArrayList<>();
final SortedSet<String> allFields = queryCompletionIndex.findAllFields(); final SortedSet<String> allFields = queryCompletionIndex.findAllFields(dateRange);
for (final String field : allFields) { for (final String field : allFields) {
final String proposedQuery = field + "=*"; final String proposedQuery = field + "=*";
final String newQuery = field + "="; final String newQuery = field + "=";
@@ -146,26 +146,26 @@ public class NewProposerParser {
return proposals; return proposals;
} }
List<Proposal> proposalsForValues(final String query, final int caretIndex) { List<Proposal> proposalsForValues(final QueryWithCaretMarker query) {
try { try {
// Add caret marker, so that we know where the caret is. // Add caret marker, so that we know where the caret is.
// This also makes sure that a query like "name=|" ('|' is the caret) can be // This also makes sure that a query like "name=|" ('|' is the caret) can be
// parsed. // parsed.
// Without the caret marker the query would be "name=", which is not a valid // Without the caret marker the query would be "name=", which is not a valid
// expression. // expression.
final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); final String queryWithCaretMarker = query.getQueryWithCaretMarker();
// parse the query // parse the query
final Expression expression = QueryLanguageParser.parse(queryWithCaretMarker); final Expression expression = QueryLanguageParser.parse(queryWithCaretMarker);
// normalize it, so that we can use the queryCompletionIndex to search vor // normalize it, so that we can use the queryCompletionIndex to search for
// candidate values // candidate values
final QueryCompletionExpressionOptimizer optimizer = new QueryCompletionExpressionOptimizer(); final QueryCompletionExpressionOptimizer optimizer = new QueryCompletionExpressionOptimizer();
final Expression normalizedExpression = optimizer.normalizeExpression(expression); final Expression normalizedExpression = optimizer.normalizeExpression(expression);
// find all candidate values // find all candidate values
final SortedSet<String> candidateValues = normalizedExpression final SortedSet<String> candidateValues = normalizedExpression
.visit(new FindValuesForQueryCompletion(queryCompletionIndex)); .visit(new FindValuesForQueryCompletion(query.getDateRange(), queryCompletionIndex));
final SortedSet<String> candidateValuesCutAtDots = cutAtDots(candidateValues, queryWithCaretMarker); final SortedSet<String> candidateValuesCutAtDots = cutAtDots(candidateValues, queryWithCaretMarker);

View File

@@ -25,6 +25,7 @@ import javax.swing.JTextField;
import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Query;
import org.lucares.pdb.api.QueryWithCaretMarker;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
import org.lucares.pdb.blockstorage.BSFile; import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.datastore.Doc; import org.lucares.pdb.datastore.Doc;
@@ -230,7 +231,8 @@ public class DataStoreTest {
Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"), Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"),
Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "John")); Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "John"));
final ClusterId clusterId = DateIndexExtension.now(); final DateTimeRange dateRange = DateTimeRange.relativeMillis(0);
final ClusterId clusterId = DateIndexExtension.toClusterIds(dateRange).get(0);
tags.forEach(t -> dataStore.createNewFile(clusterId, t)); tags.forEach(t -> dataStore.createNewFile(clusterId, t));
final JFrame frame = new JFrame(); final JFrame frame = new JFrame();
@@ -251,7 +253,9 @@ public class DataStoreTest {
final String query = input.getText(); final String query = input.getText();
final int caretIndex = input.getCaretPosition(); final int caretIndex = input.getCaretPosition();
final List<Proposal> proposals = dataStore.propose(query, caretIndex); final QueryWithCaretMarker q = new QueryWithCaretMarker(query, dateRange, caretIndex);
final List<Proposal> proposals = dataStore.propose(q);
final StringBuilder out = new StringBuilder(); final StringBuilder out = new StringBuilder();
@@ -291,7 +295,7 @@ public class DataStoreTest {
final List<String> expectedProposedValues) { final List<String> expectedProposedValues) {
final String query = queryWithCaret.replace("|", ""); final String query = queryWithCaret.replace("|", "");
final int caretIndex = queryWithCaret.indexOf("|"); final int caretIndex = queryWithCaret.indexOf("|");
final List<Proposal> proposals = dataStore.propose(query, caretIndex); final List<Proposal> proposals = dataStore.propose(new QueryWithCaretMarker(query, dateRange, caretIndex));
System.out.println( System.out.println(
"proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList())); "proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList()));

View File

@@ -7,6 +7,8 @@ import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.QueryWithCaretMarker;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.Proposal; import org.lucares.pdb.datastore.Proposal;
import org.lucares.utils.CollectionUtils; import org.lucares.utils.CollectionUtils;
@@ -21,6 +23,7 @@ public class ProposerTest {
private Path dataDirectory; private Path dataDirectory;
private DataStore dataStore; private DataStore dataStore;
private DateTimeRange dateRange;
@BeforeClass @BeforeClass
public void beforeClass() throws Exception { public void beforeClass() throws Exception {
@@ -38,7 +41,8 @@ public class ProposerTest {
private void initDatabase() throws Exception { private void initDatabase() throws Exception {
dataStore = new DataStore(dataDirectory); dataStore = new DataStore(dataDirectory);
final ClusterId now = DateIndexExtension.now(); dateRange = DateTimeRange.now();
final ClusterId now = DateIndexExtension.toClusterIds(dateRange).get(0);
final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim"); final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim");
final Tags eagleTimothy = Tags.createAndAddToDictionary("bird", "eagle", "name", "Timothy"); final Tags eagleTimothy = Tags.createAndAddToDictionary("bird", "eagle", "name", "Timothy");
@@ -158,7 +162,7 @@ public class ProposerTest {
private void assertProposals(final String query, final int caretIndex, final Proposal... expected) private void assertProposals(final String query, final int caretIndex, final Proposal... expected)
throws InterruptedException { throws InterruptedException {
final List<Proposal> actual = dataStore.propose(query, caretIndex); final List<Proposal> actual = dataStore.propose(new QueryWithCaretMarker(query, dateRange, caretIndex));
final List<Proposal> expectedList = Arrays.asList(expected); final List<Proposal> expectedList = Arrays.asList(expected);
Collections.sort(expectedList); Collections.sort(expectedList);

View File

@@ -7,6 +7,7 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.SortedSet; import java.util.SortedSet;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.StringCompressor; import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
@@ -41,26 +42,31 @@ public class QueryCompletionIndexTest {
Tags.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C Tags.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C
); );
final DateTimeRange dateRange = DateTimeRange.relativeMillis(1);
final ClusterId clusterId = DateIndexExtension.toClusterIds(dateRange).get(0);
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) { try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) {
for (final Tags t : tags) { for (final Tags t : tags) {
index.addTags(t); index.addTags(clusterId, t);
} }
// all firstnames where lastname=Doe are returned sorted alphabetically. // all firstnames where lastname=Doe are returned sorted alphabetically.
// tags A and B match // tags A and B match
final SortedSet<String> firstnamesWithLastnameDoe = index.find(new Tag("lastname", "Doe"), "firstname"); final SortedSet<String> firstnamesWithLastnameDoe = index.find(dateRange, new Tag("lastname", "Doe"),
"firstname");
Assert.assertEquals(firstnamesWithLastnameDoe, Arrays.asList("Jane", "John")); Assert.assertEquals(firstnamesWithLastnameDoe, Arrays.asList("Jane", "John"));
// no duplicates are returned: // no duplicates are returned:
// tags A and C match firstname=John, but both have country=Atlantis // tags A and C match firstname=John, but both have country=Atlantis
final SortedSet<String> countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country"); final SortedSet<String> countryWithFirstnameJohn = index.find(dateRange, new Tag("firstname", "John"),
"country");
Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis")); Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis"));
// findAllValuesForField sorts alphabetically // findAllValuesForField sorts alphabetically
final SortedSet<String> firstnames = index.findAllValuesForField("firstname"); final SortedSet<String> firstnames = index.findAllValuesForField(dateRange, "firstname");
Assert.assertEquals(firstnames, Arrays.asList("Jane", "John"), "found: " + firstnames); Assert.assertEquals(firstnames, Arrays.asList("Jane", "John"), "found: " + firstnames);
final SortedSet<String> countries = index.findAllValuesForField("country"); final SortedSet<String> countries = index.findAllValuesForField(dateRange, "country");
Assert.assertEquals(countries, Arrays.asList("Atlantis", "ElDorado")); Assert.assertEquals(countries, Arrays.asList("Atlantis", "ElDorado"));
} }
} }

View File

@@ -15,8 +15,13 @@ public class DateTimeRange {
this.end = end; this.end = end;
} }
/**
 * Convenience factory that delegates to {@link #relativeMillis(long)} with an
 * amount of zero.
 *
 * @return the range produced by {@code relativeMillis(0)}
 */
public static DateTimeRange now() {
    return DateTimeRange.relativeMillis(0L);
}
public static DateTimeRange relative(final long amount, final TemporalUnit unit) { public static DateTimeRange relative(final long amount, final TemporalUnit unit) {
return new DateTimeRange(OffsetDateTime.now().minus(amount, unit), OffsetDateTime.now()); final OffsetDateTime now = OffsetDateTime.now();
return new DateTimeRange(now.minus(amount, unit), now);
} }
public static DateTimeRange relativeMillis(final long amount) { public static DateTimeRange relativeMillis(final long amount) {

View File

@@ -0,0 +1,5 @@
package org.lucares.pdb.api;
/**
 * Constants shared by code that works with query strings.
 */
public interface QueryConstants {

    /**
     * Marker string for the caret position; a single character taken from the
     * Unicode private use area.
     */
    String CARET_MARKER = "\ue001";
}

View File

@@ -0,0 +1,18 @@
package org.lucares.pdb.api;
/**
 * A {@link Query} that additionally remembers the caret index within the query
 * string.
 */
public class QueryWithCaretMarker extends Query implements QueryConstants {

    private final int caretIndex;

    /**
     * @param query      the query string, handed to the {@link Query} constructor
     * @param dateRange  the date range, handed to the {@link Query} constructor
     * @param caretIndex offset at which {@link #getQueryWithCaretMarker()} inserts
     *                   the caret marker
     */
    public QueryWithCaretMarker(final String query, final DateTimeRange dateRange, final int caretIndex) {
        super(query, dateRange);
        this.caretIndex = caretIndex;
    }

    /**
     * Builds a copy of the query with {@link QueryConstants#CARET_MARKER}
     * inserted at the caret index. The index is not validated here;
     * {@link StringBuilder#insert(int, String)} rejects an offset outside the
     * query with a {@link StringIndexOutOfBoundsException}.
     *
     * @return the value of {@code getQuery()} with the caret marker inserted
     */
    public String getQueryWithCaretMarker() {
        return new StringBuilder(getQuery()).insert(caretIndex, CARET_MARKER).toString();
    }
}

View File

@@ -17,6 +17,7 @@ import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Query;
import org.lucares.pdb.api.QueryWithCaretMarker;
import org.lucares.pdb.datastore.Proposal; import org.lucares.pdb.datastore.Proposal;
import org.lucares.pdb.plot.api.AxisScale; import org.lucares.pdb.plot.api.AxisScale;
import org.lucares.pdb.plot.api.Limit; import org.lucares.pdb.plot.api.Limit;
@@ -210,10 +211,15 @@ public class PdbController implements HardcodedValues, PropertyKeys {
AutocompleteResponse autocomplete(@RequestParam(name = "query") final String query, AutocompleteResponse autocomplete(@RequestParam(name = "query") final String query,
@RequestParam(name = "caretIndex") final int caretIndex) { @RequestParam(name = "caretIndex") final int caretIndex) {
final AutocompleteResponse result = new AutocompleteResponse(); // TODO get date range from UI
// TODO time range must not be static
final DateTimeRange dateRange = DateTimeRange.relativeYears(5);
final int zeroBasedCaretIndex = caretIndex - 1; final int zeroBasedCaretIndex = caretIndex - 1;
final QueryWithCaretMarker q = new QueryWithCaretMarker(query, dateRange, zeroBasedCaretIndex);
final List<Proposal> proposals = db.autocomplete(query, zeroBasedCaretIndex); final AutocompleteResponse result = new AutocompleteResponse();
final List<Proposal> proposals = db.autocomplete(q);
final List<Proposal> nonEmptyProposals = CollectionUtils.filter(proposals, p -> p.hasResults()); final List<Proposal> nonEmptyProposals = CollectionUtils.filter(proposals, p -> p.hasResults());
final List<AutocompleteProposal> autocompleteProposals = toAutocompleteProposals(nonEmptyProposals); final List<AutocompleteProposal> autocompleteProposals = toAutocompleteProposals(nonEmptyProposals);

View File

@@ -17,6 +17,7 @@ import org.lucares.pdb.api.Entries;
import org.lucares.pdb.api.Entry; import org.lucares.pdb.api.Entry;
import org.lucares.pdb.api.GroupResult; import org.lucares.pdb.api.GroupResult;
import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Query;
import org.lucares.pdb.api.QueryWithCaretMarker;
import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Result;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.InvalidValueException; import org.lucares.pdb.datastore.InvalidValueException;
@@ -168,9 +169,9 @@ public class PerformanceDb implements AutoCloseable {
} }
} }
public List<Proposal> autocomplete(final String query, final int caretIndex) { public List<Proposal> autocomplete(final QueryWithCaretMarker query) {
return dataStore.propose(query, caretIndex); return dataStore.propose(query);
} }
public List<String> getFields(final DateTimeRange dateRange) { public List<String> getFields(final DateTimeRange dateRange) {