rewrite query completion

The old implementation searched for all possible values and then
executed each query to see what matches.
The new implementation uses several indices to find only
the matching values.
This commit is contained in:
2019-02-02 15:35:56 +01:00
parent 72e9a9ebe3
commit 76e5d441de
20 changed files with 1676 additions and 126 deletions

View File

@@ -0,0 +1,107 @@
package org.lucares.pdb.map;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.NoSuchElementException;
import java.util.stream.Stream;
import org.lucares.collections.LongList;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.diskstorage.DiskStorage;
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
import org.lucares.utils.Preconditions;
import org.lucares.utils.cache.HotEntryCache;
import org.lucares.utils.cache.HotEntryCache.Event;
import org.lucares.utils.cache.HotEntryCache.EventListener;
import org.lucares.utils.cache.HotEntryCache.EventType;
/**
 * Combines {@link PersistentMap} and {@link BSFile} to represent a map where
 * the values are lists of longs.
 * <p>
 * The {@link PersistentMap} maps each key to the root block offset of a
 * {@link BSFile} that lives in a shared {@link DiskStorage}. Writers are kept in
 * a {@link HotEntryCache}; a writer is closed when its cache entry is evicted
 * or removed.
 * <p>
 * All public methods synchronize on this instance.
 */
public class PersistentMapOfListsOfLongs<K> implements AutoCloseable {

    /** Closes the {@link BSFile} of a cache entry that leaves the writer cache. */
    private static final class RemovalListener<KEY> implements EventListener<KEY, BSFile> {
        @Override
        public void onEvent(final Event<KEY, BSFile> event) {
            event.getValue().close();
        }
    }

    /** Index from key to the root block offset of the key's {@link BSFile}. */
    private final PersistentMap<K, Long> map;

    private final Path mapPath;

    /** Storage that holds the blocks of all lists. */
    private final DiskStorage diskStore;

    private final Path diskStorePath;

    /** Recently used writers, so that not every append has to re-open the file. */
    private final HotEntryCache<K, BSFile> writerCache;

    /**
     * Creates a new map that stores indexed streams/lists of longs.
     * <p>
     * This class creates two files on disk. One for the index and one for the lists
     * of longs.
     * <p>
     * If opening one of the resources fails, the resources that were already
     * opened are closed again before the failure is propagated.
     *
     * @param path       the folder where to store the map
     * @param filePrefix prefix of the files
     * @param keyEncoder {@link EncoderDecoder} for the key
     * @throws IOException if the index or the data file cannot be opened
     */
    public PersistentMapOfListsOfLongs(final Path path, final String filePrefix, final EncoderDecoder<K> keyEncoder)
            throws IOException {
        Preconditions.checkTrue(Files.isDirectory(path), "must be a directory {0}", path);

        mapPath = path.resolve(filePrefix + "_index.bs");
        diskStorePath = path.resolve(filePrefix + "_data.bs");

        map = new PersistentMap<>(mapPath, keyEncoder, PersistentMap.LONG_CODER);

        DiskStorage storage = null;
        try {
            storage = new DiskStorage(diskStorePath);

            writerCache = new HotEntryCache<>(Duration.ofMinutes(10), filePrefix + "Cache");
            writerCache.addListener(new RemovalListener<K>(), EventType.EVICTED, EventType.REMOVED);
        } catch (final Throwable e) {
            // Do not leak the resources that were opened before the failure.
            // Problems during this cleanup must not hide the original cause.
            try {
                if (storage != null) {
                    storage.close();
                }
            } catch (final Exception suppressed) {
                e.addSuppressed(suppressed);
            }
            try {
                map.close();
            } catch (final Exception suppressed) {
                e.addSuppressed(suppressed);
            }
            throw e;
        }
        diskStore = storage;
    }

    /**
     * Appends a value to the list of the given key. The list is created if the key
     * is not yet known.
     *
     * @param key   the key of the list
     * @param value the value to append
     * @throws IOException if the index or the list cannot be read or written
     */
    public synchronized void appendLong(final K key, final long value) throws IOException {
        BSFile cachedWriter = writerCache.get(key);
        if (cachedWriter == null) {
            final Long bsFileBlockNumber = map.getValue(key);
            if (bsFileBlockNumber == null) {
                // first value for this key: create the list and register it in the index
                cachedWriter = BSFile.newFile(diskStore);
                map.putValue(key, cachedWriter.getRootBlockOffset());
            } else {
                cachedWriter = BSFile.existingFile(bsFileBlockNumber, diskStore);
            }
            writerCache.put(key, cachedWriter);
        }
        cachedWriter.append(value);
    }

    /**
     * Checks whether the index contains an entry for the key.
     *
     * @param key the key to look up
     * @return {@code true} if the index has a value for the key
     * @throws IOException if the index cannot be read
     */
    public synchronized boolean hasKey(final K key) throws IOException {
        return map.getValue(key) != null;
    }

    /**
     * Returns the longs stored for the key.
     * <p>
     * NOTE(review): the {@link BSFile} opened here is a separate instance from a
     * possibly cached writer and is not closed by this method. Whether values that
     * the cached writer has not yet written to the storage are visible to this
     * reader depends on {@link BSFile} - please confirm.
     *
     * @param key the key of the list
     * @return the stream of long lists produced by the key's {@link BSFile}
     * @throws IOException            if the index or the list cannot be read
     * @throws NoSuchElementException if the index does not contain the key
     */
    public synchronized Stream<LongList> getLongs(final K key) throws IOException {
        final Long bsFileBlockNumber = map.getValue(key);
        if (bsFileBlockNumber == null) {
            throw new NoSuchElementException("the map at '" + mapPath + "' does not contain the key '" + key + "'");
        }

        final BSFile bsFile = BSFile.existingFile(bsFileBlockNumber, diskStore);
        return bsFile.streamOfLongLists();
    }

    /**
     * Closes the cached writers, then the index and finally the disk storage. The
     * later resources are closed even if closing an earlier one fails.
     * <p>
     * Synchronized like the other public methods, so that it cannot run
     * concurrently with {@link #appendLong(Object, long)}.
     */
    @Override
    public synchronized void close() throws IOException {
        try {
            try {
                writerCache.forEach(bsFile -> bsFile.close());
            } finally {
                map.close();
            }
        } finally {
            diskStore.close();
        }
    }
}

View File

@@ -0,0 +1,62 @@
package org.lucares.pdb.map;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.lucares.collections.LongList;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
/**
 * Verifies that longs appended to a {@link PersistentMapOfListsOfLongs} can be
 * read back, in insertion order, after the map has been closed and re-opened.
 */
@Test
public class PersistentMapOfListsOfLongsTest {

    private static final String MAP_FILE_PREFIX = "test";

    private Path dataDirectory;

    @BeforeMethod
    public void beforeMethod() throws IOException {
        dataDirectory = Files.createTempDirectory("pdb");
    }

    @AfterMethod
    public void afterMethod() throws IOException {
        FileUtils.delete(dataDirectory);
    }

    public void test() throws IOException {
        final String keyA = "a";
        final String keyB = "b";
        final int size = 10;

        final LongList a = shuffledRange(size);
        final LongList b = shuffledRange(size);

        // write the values of both keys interleaved, then close the map
        try (PersistentMapOfListsOfLongs<String> map = openMap()) {
            int i = 0;
            while (i < size) {
                map.appendLong(keyA, a.get(i));
                map.appendLong(keyB, b.get(i));
                i++;
            }
        }

        // re-open the map and verify that both lists were persisted
        try (PersistentMapOfListsOfLongs<String> map = openMap()) {
            Assert.assertEquals(readAll(map, keyA), a);
            Assert.assertEquals(readAll(map, keyB), b);
        }
    }

    /** Opens the map under test in the temporary data directory. */
    private PersistentMapOfListsOfLongs<String> openMap() throws IOException {
        return new PersistentMapOfListsOfLongs<>(dataDirectory, MAP_FILE_PREFIX, PersistentMap.STRING_CODER);
    }

    /** Creates the range [0, size) and shuffles it. */
    private static LongList shuffledRange(final int size) {
        final LongList values = LongList.range(0, size);
        values.shuffle();
        return values;
    }

    /** Concatenates all long lists stored for the key into a single list. */
    private static LongList readAll(final PersistentMapOfListsOfLongs<String> map, final String key)
            throws IOException {
        final LongList collected = new LongList();
        map.getLongs(key).forEachOrdered(collected::addAll);
        return collected;
    }
}

View File

@@ -51,7 +51,7 @@ fragment
JavaLetter
: [a-zA-Z0-9$_] // these are the "java letters" below 0x7F
| [\u002a] // asterisk, used for wildcards
| [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area)
| [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area)
| // covers all characters above 0x7F which are not a surrogate
~[\u0000-\u007F\uD800-\uDBFF]
{Character.isJavaIdentifierStart(_input.LA(-1))}?
@@ -64,7 +64,7 @@ fragment
JavaLetterOrDigit
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
| [\u002a] // asterisk, used for wildcards
| [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area)
| [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area)
| '.'
| '/'
| '-'

View File

@@ -161,7 +161,6 @@ public class DataStore implements AutoCloseable {
private final DiskStorage diskStorage;
private final Path diskStorageFilePath;
private final Path storageBasePath;
private final Path queryCompletionIndexFile;
public DataStore(final Path dataDirectory) throws IOException {
storageBasePath = storageDirectory(dataDirectory);
@@ -183,8 +182,7 @@ public class DataStore implements AutoCloseable {
final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs");
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC);
queryCompletionIndexFile = storageBasePath.resolve("queryCompletionIndex.bs");
queryCompletionIndex = new QueryCompletionIndex(queryCompletionIndexFile);
queryCompletionIndex = new QueryCompletionIndex(storageBasePath);
}
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
@@ -195,39 +193,47 @@ public class DataStore implements AutoCloseable {
return dataDirectory.resolve(SUBDIR_STORAGE);
}
public long createNewFile(final Tags tags) throws IOException {
// visible for test
QueryCompletionIndex getQueryCompletionIndex() {
return queryCompletionIndex;
}
final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
public long createNewFile(final Tags tags) {
try {
final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
final long docId = createUniqueDocId();
final Doc doc = new Doc(tags, newFilesRootBlockOffset);
docIdToDoc.putValue(docId, doc);
final long docId = createUniqueDocId();
final Doc doc = new Doc(tags, newFilesRootBlockOffset);
docIdToDoc.putValue(docId, doc);
final Long oldDocId = tagsToDocId.putValue(tags, docId);
Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags);
final Long oldDocId = tagsToDocId.putValue(tags, docId);
Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags);
// store mapping from tag to docId, so that we can find all docs for a given tag
final List<Tag> ts = new ArrayList<>(tags.toTags());
ts.add(TAG_ALL_DOCS);
for (final Tag tag : ts) {
// store mapping from tag to docId, so that we can find all docs for a given tag
final List<Tag> ts = new ArrayList<>(tags.toTags());
ts.add(TAG_ALL_DOCS);
for (final Tag tag : ts) {
Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag);
Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag);
if (diskStoreOffsetForDocIdsOfTag == null) {
diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag);
if (diskStoreOffsetForDocIdsOfTag == null) {
diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag);
}
try (final BSFile docIdsOfTag = BSFile.existingFile(diskStoreOffsetForDocIdsOfTag, diskStorage)) {
docIdsOfTag.append(docId);
}
}
try (final BSFile docIdsOfTag = BSFile.existingFile(diskStoreOffsetForDocIdsOfTag, diskStorage)) {
docIdsOfTag.append(docId);
}
// index the tags, so that we can efficiently find all possible values for a
// field in a query
queryCompletionIndex.addTags(tags);
return newFilesRootBlockOffset;
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
// index the tags, so that we can efficiently find all possible values for a
// field in a query
queryCompletionIndex.addTags(tags);
return newFilesRootBlockOffset;
}
private long createUniqueDocId() {
@@ -357,11 +363,10 @@ public class DataStore implements AutoCloseable {
public List<Proposal> propose(final String query, final int caretIndex) {
final NewProposerParser newProposerParser = new NewProposerParser(this);
final NewProposerParser newProposerParser = new NewProposerParser(queryCompletionIndex);
final List<Proposal> proposals = newProposerParser.propose(query, caretIndex);
System.out.println(proposals);
return new Proposer(this).propose(query, caretIndex);
LOGGER.debug("Proposals for query {}: {}", query, proposals);
return proposals;
}
public DiskStorage getDiskStorage() {

View File

@@ -7,11 +7,13 @@ import java.util.SortedSet;
import java.util.TreeSet;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.RuntimeIOException;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.map.Empty;
import org.lucares.pdb.map.PersistentMap;
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
import org.lucares.utils.Preconditions;
import org.lucares.utils.byteencoder.VariableByteEncoder;
/**
@@ -22,30 +24,42 @@ import org.lucares.utils.byteencoder.VariableByteEncoder;
* <br>
* The expensive way is to execute the query for all available lastnames and
* keep those that return at least one result.<br>
* A more effiecient way uses an index that lists all lastnames that occurr with
* A more efficient way uses an index that lists all lastnames that occur with
* firstname=John. If we write this as table, then it looks like this:
*
* <pre>
*┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
*┃ fieldA ┃ valueA ┃ fieldB ┃ valueB ┃
*┃ fieldB ┃ fieldA ┃ valueA ┃ valueB ┃
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
*┃firstname┃ John ┃lastname ┃ Connor ┃
*┃lastname ┃firstname┃ John ┃ Connor ┃
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
*┃firstname┃ John ┃lastname ┃Carpenter┃
*┃lastname ┃firstname┃ John ┃Carpenter┃
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
*┃firstname┃ John ┃country ┃ Germany ┃
*┃country ┃firstname┃ John ┃ Germany ┃
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
*┃firstname┃ John ┃lastname ┃ Nash ┃
*┃lastname ┃firstname┃ John ┃ Nash ┃
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
*┃firstname┃ Rick ┃lastname ┃ Meyer ┃
*┃lastname ┃firstname┃ Rick ┃ Meyer ┃
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
*┃firstname┃ Rick ┃lastname ┃ Castle ┃
*┃lastname ┃firstname┃ Rick ┃ Castle ┃
*┗━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┛
* </pre>
*
* The lastnames where firstname=John are Connor, Carpenter and Nash. Given such
* a table we can just for all rows with fieldA=firstname and valueA=John and
* fieldB = lastname.
* The lastnames where firstname=John are: Connor, Carpenter and Nash. Given
* such a table we can just search for all rows with fieldA=firstname and
* valueA=John and fieldB=lastname.
* <p>
* Please note, that the columns for fieldA and fieldB come first. This is to
* make this index more suitable for IN-expressions and wildcard expressions of
* fieldA. Because we can now find all values for lastname where firstname=J*n*
* by searching for fieldA=firstname and fieldB=lastname, then do the wildcard
* evaluation while iterating over those hits. We do not have to expand the
* wildcard and then do hundreds or thousands of queries.
* <p>
* Please note, that fieldB comes before fieldA. This is, so that we can run
* inverse searches more efficiently. E.g. finding all values for
* fieldB=lastname where fieldA=firstname has a value != Connor. This is used
* for queries like 'NOT (firstname=Connor) and lastname=|'
* <p>
* The values in this index represent such a table.
* <p>
@@ -63,6 +77,12 @@ public class QueryCompletionIndex implements AutoCloseable {
this.tagB = tagB;
}
/**
 * Creates the pair from field names and values given as strings.
 * <p>
 * Mind the parameter order: {@code fieldB} comes first, followed by
 * {@code fieldA}, {@code valueA} and {@code valueB}.
 *
 * @param fieldB field of tagB
 * @param fieldA field of tagA
 * @param valueA value of tagA
 * @param valueB value of tagB
 */
public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) {
    this.tagA = new Tag(fieldA, valueA);
    this.tagB = new Tag(fieldB, valueB);
}
public Tag getTagA() {
return tagA;
}
@@ -75,7 +95,29 @@ public class QueryCompletionIndex implements AutoCloseable {
public String toString() {
return tagA + "::" + tagB;
}
}
/**
 * Immutable pair of two int values named {@code fieldA} and {@code fieldB}.
 */
public static final class FieldField {

    private final int fieldA;
    private final int fieldB;

    /**
     * @param fieldA the first value of the pair
     * @param fieldB the second value of the pair
     */
    public FieldField(final int fieldA, final int fieldB) {
        this.fieldA = fieldA;
        this.fieldB = fieldB;
    }

    /** @return the first value of the pair */
    public int getFieldA() {
        return this.fieldA;
    }

    /** @return the second value of the pair */
    public int getFieldB() {
        return this.fieldB;
    }

    /** Renders the pair as {@code fieldA::fieldB}. */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append(fieldA);
        text.append("::");
        text.append(fieldB);
        return text.toString();
    }
}
private static final class EncoderTwoTags implements EncoderDecoder<TwoTags> {
@@ -86,17 +128,22 @@ public class QueryCompletionIndex implements AutoCloseable {
final Tag tagA = tagAndField.getTagA();
final Tag tagB = tagAndField.getTagB();
tmp.add(tagA.getKey());
tmp.add(tagA.getValue());
tmp.add(tagB.getKey());
tmp.add(tagA.getKey());
// A query for tagA.key and tagA.value and tagB.key is done by setting
// tagB.value==0.
// The query is then executed as a prefix search. Thus tagB.value must not be
// part of the byte array that is returned.
if (tagB.getValue() >= 0) {
tmp.add(tagB.getValue());
if (tagA.getValue() >= 0) {
tmp.add(tagA.getValue());
// A query for tagA.key and tagA.value and tagB.key is done by setting
// tagB.value==-1.
// The query is then executed as a prefix search. Thus tagB.value must not be
// part of the byte array that is returned.
if (tagB.getValue() >= 0) {
tmp.add(tagB.getValue());
}
} else {
Preconditions.checkSmaller(tagB.getValue(), 0,
"if no value for tagA is given, then tagB must also be empty");
}
return VariableByteEncoder.encode(tmp);
@@ -106,9 +153,9 @@ public class QueryCompletionIndex implements AutoCloseable {
public TwoTags decode(final byte[] bytes) {
final LongList tmp = VariableByteEncoder.decode(bytes);
final int tagAKey = (int) tmp.get(0);
final int tagAValue = (int) tmp.get(1);
final int tagBKey = (int) tmp.get(2);
final int tagBKey = (int) tmp.get(0);
final int tagAKey = (int) tmp.get(1);
final int tagAValue = (int) tmp.get(2);
final int tagBValue = (int) tmp.get(3);
final Tag tagA = new Tag(tagAKey, tagAValue);
@@ -118,23 +165,81 @@ public class QueryCompletionIndex implements AutoCloseable {
}
}
private final PersistentMap<TwoTags, Empty> tagToTagIndex;
private static final class EncoderTag implements EncoderDecoder<Tag> {
public QueryCompletionIndex(final Path indexFile) throws IOException {
tagToTagIndex = new PersistentMap<>(indexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER);
/**
 * Encodes the key of the tag followed by its value. A negative value is left
 * out, so that only the key is encoded.
 */
@Override
public byte[] encode(final Tag tag) {
    final LongList keyAndValue = new LongList(2);
    keyAndValue.add(tag.getKey());

    final int value = tag.getValue();
    final boolean hasValue = value >= 0;
    if (hasValue) {
        keyAndValue.add(value);
    }
    return VariableByteEncoder.encode(keyAndValue);
}
/**
 * Decodes a tag from its first two encoded numbers: the key and the value.
 * <p>
 * NOTE(review): this reads two numbers from the decoded list; presumably it is
 * never called with a key-only array - confirm.
 */
@Override
public Tag decode(final byte[] bytes) {
    final LongList keyAndValue = VariableByteEncoder.decode(bytes);
    return new Tag((int) keyAndValue.get(0), (int) keyAndValue.get(1));
}
}
/**
 * Encodes field names via the number assigned to them by
 * {@link Tags#STRING_COMPRESSOR}.
 */
private static final class EncoderField implements EncoderDecoder<String> {

    /**
     * Encodes the compressed number of the field. The empty string is encoded as
     * an empty byte array.
     */
    @Override
    public byte[] encode(final String field) {
        return field.isEmpty() ? new byte[0] : VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field));
    }

    /** Reads the first encoded number and resolves it to the field name. */
    @Override
    public String decode(final byte[] bytes) {
        final int compressedField = (int) VariableByteEncoder.decodeFirstValue(bytes);
        return Tags.STRING_COMPRESSOR.get(compressedField);
    }
}
private final PersistentMap<TwoTags, Empty> tagToTagIndex;
private final PersistentMap<Tag, Empty> fieldToValueIndex;
private final PersistentMap<String, Empty> fieldIndex;
public QueryCompletionIndex(final Path basePath) throws IOException {
final Path tagToTagIndexFile = basePath.resolve("queryCompletionTagToTagIndex.bs");
tagToTagIndex = new PersistentMap<>(tagToTagIndexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER);
final Path fieldToValueIndexFile = basePath.resolve("queryCompletionFieldToValueIndex.bs");
fieldToValueIndex = new PersistentMap<>(fieldToValueIndexFile, new EncoderTag(), PersistentMap.EMPTY_ENCODER);
final Path fieldIndexFile = basePath.resolve("queryCompletionFieldIndex.bs");
fieldIndex = new PersistentMap<>(fieldIndexFile, new EncoderField(), PersistentMap.EMPTY_ENCODER);
}
public void addTags(final Tags tags) throws IOException {
final List<Tag> listOfTagsA = tags.toTags();
final List<Tag> listOfTagsB = tags.toTags();
// index all combinations of tagA and tagB
// index all combinations of tagA and tagB and fieldA to fieldB
for (final Tag tagA : listOfTagsA) {
for (final Tag tagB : listOfTagsB) {
final TwoTags key = new TwoTags(tagA, tagB);
tagToTagIndex.putValue(key, Empty.INSTANCE);
}
}
// create indices of all tags and all fields
for (final Tag tag : listOfTagsA) {
fieldToValueIndex.putValue(tag, Empty.INSTANCE);
fieldIndex.putValue(tag.getKeyAsString(), Empty.INSTANCE);
}
}
@Override
@@ -142,17 +247,76 @@ public class QueryCompletionIndex implements AutoCloseable {
tagToTagIndex.close();
}
public SortedSet<String> find(final Tag tag, final String field) throws IOException {
final SortedSet<String> result = new TreeSet<>();
final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
// EncoderTwoTags
final TwoTags keyPrefix = new TwoTags(tag, tagB);
tagToTagIndex.visitValues(keyPrefix, (k, v) -> {
result.add(k.getTagB().getValueAsString());
});
return result;
/**
 * Convenience variant of {@link #find(Tag, String)} that builds the tag from
 * the given property and value.
 *
 * @param property the key of the tag
 * @param value    the value of the tag
 * @param field    the field whose values are searched
 * @return the result of {@link #find(Tag, String)}
 */
public SortedSet<String> find(final String property, final String value, final String field) {
    return find(new Tag(property, value), field);
}
/**
 * Collects the values of {@code field} from all index entries that match the
 * given tag and field.
 *
 * @param tag   the tag (tagA) the entries must have
 * @param field the field (key of tagB) whose values are collected
 * @return the sorted values of tagB of all visited entries
 * @throws RuntimeIOException if the index cannot be read
 */
public SortedSet<String> find(final Tag tag, final String field) {
    try {
        final SortedSet<String> valuesOfField = new TreeSet<>();

        // tagB gets a negative value, which is required for the prefix search
        // to work. See EncoderTwoTags.
        final int compressedField = Tags.STRING_COMPRESSOR.put(field);
        final TwoTags prefix = new TwoTags(tag, new Tag(compressedField, -1));

        tagToTagIndex.visitValues(prefix,
                (twoTags, empty) -> valuesOfField.add(twoTags.getTagB().getValueAsString()));
        return valuesOfField;
    } catch (final IOException e) {
        throw new RuntimeIOException(e);
    }
}
/**
 * Collects all values that are indexed for the given field.
 *
 * @param field the field whose values are collected
 * @return the sorted values of all visited entries
 * @throws RuntimeIOException if the index cannot be read
 */
public SortedSet<String> findAllValuesForField(final String field) {
    try {
        final SortedSet<String> values = new TreeSet<>();

        // the value must be negative for the prefix search to work. See
        // EncoderTag
        final Tag prefix = new Tag(Tags.STRING_COMPRESSOR.put(field), -1);

        fieldToValueIndex.visitValues(prefix, (tag, empty) -> values.add(tag.getValueAsString()));
        return values;
    } catch (final IOException e) {
        throw new RuntimeIOException(e);
    }
}
/**
 * Collects the values of {@code field} from all visited entries whose tagA has
 * a value different from the value of the given tag.
 *
 * @param tag   the tag whose key selects the entries and whose value is
 *              excluded
 * @param field the field (key of tagB) whose values are collected
 * @return the sorted values of tagB of all entries that passed the filter
 * @throws RuntimeIOException if the index cannot be read
 */
public SortedSet<String> findAllValuesNotForField(final Tag tag, final String field) {
    try {
        final SortedSet<String> values = new TreeSet<>();
        final TwoTags prefix = new TwoTags(field, tag.getKeyAsString(), null, null);
        final int excludedValue = tag.getValue();

        tagToTagIndex.visitValues(prefix, (twoTags, empty) -> {
            final boolean isExcluded = twoTags.getTagA().getValue() == excludedValue;
            if (!isExcluded) {
                values.add(twoTags.getTagB().getValueAsString());
            }
        });
        return values;
    } catch (final IOException e) {
        throw new RuntimeIOException(e);
    }
}
/**
 * Collects the keys of the field index, visited with the empty string as
 * prefix.
 *
 * @return the sorted field names
 * @throws RuntimeIOException if the index cannot be read
 */
public SortedSet<String> findAllFields() {
    try {
        final SortedSet<String> fields = new TreeSet<>();
        fieldIndex.visitValues("", (field, empty) -> fields.add(field));
        return fields;
    } catch (final IOException e) {
        throw new RuntimeIOException(e);
    }
}
}

View File

@@ -5,6 +5,7 @@ import java.util.Arrays;
import java.util.List;
import org.lucares.utils.CollectionUtils;
import org.lucares.utils.Preconditions;
abstract public class Expression {
@@ -12,30 +13,8 @@ abstract public class Expression {
throw new UnsupportedOperationException();
}
abstract static class UnaryExpression extends Expression {
private final int line;
private final int startIndex;
private final int stopIndex;
public UnaryExpression(final int line, final int startIndex, final int stopIndex) {
super();
this.line = line;
this.startIndex = startIndex;
this.stopIndex = stopIndex;
}
int getLine() {
return line;
}
int getStartIndex() {
return startIndex;
}
int getStopIndex() {
return stopIndex;
}
boolean containsCaret() {
throw new UnsupportedOperationException();
}
abstract static class TemporaryExpression extends Expression {
@@ -93,6 +72,11 @@ abstract public class Expression {
return "!" + expression;
}
@Override
boolean containsCaret() {
return expression.containsCaret();
}
@Override
public int hashCode() {
final int prime = 31;
@@ -150,7 +134,12 @@ abstract public class Expression {
@Override
public String toString() {
return " (" + left + " or " + right + ") ";
return "(" + left + " or " + right + ")";
}
@Override
boolean containsCaret() {
return left.containsCaret() || right.containsCaret();
}
@Override
@@ -191,7 +180,7 @@ abstract public class Expression {
return true;
}
public static Expression create(final List<Expression> or) {
public static Expression create(final List<? extends Expression> or) {
if (or.size() == 1) {
return or.get(0);
@@ -231,7 +220,12 @@ abstract public class Expression {
@Override
public String toString() {
return " (" + left + " and " + right + ") ";
return "(" + left + " and " + right + ")";
}
@Override
boolean containsCaret() {
return left.containsCaret() || right.containsCaret();
}
@Override
@@ -294,11 +288,11 @@ abstract public class Expression {
}
}
static class Terminal extends UnaryExpression {
static class Terminal extends Expression {
private final String value;
Terminal(final String value, final int line, final int startIndex, final int stopIndex) {
super(line, startIndex, stopIndex);
Terminal(final String value) {
this.value = value;
}
@@ -313,6 +307,11 @@ abstract public class Expression {
return value;
}
@Override
boolean containsCaret() {
return value.contains(NewProposerParser.CARET_MARKER);
}
@Override
public int hashCode() {
final int prime = 31;
@@ -365,7 +364,24 @@ abstract public class Expression {
@Override
public String toString() {
return " " + property + " = " + value.getValue() + " ";
return property + " = " + value.getValue();
}
@Override
boolean containsCaret() {
return value.containsCaret();
}
public String getProperty() {
return property;
}
public Terminal getValue() {
return value;
}
public String getValueAsString() {
return value.getValue();
}
@Override
@@ -398,6 +414,7 @@ abstract public class Expression {
return false;
return true;
}
}
static class Parentheses extends Expression {
@@ -419,7 +436,12 @@ abstract public class Expression {
@Override
public String toString() {
return " [ " + expression + " ] ";
return "[ " + expression + " ]";
}
@Override
boolean containsCaret() {
return expression.containsCaret();
}
@Override
@@ -473,6 +495,16 @@ abstract public class Expression {
public String toString() {
return "(" + String.join(", ", getValues()) + ")";
}
/**
 * @return {@code true} if at least one of the property values contains the
 *         caret, {@code false} otherwise (also if there are no values)
 */
@Override
boolean containsCaret() {
for (final Terminal terminal : propertyValues) {
if (terminal.containsCaret()) {
return true;
}
}
return false;
}
}
static class InExpression extends Expression {
@@ -506,6 +538,16 @@ abstract public class Expression {
return values;
}
/**
 * @return {@code true} if at least one of the values contains the
 *         {@link NewProposerParser#CARET_MARKER}
 */
@Override
boolean containsCaret() {
    return values.stream().anyMatch(candidate -> candidate.contains(NewProposerParser.CARET_MARKER));
}
@Override
public int hashCode() {
final int prime = 31;
@@ -537,4 +579,147 @@ abstract public class Expression {
return true;
}
}
/**
 * An AND expression whose first operand is the property that contains the
 * caret and whose second operand is an expression without the caret.
 */
public static final class AndCaretExpression extends Expression {
    Property caretExpression;
    Expression expression;

    /**
     * @param caretExpression the property that contains the caret
     * @param expression      the other operand; must not contain the caret
     */
    public AndCaretExpression(final Property caretExpression, final Expression expression) {
        Preconditions.checkTrue(caretExpression.containsCaret(), "the expression '{0}' must contain the caret",
                caretExpression);
        // report the offending operand, not the caret expression
        Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret",
                expression);
        this.caretExpression = caretExpression;
        this.expression = expression;
    }

    @Override
    public <T> T visit(final ExpressionVisitor<T> visitor) {
        return visitor.visit(this);
    }

    @Override
    boolean containsCaret() {
        return caretExpression.containsCaret();
    }

    /** @return the property that contains the caret */
    public Property getCaretExpression() {
        return caretExpression;
    }

    /** @return the operand without the caret */
    public Expression getExpression() {
        return expression;
    }

    @Override
    public String toString() {
        return "(" + caretExpression + " and " + expression + ")";
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((caretExpression == null) ? 0 : caretExpression.hashCode());
        result = prime * result + ((expression == null) ? 0 : expression.hashCode());
        return result;
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        final AndCaretExpression other = (AndCaretExpression) obj;
        if (caretExpression == null) {
            if (other.caretExpression != null)
                return false;
        } else if (!caretExpression.equals(other.caretExpression))
            return false;
        if (expression == null) {
            if (other.expression != null)
                return false;
        } else if (!expression.equals(other.expression))
            return false;
        return true;
    }
}
/**
 * An AND expression whose first operand is the negated property that contains
 * the caret and whose second operand is an expression without the caret.
 */
public static final class AndNotCaretExpression extends Expression {
    Property negatedCaretExpression;
    Expression expression;

    /**
     * @param negatedCaretExpression the property that contains the caret; it is
     *                               rendered with a leading '!' by
     *                               {@link #toString()}
     * @param expression             the other operand; must not contain the caret
     */
    public AndNotCaretExpression(final Property negatedCaretExpression, final Expression expression) {
        Preconditions.checkTrue(negatedCaretExpression.containsCaret(),
                "the expression '{0}' must contain the caret", negatedCaretExpression);
        Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret",
                expression);
        this.negatedCaretExpression = negatedCaretExpression;
        this.expression = expression;
    }

    @Override
    public <T> T visit(final ExpressionVisitor<T> visitor) {
        return visitor.visit(this);
    }

    @Override
    boolean containsCaret() {
        return negatedCaretExpression.containsCaret();
    }

    /** @return the property that contains the caret */
    public Property getCaretExpression() {
        return negatedCaretExpression;
    }

    /** @return the operand without the caret */
    public Expression getExpression() {
        return expression;
    }

    @Override
    public String toString() {
        return "(!" + negatedCaretExpression + " and " + expression + ")";
    }

    // equals/hashCode are value based, consistent with AndCaretExpression

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((negatedCaretExpression == null) ? 0 : negatedCaretExpression.hashCode());
        result = prime * result + ((expression == null) ? 0 : expression.hashCode());
        return result;
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        final AndNotCaretExpression other = (AndNotCaretExpression) obj;
        if (negatedCaretExpression == null) {
            if (other.negatedCaretExpression != null)
                return false;
        } else if (!negatedCaretExpression.equals(other.negatedCaretExpression))
            return false;
        if (expression == null) {
            if (other.expression != null)
                return false;
        } else if (!expression.equals(other.expression))
            return false;
        return true;
    }
}
/**
 * An AND expression of two properties, where the first property is expected to
 * contain the caret.
 */
public static final class CaretAndExpression extends Expression {

    private final Property caretExpression;
    private final Property otherExpression;

    /**
     * @param caretExpression the property that is expected to contain the caret
     * @param otherExpression the other property
     */
    public CaretAndExpression(final Property caretExpression, final Property otherExpression) {
        this.caretExpression = caretExpression;
        this.otherExpression = otherExpression;
    }

    /**
     * Dispatches to {@link ExpressionVisitor#visit(CaretAndExpression)}, like the
     * other expression classes do for their own type. Delegating to the super
     * implementation would bypass the visitor.
     */
    @Override
    public <T> T visit(final ExpressionVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /**
     * Fails the precondition check if the caret expression does not contain the
     * caret; otherwise returns {@code true}.
     */
    @Override
    boolean containsCaret() {
        Preconditions.checkTrue(caretExpression.containsCaret(),
                "CaretAndExpression must contain the caret, but was: {0}", this);
        return caretExpression.containsCaret();
    }

    @Override
    public String toString() {
        return "(caretAnd: " + caretExpression + " and " + otherExpression + ")";
    }

    /** @return the property that is expected to contain the caret */
    public Property getCaretExpression() {
        return caretExpression;
    }

    /** @return the other property */
    public Property getOtherExpression() {
        return otherExpression;
    }
}
}

View File

@@ -32,4 +32,16 @@ public abstract class ExpressionVisitor<T> {
public T visit(final Expression.Parentheses parentheses) {
throw new UnsupportedOperationException();
}
/**
 * Default handler for {@link Expression.AndCaretExpression}: always throws.
 *
 * @throws UnsupportedOperationException always
 */
public T visit(final Expression.AndCaretExpression expression) {
throw new UnsupportedOperationException();
}
/**
 * Default handler for {@link Expression.AndNotCaretExpression}: always throws.
 *
 * @throws UnsupportedOperationException always
 */
public T visit(final Expression.AndNotCaretExpression expression) {
throw new UnsupportedOperationException();
}
/**
 * Default handler for {@link Expression.CaretAndExpression}: always throws.
 *
 * @throws UnsupportedOperationException always
 */
public T visit(final Expression.CaretAndExpression expression) {
throw new UnsupportedOperationException();
}
}

View File

@@ -0,0 +1,225 @@
package org.lucares.pdb.datastore.lang;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
import org.lucares.pdb.datastore.lang.Expression.And;
import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression;
import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression;
import org.lucares.pdb.datastore.lang.Expression.InExpression;
import org.lucares.pdb.datastore.lang.Expression.Not;
import org.lucares.pdb.datastore.lang.Expression.Or;
import org.lucares.pdb.datastore.lang.Expression.Property;
import org.lucares.utils.CollectionUtils;
public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<String>> {
/**
 * Evaluates an expression against the {@link QueryCompletionIndex} and
 * returns values of {@code field}.
 */
private static final class AndCaretExpressionVisitor extends ExpressionVisitor<SortedSet<String>> {

    private final QueryCompletionIndex index;
    private final String field;

    /**
     * @param queryCompletionIndex the index used for all lookups
     * @param field                the field whose values are searched
     */
    public AndCaretExpressionVisitor(final QueryCompletionIndex queryCompletionIndex, final String field) {
        this.index = queryCompletionIndex;
        this.field = field;
    }

    /** Looks up the values of the field for the tag {@code property=value}. */
    @Override
    public SortedSet<String> visit(final Property property) {
        return index.find(property.getProperty(), property.getValue().getValue(), field);
    }

    /** Unites the lookup results of all values of the IN-expression. */
    @Override
    public SortedSet<String> visit(final InExpression expression) {
        final SortedSet<String> union = new TreeSet<>();
        final String property = expression.getProperty();
        for (final String value : expression.getValues()) {
            union.addAll(index.find(property, value, field));
        }
        return union;
    }

    /**
     * For {@code property AND NOT(x)} the values found for x are removed from
     * the values found for the property. All other combinations are evaluated
     * as the intersection of both sides.
     */
    @Override
    public SortedSet<String> visit(final And expression) {
        final Expression left = expression.getLeft();
        final Expression right = expression.getRight();

        final boolean isPropertyAndNot = left instanceof Property && right instanceof Not;
        if (!isPropertyAndNot) {
            final SortedSet<String> intersection = left.visit(this);
            intersection.retainAll(right.visit(this));
            return intersection;
        }

        final SortedSet<String> valuesOfLeft = ((Property) left).visit(this);
        final Expression negated = ((Not) right).getExpression();
        final SortedSet<String> valuesOfNegated = negated.visit(this);
        return CollectionUtils.removeAll(valuesOfLeft, valuesOfNegated, TreeSet::new);
    }

    /** Unites the results of both sides. */
    @Override
    public SortedSet<String> visit(final Or expression) {
        final SortedSet<String> union = expression.getLeft().visit(this);
        union.addAll(expression.getRight().visit(this));
        return union;
    }

    /**
     * Only {@code NOT property=value} is supported.
     *
     * @throws UnsupportedOperationException if the negated expression is not a
     *                                       {@link Property}
     */
    @Override
    public SortedSet<String> visit(final Not expression) {
        final Expression negated = expression.getExpression();
        if (!(negated instanceof Property)) {
            throw new UnsupportedOperationException("NOT expressions like '" + expression
                    + "' are not supported. Only 'NOT property=value' expressions are supported.");
        }

        final Property property = (Property) negated;
        final Tag tag = new Tag(property.getProperty(), property.getValueAsString());

        final SortedSet<String> valuesNotForField = index.findAllValuesNotForField(tag, field);
        final SortedSet<String> valuesForField = index.find(tag, field);

        // values that occur only together with the negated tag
        final SortedSet<String> valuesOnlyAvailableInField = CollectionUtils.removeAll(valuesForField,
                valuesNotForField, TreeSet::new);

        return CollectionUtils.removeAll(valuesNotForField, valuesOnlyAvailableInField, TreeSet::new);
    }
}
private final QueryCompletionIndex queryCompletionIndex;
public FindValuesForQueryCompletion(final QueryCompletionIndex queryCompletionIndex) {
this.queryCompletionIndex = queryCompletionIndex;
}
@Override
public SortedSet<String> visit(final Property property) {
final String field = property.getProperty();
final String value = property.getValue().getValue();
final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(field);
final String valuePrefix = value.substring(0, value.indexOf(NewProposerParser.CARET_MARKER));
return GloblikePattern.filterValues(allValuesForField, valuePrefix, TreeSet::new);
}
@Override
public SortedSet<String> visit(final AndCaretExpression expression) {
final Property caretExpression = expression.getCaretExpression();
final String field = caretExpression.getProperty();
final String valueWithCaretMarker = caretExpression.getValue().getValue();
final String valuePrefix = valueWithCaretMarker.substring(0,
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));
final Expression rightHandExpression = expression.getExpression();
final SortedSet<String> candidateValues = rightHandExpression
.visit(new AndCaretExpressionVisitor(queryCompletionIndex, field));
return GloblikePattern.filterValues(candidateValues, valuePrefix, TreeSet::new);
}
@Override
public SortedSet<String> visit(final AndNotCaretExpression expression) {
final Property caretExpression = expression.getCaretExpression();
final String field = caretExpression.getProperty();
final String valueWithCaretMarker = caretExpression.getValue().getValue();
final String valuePattern = valueWithCaretMarker.substring(0,
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));
final SortedSet<String> allValuesForField = queryCompletionIndex
.findAllValuesForField(caretExpression.getProperty());
final SortedSet<String> valuesForFieldMatchingCaretExpression = GloblikePattern.filterValues(allValuesForField,
valuePattern, TreeSet::new);
final Expression rightHandExpression = expression.getExpression();
final SortedSet<String> rightHandValues = rightHandExpression
.visit(new AndCaretExpressionVisitor(queryCompletionIndex, field));
if (rightHandValues.size() == 1) {
// there is only one alternative and that one must not be chosen
return Collections.emptySortedSet();
}
final SortedSet<String> result = CollectionUtils.retainAll(rightHandValues,
valuesForFieldMatchingCaretExpression, TreeSet::new);
return result;
}
@Override
public SortedSet<String> visit(final Not expression) {
final String field;
final Expression innerExpression = expression.getExpression();
if (innerExpression instanceof Property) {
field = ((Property) innerExpression).getProperty();
final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(field);
final String valueWithCaretMarker = ((Property) innerExpression).getValue().getValue();
final String valuePrefix = valueWithCaretMarker.substring(0,
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));
final TreeSet<String> result = GloblikePattern.filterValues(allValuesForField, valuePrefix + "*",
TreeSet::new);
return result;
} else {
throw new UnsupportedOperationException();
}
}
@Override
public SortedSet<String> visit(final Or expression) {
final Expression left = expression.getLeft();
final Expression right = expression.getRight();
final SortedSet<String> result = left.visit(this);
final SortedSet<String> rightResult = right.visit(this);
result.addAll(rightResult);
return result;
}
@Override
public SortedSet<String> visit(final And expression) {
final Expression left = expression.getLeft();
final Expression right = expression.getRight();
final SortedSet<String> result = left.visit(this);
final SortedSet<String> rightResult = right.visit(this);
result.retainAll(rightResult);
return result;
}
}

View File

@@ -1,11 +1,14 @@
package org.lucares.pdb.datastore.lang;
import java.util.Collection;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
class GloblikePattern {
public class GloblikePattern {
private static final Logger LOGGER = LoggerFactory.getLogger(GloblikePattern.class);
@@ -25,4 +28,26 @@ class GloblikePattern {
return Pattern.compile(valueRegex);
}
/**
 * Filters {@code availableValues} into a new collection created by
 * {@code generator}.
 *
 * @param availableValues the values to filter
 * @param valuePattern    the globlike pattern the values are matched against
 * @param generator       creates the collection that receives the matches
 * @return the collection created by {@code generator}, filled with the matches
 */
public static <T extends Collection<String>> T filterValues(final Collection<String> availableValues,
        final String valuePattern, final Supplier<T> generator) {
    return filterValues(generator.get(), availableValues, valuePattern);
}
/**
 * Adds every value of {@code availableValues} that matches
 * {@code valuePattern} to {@code result}. A value that is equal to the pattern
 * itself is not added.
 *
 * @param result          the collection that receives the matches
 * @param availableValues the values to filter
 * @param valuePattern    the globlike pattern, translated with
 *                        {@link #globlikeToRegex(String)}
 * @return {@code result}
 */
public static <T extends Collection<String>> T filterValues(final T result,
        final Collection<String> availableValues, final String valuePattern) {
    final Pattern regex = GloblikePattern.globlikeToRegex(valuePattern);

    for (final String candidate : availableValues) {
        if (candidate.equals(valuePattern)) {
            // the pattern itself is never a match
            continue;
        }
        if (regex.matcher(candidate).find()) {
            result.add(candidate);
        }
    }
    return result;
}
}

View File

@@ -0,0 +1,79 @@
package org.lucares.pdb.datastore.lang;
import org.lucares.pdb.datastore.lang.Expression.And;
import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression;
import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression;
import org.lucares.pdb.datastore.lang.Expression.CaretAndExpression;
import org.lucares.pdb.datastore.lang.Expression.Not;
import org.lucares.pdb.datastore.lang.Expression.Or;
import org.lucares.pdb.datastore.lang.Expression.Parentheses;
import org.lucares.pdb.datastore.lang.Expression.Property;
/**
 * Base class for visitors that rewrite expressions. The default implementation
 * rebuilds {@link And}, {@link Or}, {@link Not} and {@link Parentheses} nodes
 * from their visited children and returns every other expression unchanged.
 * Subclasses override only the {@code visit} methods for the nodes they want
 * to modify.
 */
public abstract class IdentityExpressionVisitor extends ExpressionVisitor<Expression> {

    /** Rebuilds the AND from its visited operands (left is visited first). */
    @Override
    public Expression visit(final And expression) {
        final Expression visitedLeft = expression.getLeft().visit(this);
        final Expression visitedRight = expression.getRight().visit(this);
        return new And(visitedLeft, visitedRight);
    }

    /** Rebuilds the OR from its visited operands (left is visited first). */
    @Override
    public Expression visit(final Or expression) {
        final Expression visitedLeft = expression.getLeft().visit(this);
        final Expression visitedRight = expression.getRight().visit(this);
        return new Or(visitedLeft, visitedRight);
    }

    /** Rebuilds the NOT around its visited inner expression. */
    @Override
    public Expression visit(final Not expression) {
        final Expression visitedInner = expression.getExpression().visit(this);
        return new Not(visitedInner);
    }

    /** Rebuilds the parentheses around their visited inner expression. */
    @Override
    public Expression visit(final Parentheses parentheses) {
        final Expression visitedInner = parentheses.getExpression().visit(this);
        return new Parentheses(visitedInner);
    }

    // All remaining expression types are returned as they are.

    @Override
    public Expression visit(final Property expression) {
        return expression;
    }

    @Override
    public Expression visit(final Expression.Terminal expression) {
        return expression;
    }

    @Override
    public Expression visit(final Expression.MatchAll expression) {
        return expression;
    }

    @Override
    public Expression visit(final Expression.InExpression expression) {
        return expression;
    }

    @Override
    public Expression visit(final AndCaretExpression expression) {
        return expression;
    }

    @Override
    public Expression visit(final AndNotCaretExpression expression) {
        return expression;
    }

    @Override
    public Expression visit(final CaretAndExpression expression) {
        return expression;
    }
}

View File

@@ -1,27 +1,195 @@
package org.lucares.pdb.datastore.lang;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.lucares.pdb.datastore.Proposal;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
import org.lucares.utils.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class NewProposerParser {
private final static String CARET_MARKER = "\ue001"; // third character in the private use area
private static final Logger LOGGER = LoggerFactory.getLogger(NewProposerParser.class);
private final DataStore dataStore;
public final static String CARET_MARKER = "\ue001"; // character in the private use area
public NewProposerParser(final DataStore dataStore) {
this.dataStore = dataStore;
/*
* Regex matching a java identifier without a caret marker. We define it as a
* blacklist, because this is easier. The regex is only used <em>after</em> the
* query has already been validated with the proper grammar.
*/
private static final String REGEX_IDENTIFIER = "[^\\s,!\\(\\)=" + CARET_MARKER + "]*";
private final QueryCompletionIndex queryCompletionIndex;
public NewProposerParser(final QueryCompletionIndex queryCompletionIndex) {
this.queryCompletionIndex = queryCompletionIndex;
}
public List<Proposal> propose(final String query, final int caretIndex) {
List<Proposal> proposals;
if (StringUtils.isBlank(query)) {
proposals = proposeForAllKeys();
} else {
final String queryString = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();
final List<Proposal> foundProposals = proposalsForValues(query, caretIndex);
if (foundProposals.isEmpty()) {
proposals = proposalsForNonValues(query, caretIndex);
} else {
proposals = foundProposals;
}
}
final Expression expression = QueryLanguageParser.parse(queryString);
return proposals;
}
/**
 * Proposes field names when the token in front of the caret indicates that a
 * new expression starts at the caret.
 * <p>
 * This method is called when the query could not be parsed. It is likely that
 * the next word is either a field or an operator. But it is also possible that
 * the next word is a field-value, because the syntax error might be at another
 * location in the query (not at the caret position).
 *
 * @param query      the query without caret marker
 * @param caretIndex position of the caret in {@code query}
 * @return proposals for fields; empty if no field is expected at the caret
 */
private List<Proposal> proposalsForNonValues(final String query, final int caretIndex) {
    final String markedQuery = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();
    final List<String> tokens = QueryLanguage.getTokens(markedQuery);
    final int caretTokenIndex = CollectionUtils.indexOf(tokens, t -> t.contains(CARET_MARKER));

    final boolean fieldExpected;
    if (caretTokenIndex < 0) {
        // no token contains the caret
        fieldExpected = false;
    } else if (caretTokenIndex == 0) {
        // the caret is in the very first token
        fieldExpected = true;
    } else {
        switch (tokens.get(caretTokenIndex - 1)) {
        case "(":
        case "and":
        case "or":
        case "!":
            fieldExpected = true;
            break;
        default:
            // includes ")": nothing is proposed
            fieldExpected = false;
            break;
        }
    }

    final List<Proposal> proposals = new ArrayList<>();
    if (fieldExpected) {
        proposals.addAll(proposeForAllKeys(markedQuery));
    }
    return proposals;
}
/**
 * Proposes all fields that start with the word in front of the caret. In each
 * proposal the word at the caret is replaced by {@code <field>=}.
 *
 * @param queryWithCaretMarker the query with {@link #CARET_MARKER} at the caret
 *                             position
 * @return one proposal per matching field; empty if no word prefix could be
 *         determined
 */
private Collection<? extends Proposal> proposeForAllKeys(final String queryWithCaretMarker) {
    final List<Proposal> proposals = new ArrayList<>();
    final String wordPrefix = wordPrefix(queryWithCaretMarker);
    if (wordPrefix == null) {
        return proposals;
    }

    // compiled once instead of twice per field
    final Pattern wordAtCaret = Pattern.compile(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER);

    final SortedSet<String> allFields = queryCompletionIndex.findAllFields();
    for (final String field : allFields) {
        if (!field.startsWith(wordPrefix)) {
            continue;
        }

        // Field names are data: '$' and '\' must not be interpreted as group
        // references/escapes of the replacement string.
        final String quotedField = Matcher.quoteReplacement(field);

        final String proposedQuery = wordAtCaret.matcher(queryWithCaretMarker).replaceAll(quotedField + "=* ");
        final String newQueryWithCaretMarker = wordAtCaret.matcher(queryWithCaretMarker)
                .replaceAll(quotedField + "=" + CARET_MARKER);
        final String newQuery = newQueryWithCaretMarker.replace(CARET_MARKER, "");
        final int newCaretPosition = newQueryWithCaretMarker.indexOf(CARET_MARKER);

        proposals.add(new Proposal(field, proposedQuery, true, newQuery, newCaretPosition));
    }
    return proposals;
}
/**
 * Returns the identifier characters directly in front of the caret marker.
 *
 * @param queryWithCaretMarker the query with {@link #CARET_MARKER} at the caret
 *                             position
 * @return the (possibly empty) word in front of the caret, or {@code null} if
 *         the query does not contain the caret marker
 */
private String wordPrefix(final String queryWithCaretMarker) {
    final Matcher caretWord = Pattern.compile("(" + REGEX_IDENTIFIER + CARET_MARKER + ")")
            .matcher(queryWithCaretMarker);
    if (!caretWord.find()) {
        return null;
    }
    // the match ends with the caret marker, which is not part of the prefix
    return caretWord.group().replace(CARET_MARKER, "");
}
/**
 * Proposes every known field for an empty query. The new query of each
 * proposal is {@code <field>=} with the caret placed at its end.
 *
 * @return one proposal per field of the index
 */
private List<Proposal> proposeForAllKeys() {
    final List<Proposal> proposals = new ArrayList<>();
    for (final String fieldName : queryCompletionIndex.findAllFields()) {
        final String queryUpToValue = fieldName + "=";
        proposals.add(
                new Proposal(fieldName, queryUpToValue + "*", true, queryUpToValue, queryUpToValue.length()));
    }
    return proposals;
}
/**
 * Proposes values for the field at the caret position.
 *
 * @param query      the query without caret marker
 * @param caretIndex position of the caret in {@code query}
 * @return proposals for values; empty if the query (with caret marker) cannot
 *         be parsed
 */
List<Proposal> proposalsForValues(final String query, final int caretIndex) {
    try {
        // The caret marker tells us where the caret is. It also turns a query like
        // "name=|" ('|' is the caret) into something parsable: without the marker the
        // query would be "name=", which is not a valid expression.
        final String markedQuery = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();

        final Expression parsed = QueryLanguageParser.parse(markedQuery);

        // normalize the expression, so that the queryCompletionIndex can be used to
        // search for candidate values
        final Expression normalized = new QueryCompletionExpressionOptimizer().normalizeExpression(parsed);

        final SortedSet<String> candidateValues = normalized
                .visit(new FindValuesForQueryCompletion(queryCompletionIndex));

        // translate the candidate values to proposals
        return generateProposals(markedQuery, parsed, candidateValues);
    } catch (final SyntaxException e) {
        LOGGER.debug("Query ({}) is not valid. This is expected to happen "
                + "unless we are looking for proposals of values.", query, e);
        return Collections.emptyList();
    }
}
/**
 * Creates one proposal per candidate value by replacing the word at the caret
 * with the candidate value.
 *
 * @param queryWithCaretMarker the query with {@link #CARET_MARKER} at the caret
 *                             position
 * @param expression           the parsed query (currently not used)
 * @param candidateValues      the values to propose
 * @return the proposals, in iteration order of {@code candidateValues}
 */
private List<Proposal> generateProposals(final String queryWithCaretMarker, final Expression expression,
        final SortedSet<String> candidateValues) {
    final List<Proposal> proposals = new ArrayList<>();

    // compiled once instead of once per candidate value
    final Pattern wordAtCaret = Pattern.compile(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER);

    for (final String proposedTag : candidateValues) {
        // Values are data: '$' and '\' must not be interpreted as group
        // references/escapes of the replacement string.
        final String replacement = Matcher.quoteReplacement(proposedTag) + CARET_MARKER;

        final String proposedQueryWithCaretMarker = wordAtCaret.matcher(queryWithCaretMarker)
                .replaceAll(replacement);
        final String proposedQuery = proposedQueryWithCaretMarker.replace(CARET_MARKER, "");
        final int newCaretPosition = proposedQueryWithCaretMarker.indexOf(CARET_MARKER);

        proposals.add(new Proposal(proposedTag, proposedQuery, true, proposedQuery, newCaretPosition));
    }
    return proposals;
}
}

View File

@@ -0,0 +1,268 @@
package org.lucares.pdb.datastore.lang;
import java.util.ArrayList;
import java.util.List;
import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
import org.lucares.pdb.datastore.lang.Expression.And;
import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression;
import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression;
import org.lucares.pdb.datastore.lang.Expression.InExpression;
import org.lucares.pdb.datastore.lang.Expression.Not;
import org.lucares.pdb.datastore.lang.Expression.Or;
import org.lucares.pdb.datastore.lang.Expression.Property;
import org.lucares.pdb.datastore.lang.Expression.Terminal;
/**
 * Query completion utilizes an index that contains all mappings of
 * tags+fieldname to values. This index can be used to answer the question what
 * the possible values for fields in simple AND-queries are.
 * <p>
 * E.g. Given the query "lastname=Doe and firstname=|" ('|' is the marker for
 * the caret position). All possible values for firstname are in the index under
 * "tagA.field=lastname and tagA.value=Doe and tagB.field=firstname". See also
 * {@link QueryCompletionIndex}.
 * <p>
 * We can use this index for all boolean queries. But we have to normalize the
 * queries first.
 * <p>
 * E.g. "(lastname=Doe or country=Atlantis) and firstname=|" will be normalized
 * and split into two queries:
 * <ol>
 * <li>"lastname=Doe and firstname=|"
 * <li>"country=Atlantis and firstname=|"
 * </ol>
 * Everything that is or'ed with the field for which we are doing the
 * completion can be removed. E.g. "lastname=Doe or firstname=|" will be
 * normalized to "firstname=|", because the expression lastname=Doe does not
 * change which values are possible for firstname.
 * <p>
 * Consequently, IN-expressions are normalized to PROPERTY-expressions.
 * <p>
 * E.g. "firstname=John,|,Frank" will be normalized to "firstname=|".
 */
public class QueryCompletionExpressionOptimizer {

    /**
     * Replaces an IN-expression that contains the caret, or that has exactly one
     * value, with the OR of one property expression per value.
     */
    private static final class ReplaceINExpressionsWithPropertyExpressionsVisitor extends IdentityExpressionVisitor {
        @Override
        public Expression visit(final InExpression expression) {
            if (expression.containsCaret() || expression.getValues().size() == 1) {
                final String property = expression.getProperty();
                final List<String> values = expression.getValues();

                final List<Property> propertyExpressions = new ArrayList<>(values.size());
                for (final String value : values) {
                    propertyExpressions.add(new Property(property, new Terminal(value)));
                }
                return Expression.Or.create(propertyExpressions);
            }
            return super.visit(expression);
        }
    }

    /**
     * Replaces an OR, where exactly one operand contains the caret, with that
     * operand.
     */
    private static final class RemoveOrEdExpressions extends IdentityExpressionVisitor {
        @Override
        public Expression visit(final Or expression) {
            final Expression left = expression.getLeft();
            final Expression right = expression.getRight();

            if (left.containsCaret() && !right.containsCaret()) {
                return left;
            }
            if (!left.containsCaret() && right.containsCaret()) {
                return right;
            }
            return super.visit(expression);
        }
    }

    /**
     * Applies the distributive law to an AND that has an OR as operand.
     */
    private static final class DistributiveNormalization extends IdentityExpressionVisitor {
        @Override
        public Expression visit(final And expression) {
            final Expression left = expression.getLeft();
            final Expression right = expression.getRight();

            if (left instanceof Or) {
                // (a or b) and c
                // becomes
                // a and c or b and c
                final Expression ac = new And(((Or) left).getLeft(), right);
                final Expression bc = new And(((Or) left).getRight(), right);
                return new Or(ac, bc);
            }

            if (right instanceof Or) {
                // a and (b or c)
                // becomes
                // a and b or a and c
                final Expression ab = new And(left, ((Or) right).getLeft());
                final Expression ac = new And(left, ((Or) right).getRight());
                return new Or(ab, ac);
            }

            return super.visit(expression);
        }
    }

    /**
     * Re-associates nested ANDs so that the operand containing the caret becomes
     * a direct operand of the outer AND.
     */
    private static final class RotateAndExpressions extends IdentityExpressionVisitor {
        @Override
        public Expression visit(final And expression) {
            final Expression left = expression.getLeft();
            final Expression right = expression.getRight();

            // (| and a) and b => | and (a and b)
            //
            // The expression with the caret is moved up
            if (left.containsCaret() && left instanceof And) {
                final Expression leftLeft = ((And) left).getLeft();
                final Expression leftRight = ((And) left).getRight();

                if (leftLeft.containsCaret()) {
                    return new And(leftLeft, new And(leftRight, right));
                } else {
                    return new And(new And(leftLeft, right), leftRight);
                }
            } else if (right.containsCaret() && right instanceof And) {
                final Expression rightLeft = ((And) right).getLeft();
                final Expression rightRight = ((And) right).getRight();

                if (rightLeft.containsCaret()) {
                    return new And(rightLeft, new And(rightRight, left));
                } else {
                    return new And(new And(rightLeft, left), rightRight);
                }
            }

            return super.visit(expression);
        }
    }

    /**
     * Removes double negations: not not a => a
     */
    private static final class DoubleNegationExpressions extends IdentityExpressionVisitor {
        @Override
        public Expression visit(final Not expression) {
            final Expression inner = expression.getExpression();
            if (inner instanceof Not) {
                return ((Not) inner).getExpression();
            }
            return super.visit(expression);
        }
    }

    /**
     * Applies De Morgan's law to negated ANDs: not (a and b) => (not a) or (not b)
     */
    private static final class DeMorgan extends IdentityExpressionVisitor {
        @Override
        public Expression visit(final Not expression) {
            if (expression.getExpression() instanceof And) {
                final And andExpression = (And) expression.getExpression();
                final Expression notLeft = new Not(andExpression.getLeft());
                final Expression notRight = new Not(andExpression.getRight());
                return new Or(notLeft, notRight);
            }
            return super.visit(expression);
        }
    }

    /**
     * Replaces an AND, where one operand is the (optionally negated) property
     * containing the caret, with an {@link AndCaretExpression} or
     * {@link AndNotCaretExpression}.
     */
    private static final class ToAndCaretExpressions extends IdentityExpressionVisitor {
        @Override
        public Expression visit(final And expression) {
            final Expression left = expression.getLeft();
            final Expression right = expression.getRight();

            if (left.containsCaret() && left instanceof Property) {
                return new AndCaretExpression((Property) left, right);
            }
            if (right.containsCaret() && right instanceof Property) {
                return new AndCaretExpression((Property) right, left);
            }

            if (left.containsCaret()//
                    && left instanceof Not//
                    && ((Not) left).getExpression() instanceof Property) {
                return new AndNotCaretExpression((Property) ((Not) left).getExpression(), right);
            }
            if (right.containsCaret()//
                    && right instanceof Not//
                    && ((Not) right).getExpression() instanceof Property) {
                return new AndNotCaretExpression((Property) ((Not) right).getExpression(), left);
            }

            return super.visit(expression);
        }
    }

    /**
     * Normalizes the expression so that it can be evaluated with the
     * {@link QueryCompletionIndex}. The normalization steps are repeated until the
     * expression does not change anymore.
     *
     * @param expression the parsed query, including the caret marker
     * @return the normalized expression
     */
    public Expression normalizeExpression(final Expression expression) {

        Expression normalizingExpression = expression;
        Expression previousExpression;
        do {
            previousExpression = normalizingExpression;

            // replace all IN-expression, because they are just syntactic sugar for
            // OR-expressions, but only for those that include the caret
            normalizingExpression = normalizingExpression
                    .visit(new ReplaceINExpressionsWithPropertyExpressionsVisitor());

            // Remove expressions that are OR'ed with the one that contains the caret.
            // Everything that is OR'ed with the 'caret'-expression cannot change the
            // possible values.
            normalizingExpression = visitRepeatedly(normalizingExpression, new RemoveOrEdExpressions());

            // In the end we want to have expressions like "firstname=Jane and lastname=|".
            // To reach that goal we use the distributive law to modify expressions like
            // "(firstname=Jane or firstname=John) and lastname=|" to "(firstname=Jane and
            // lastname=|) or (firstname=John and lastname=|)"
            normalizingExpression = visitRepeatedly(normalizingExpression, new DistributiveNormalization());

            // move the expression with the caret up in nested AND-expressions, e.g.
            // (ln=| and fn=John) and age=3
            // becomes
            // ln=| and (fn=John and age=3)
            normalizingExpression = visitRepeatedly(normalizingExpression, new RotateAndExpressions());

            // normalize a NAND-expression into an OR with DeMorgan, the OR-Expression might
            // later be removed
            // not ( a and b) => (not a) or (not b)
            normalizingExpression = visitRepeatedly(normalizingExpression, new DeMorgan());

            // remove double negation
            // not not a => a
            normalizingExpression = visitRepeatedly(normalizingExpression, new DoubleNegationExpressions());
        } while (!normalizingExpression.equals(previousExpression));

        // Replaces all (a and |) expressions with a special expression that represents
        // it.
        // This special expression will then be used during evaluation.
        return visitRepeatedly(normalizingExpression, new ToAndCaretExpressions());
    }

    /**
     * Applies the visitor until the expression does not change anymore.
     *
     * @param expression the expression to transform
     * @param visitor    the transformation
     * @return the first result that is equal to the expression it was computed
     *         from
     */
    private static Expression visitRepeatedly(final Expression expression,
            final ExpressionVisitor<Expression> visitor) {
        Expression current = expression;
        while (true) {
            final Expression next = current.visit(visitor);
            if (current.equals(next)) {
                return next;
            }

            // NOTE(review): debug output on stdout in library code; consider moving this
            // to the logger used by the other classes of this package.
            System.out.println(" translate: " + visitor.getClass().getSimpleName());
            System.out.println(" in: " + current);
            System.out.println(" out: " + next);

            current = next;
        }
    }
}

View File

@@ -1,10 +1,12 @@
package org.lucares.pdb.datastore.lang;
import java.util.List;
import java.util.Stack;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
@@ -22,6 +24,7 @@ import org.lucares.pdb.datastore.lang.PdbLangParser.IdentifierExpressionContext;
import org.lucares.pdb.datastore.lang.PdbLangParser.ListOfPropValuesContext;
import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext;
import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyTerminalExpressionContext;
import org.lucares.utils.CollectionUtils;
public class QueryLanguage {
@@ -51,11 +54,7 @@ public class QueryLanguage {
throw new SyntaxException(ctx, "token too long");
}
final int line = ctx.getStart().getLine();
final int startIndex = ctx.getStart().getStartIndex();
final int stopIndex = ctx.getStart().getStopIndex();
stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex));
stack.push(new Terminal(ctx.getText()));
}
@Override
@@ -64,11 +63,7 @@ public class QueryLanguage {
throw new SyntaxException(ctx, "token too long");
}
final int line = ctx.getStart().getLine();
final int startIndex = ctx.getStart().getStartIndex();
final int stopIndex = ctx.getStart().getStopIndex();
stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex));
stack.push(new Terminal(ctx.getText()));
}
@Override
@@ -145,4 +140,15 @@ public class QueryLanguage {
return stack.pop();
}
/**
 * Splits the input with the lexer of the query language and returns the text
 * of every token of the resulting token stream.
 *
 * @param input the text to tokenize
 * @return the token texts in the order of the token stream
 */
public static List<String> getTokens(final String input) {
    final PdbLangLexer lexer = new PdbLangLexer(CharStreams.fromString(input));

    final CommonTokenStream tokenStream = new CommonTokenStream(lexer);
    // load all tokens before reading the token list
    tokenStream.fill();

    return CollectionUtils.map(tokenStream.getTokens(), Token::getText);
}
}

View File

@@ -1,21 +1,36 @@
package org.lucares.pdb.datastore.internal;
import java.awt.BorderLayout;
import java.awt.event.KeyAdapter;
import java.awt.event.KeyEvent;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import javax.swing.JFrame;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.Proposal;
import org.lucares.utils.CollectionUtils;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@Test
@@ -107,6 +122,162 @@ public class DataStoreTest {
Assert.assertEquals(eagleTimBlockOffset % BSFile.BLOCK_SIZE, 0);
}
/**
 * Test cases for {@code testProposals}. Each case consists of the query ('|'
 * marks the caret), the field that is completed and the expected proposed
 * values.
 */
@DataProvider(name = "providerProposals")
public Iterator<Object[]> providerProposals() {
    final List<Object[]> testCases = new ArrayList<>();

    testCases.add(new Object[] { "type=bird and subtype=eagle and name=|", "name", Arrays.asList("Tim") });

    // Tim is the only name of a dog that starts with 'Ti'
    testCases.add(new Object[] { "!name=Ti| and type=dog", "name", Arrays.asList("Tim") });

    // the names of all cats
    testCases.add(new Object[] { "type=cat and !name=|", "name",
            Arrays.asList("Jane", "John", "Paul", "Sam", "Timothy") });

    // finds nothing: no dog has a name that is neither Jenny nor Ti*
    testCases.add(new Object[] { "!name=Ti| and type=dog and !name=Jenny", "name", Arrays.asList() });

    testCases.add(new Object[] { "(type=bird and age=three or type=dog and age=three) and name=|", "name",
            Arrays.asList("Jenny", "Tim") });

    // every name except Jennifer
    testCases.add(new Object[] { "!(type=bird) and name=|", "name",
            Arrays.asList("Jane", "Jenny", "John", "Paul", "Sam", "Tim", "Timothy") });

    testCases.add(new Object[] { "type=bird and !subtype=eagle and name=|", "name", Arrays.asList("Jennifer") });

    // DeMorgan
    // TODO should only match "Jenny", because Jenny is the only non-bird name
    // starting with 'Jen'
    testCases.add(new Object[] { "!(type=bird and name=Jen|)", "name", Arrays.asList("Jennifer", "Jenny") });

    testCases.add(new Object[] { "!(type=dog and name=|) and !type=cat", "name",
            Arrays.asList("Jennifer", "Jenny", "Tim") });

    return testCases.iterator();
}
/**
 * Fills a fresh data store with ten animals and asserts that the proposals for
 * the query are the expected values.
 *
 * @param queryWithCaret         the query, '|' marks the caret position
 * @param field                  the field that is completed
 * @param expectedProposedValues the values that must be proposed
 */
@Test(dataProvider = "providerProposals")
public void testProposals(final String queryWithCaret, final String field,
        final List<String> expectedProposedValues) throws Exception {

    dataStore = new DataStore(dataDirectory);

    final List<Tags> animals = Arrays.asList(
            Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"),
            Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"),
            Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"),

            Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"),
            Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"),

            Tags.create("type", "cat", "subtype", "tiger", "age", "one", "name", "Timothy"),
            Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"),
            Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"),
            Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"),
            Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John"));

    // one file per animal
    for (final Tags animal : animals) {
        dataStore.createNewFile(animal);
    }

    assertProposals(queryWithCaret, field, expectedProposedValues);
}
/**
 * Manual test harness: opens a Swing window in which a query can be typed. On
 * every key release the proposals for the current text and caret position are
 * requested from the data store and displayed.
 * <p>
 * NOTE(review): the temporary directory is not deleted, and the Swing
 * components are created on the thread that runs main.
 */
public static void main(final String[] args) throws IOException, InterruptedException {
final Path dir = Files.createTempDirectory("pdb");
try (DataStore dataStore = new DataStore(dir)) {
// sample data: three birds, two dogs and five cats
final List<Tags> tags = Arrays.asList(
Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"),
Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"),
Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"),
Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"),
Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"),
Tags.create("type", "cat", "subtype", "tiger", "age", "one", "name", "Timothy"),
Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"),
Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"),
Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"),
Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John"));
tags.forEach(dataStore::createNewFile);
// window layout: query input at the top, proposals in the center, the content
// of the data store at the bottom
final JFrame frame = new JFrame();
final JTextField input = new JTextField();
final JTextArea output = new JTextArea();
final JTextArea info = new JTextArea();
frame.add(input, BorderLayout.NORTH);
frame.add(output, BorderLayout.CENTER);
frame.add(info, BorderLayout.SOUTH);
// initial query shown in the input field
input.setText("type=bird and !subtype=eagle and name=");
input.addKeyListener(new KeyAdapter() {
@Override
public void keyReleased(final KeyEvent e) {
// recompute the proposals for the current text and caret position
final String query = input.getText();
final int caretIndex = input.getCaretPosition();
final List<Proposal> proposals = dataStore.propose(query, caretIndex);
// one line per proposal: the proposed tag followed by the proposed query
final StringBuilder out = new StringBuilder();
for (final Proposal proposal : proposals) {
out.append(proposal.getProposedTag());
out.append(" ");
out.append(proposal.getProposedQuery());
out.append("\n");
}
// echo the input, with '|' showing the caret position
final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, "|").toString();
out.append("\n");
out.append("\n");
out.append("input: " + queryWithCaretMarker);
output.setText(out.toString());
}
});
// list the tags of every document returned by a search with an empty query
final List<Doc> docs = dataStore.search("");
final StringBuilder out = new StringBuilder();
out.append("info\n");
for (final Doc doc : docs) {
out.append(doc.getTags());
out.append("\n");
}
info.setText(out.toString());
frame.setSize(800, 600);
frame.setVisible(true);
// blocks inside the try-with-resources block; presumably to keep the data store
// open while the window is in use
TimeUnit.HOURS.sleep(1000);
}
}
/**
 * Asserts that the values proposed for a query match the expected values.
 * <p>
 * The caret position is derived from the position of the {@code |} marker in
 * {@code queryWithCaret}; the marker is removed before the query is handed to
 * the data store. Each proposal's new query is also executed and must find at
 * least one document. Proposed and expected values are compared in sorted
 * order, so the order of {@code expectedProposedValues} does not matter.
 *
 * @param queryWithCaret         the query, with {@code |} marking the caret
 * @param field                  not used by this method
 * @param expectedProposedValues the expected proposed tags; the list is not
 *                               modified and may be immutable
 */
private void assertProposals(final String queryWithCaret, final String field,
        final List<String> expectedProposedValues) {
    final String query = queryWithCaret.replace("|", "");
    final int caretIndex = queryWithCaret.indexOf("|");
    final List<Proposal> proposals = dataStore.propose(query, caretIndex);
    System.out.println(
            "proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList()));
    proposals.forEach(p -> assertQueryFindsResults(p.getNewQuery()));

    final List<String> proposedValues = CollectionUtils.map(proposals, Proposal::getProposedTag);
    Collections.sort(proposedValues);

    // Sort a copy: sorting the argument in place would reorder the caller's
    // list and fail for immutable lists such as List.of(...).
    final List<String> sortedExpectedValues = expectedProposedValues.stream().sorted()
            .collect(Collectors.toList());

    Assert.assertEquals(proposedValues.toString(), sortedExpectedValues.toString(), "proposed values:");
}
/**
 * Asserts that searching the data store with the given query yields at least
 * one document.
 *
 * @param query the query to execute
 */
private void assertQueryFindsResults(final String query) {
    final boolean nothingFound = dataStore.search(query).isEmpty();
    final String failureMessage = "The query '" + query + "' must return a result, but didn't.";
    Assert.assertFalse(nothingFound, failureMessage);
}
private void assertSearch(final String query, final Tags... tags) {
final List<Doc> actualDocs = dataStore.search(query);
final List<Long> actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber);

View File

@@ -141,6 +141,19 @@ public class ProposerTest {
);
}
/**
 * Verifies the proposals for a wildcard value in the first term of an
 * and-expression, once with a single-valued and once with a multi-valued
 * second term.
 */
public void testProposalWithAndExpression() throws Exception {
    // the caret is placed directly behind "name=*im"
    final int caretIndex = 8;

    final Proposal timSingleValue = new Proposal("Tim", "name=Tim and bird=eagle", true,
            "name=Tim and bird=eagle", 8);
    final Proposal timothySingleValue = new Proposal("Timothy", "name=Timothy and bird=eagle", true,
            "name=Timothy and bird=eagle", 12);
    assertProposals("name=*im and bird=eagle", caretIndex, timSingleValue, timothySingleValue);

    final Proposal timMultiValue = new Proposal("Tim", "name=Tim and bird=eagle,pigeon", true,
            "name=Tim and bird=eagle,pigeon", 8);
    final Proposal timothyMultiValue = new Proposal("Timothy", "name=Timothy and bird=eagle,pigeon", true,
            "name=Timothy and bird=eagle,pigeon", 12);
    assertProposals("name=*im and bird=eagle,pigeon", caretIndex, timMultiValue, timothyMultiValue);
}
private void assertProposals(final String query, final int caretIndex, final Proposal... expected)
throws InterruptedException {

View File

@@ -41,7 +41,7 @@ public class QueryCompletionIndexTest {
Tags.create("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C
);
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory.resolve("qci.bs"))) {
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) {
for (final Tags t : tags) {
index.addTags(t);
}
@@ -55,6 +55,13 @@ public class QueryCompletionIndexTest {
// tags A and C match firstname=John, but both have country=Atlantis
final SortedSet<String> countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country");
Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis"));
// findAllValuesForField sorts alphabetically
final SortedSet<String> firstnames = index.findAllValuesForField("firstname");
Assert.assertEquals(firstnames, Arrays.asList("Jane", "John"), "found: " + firstnames);
final SortedSet<String> countries = index.findAllValuesForField("country");
Assert.assertEquals(countries, Arrays.asList("Atlantis", "ElDorado"));
}
}
}

View File

@@ -11,8 +11,8 @@ public class Tag implements Comparable<Tag> {
}
/**
 * Creates a tag from a key and a value.
 * <p>
 * A non-null string is passed to {@code Tags.STRING_COMPRESSOR.put} and the
 * returned number is stored. A {@code null} key or value is stored as
 * {@code -1} without touching the compressor.
 *
 * @param key   the key of the tag, may be {@code null}
 * @param value the value of the tag, may be {@code null}
 */
public Tag(final String key, final String value) {
    // Only the null-safe assignments remain; the former unguarded assignments
    // would have passed null to the compressor and assigned each field twice.
    this.key = key != null ? Tags.STRING_COMPRESSOR.put(key) : -1;
    this.value = value != null ? Tags.STRING_COMPRESSOR.put(value) : -1;
}
@Override

View File

@@ -72,6 +72,13 @@ public class Tags implements Comparable<Tags> {
return result;
}
/**
 * Creates {@link Tags} that consist of four key/value pairs.
 * <p>
 * The pairs are added to a {@link TagsBuilder} in the order in which they are
 * given.
 *
 * @return the tags built from the four pairs
 */
public static Tags create(final String key1, final String value1, final String key2, final String value2,
        final String key3, final String value3, final String key4, final String value4) {
    return TagsBuilder.create() //
            .add(key1, value1) //
            .add(key2, value2) //
            .add(key3, value3) //
            .add(key4, value4) //
            .build();
}
public static Tags fromBytes(final byte[] bytes) {
final List<Tag> result = new ArrayList<>();
@@ -188,7 +195,7 @@ public class Tags implements Comparable<Tags> {
/**
 * Returns the string representation of the contained tags, without any
 * additional prefix or suffix.
 */
@Override
public String toString() {
    // A single return statement: the former "Tags [tags=...]" return made this
    // statement unreachable.
    return String.valueOf(tags);
}
@Override

View File

@@ -7,6 +7,7 @@ import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -62,4 +63,31 @@ public class CollectionUtils {
return collection.stream().filter(predicate).collect(Collectors.toList());
}
/**
 * Returns the index of the first element of the list that satisfies the
 * predicate.
 * <p>
 * The list is traversed with its iterator, so lists without efficient random
 * access (e.g. linked lists) are searched in a single pass.
 *
 * @param <T>       the type the predicate is evaluated against
 * @param list      the list to search
 * @param predicate the condition an element has to satisfy
 * @return the index of the first matching element, or {@code -1} if no
 *         element matches
 */
public static <T> int indexOf(final List<? extends T> list, final Predicate<? super T> predicate) {
    int index = 0;
    for (final T element : list) {
        if (predicate.test(element)) {
            return index;
        }
        index++;
    }
    return -1;
}
/**
 * Returns a new collection with the elements of {@code collection} minus the
 * elements of {@code remove}.
 * <p>
 * Neither argument is modified; the result is obtained from {@code generator},
 * filled via {@link Collection#addAll(Collection)} and reduced via
 * {@link Collection#removeAll(Collection)}.
 *
 * @param collection the elements to start with
 * @param remove     the elements to leave out
 * @param generator  supplies the collection that is filled and returned
 * @return the collection supplied by {@code generator}
 */
public static <V, T extends Collection<V>> T removeAll(final T collection, final T remove,
        final Supplier<T> generator) {
    final T difference = generator.get();
    difference.addAll(collection);
    difference.removeAll(remove);
    return difference;
}
/**
 * Returns a new collection with those elements of {@code collection} that are
 * also contained in {@code retain}.
 * <p>
 * Neither argument is modified; the result is obtained from {@code generator},
 * filled via {@link Collection#addAll(Collection)} and reduced via
 * {@link Collection#retainAll(Collection)}.
 *
 * @param collection the elements to start with
 * @param retain     the elements to keep
 * @param generator  supplies the collection that is filled and returned
 * @return the collection supplied by {@code generator}
 */
public static <V, T extends Collection<V>> T retainAll(final T collection, final T retain,
        final Supplier<T> generator) {
    final T intersection = generator.get();
    intersection.addAll(collection);
    intersection.retainAll(retain);
    return intersection;
}
}

View File

@@ -39,6 +39,12 @@ public class Preconditions {
}
}
/**
 * Check that {@code a} is strictly smaller than {@code b}.
 *
 * @param a       the value that must be the smaller one
 * @param b       the value that {@code a} is compared against
 * @param message formatted with {@link MessageFormat}
 * @param args    arguments for the message
 * @throws IllegalStateException if {@code a} is greater than or equal to
 *                               {@code b}
 */
public static void checkSmaller(final long a, final long b, final String message, final Object... args) {
    if (a < b) {
        return;
    }
    // the message is only formatted when the check fails
    final String formattedMessage = MessageFormat.format(message, args);
    throw new IllegalStateException(formattedMessage + " Expected: " + a + " < " + b);
}
/**
 * Check that the two given values are equal, using a default message.
 * <p>
 * Delegates to the overload that takes a message and passes both values as
 * arguments for the message.
 *
 * @param actual   the value to check
 * @param expected the value {@code actual} is compared against
 */
public static void checkEqual(final Object actual, final Object expected) {
    final String defaultMessage = "expected {0} is equal to {1}";
    checkEqual(actual, expected, defaultMessage, actual, expected);
}
@@ -74,6 +80,18 @@ public class Preconditions {
checkEqual(actual, true, message, args);
}
/**
 * Check that the given value is {@code false}.
 * <p>
 * Delegates to {@code checkEqual} with {@code false} as the expected value.
 *
 * @param actual  the value to check; must be {@code false}
 * @param message formatted with {@link MessageFormat}
 * @param args    arguments for the message
 * @throws IllegalStateException if {@code actual} is not false
 */
public static void checkFalse(final boolean actual, final String message, final Object... args) {
    final boolean expected = false;
    checkEqual(actual, expected, message, args);
}
public static void checkNull(final Object actual, final String message, final Object... args) {
if (actual != null) {
throw new IllegalStateException(MessageFormat.format(message, args));