From 76e5d441de2f19731a3d63a03be2587d3b79e718 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sat, 2 Feb 2019 15:35:56 +0100 Subject: [PATCH] rewrite query completion The old implementation searched for all possible values and then executed each query to see what matches. The new implementation uses several indices to find only the matching values. --- .../pdb/map/PersistentMapOfListsOfLongs.java | 107 +++++++ .../map/PersistentMapOfListsOfLongsTest.java | 62 ++++ .../org/lucares/pdb/datastore/lang/PdbLang.g4 | 4 +- .../pdb/datastore/internal/DataStore.java | 67 +++-- .../internal/QueryCompletionIndex.java | 242 +++++++++++++--- .../pdb/datastore/lang/Expression.java | 249 +++++++++++++--- .../pdb/datastore/lang/ExpressionVisitor.java | 12 + .../lang/FindValuesForQueryCompletion.java | 225 +++++++++++++++ .../pdb/datastore/lang/GloblikePattern.java | 27 +- .../lang/IdentityExpressionVisitor.java | 79 ++++++ .../pdb/datastore/lang/NewProposerParser.java | 182 +++++++++++- .../QueryCompletionExpressionOptimizer.java | 268 ++++++++++++++++++ .../pdb/datastore/lang/QueryLanguage.java | 26 +- .../pdb/datastore/internal/DataStoreTest.java | 171 +++++++++++ .../pdb/datastore/internal/ProposerTest.java | 13 + .../internal/QueryCompletionIndexTest.java | 9 +- .../main/java/org/lucares/pdb/api/Tag.java | 4 +- .../main/java/org/lucares/pdb/api/Tags.java | 9 +- .../org/lucares/utils/CollectionUtils.java | 28 ++ .../java/org/lucares/utils/Preconditions.java | 18 ++ 20 files changed, 1676 insertions(+), 126 deletions(-) create mode 100644 block-storage/src/main/java/org/lucares/pdb/map/PersistentMapOfListsOfLongs.java create mode 100644 block-storage/src/test/java/org/lucares/pdb/map/PersistentMapOfListsOfLongsTest.java create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/lang/IdentityExpressionVisitor.java create mode 100644 
data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionExpressionOptimizer.java diff --git a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapOfListsOfLongs.java b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapOfListsOfLongs.java new file mode 100644 index 0000000..03f0ba6 --- /dev/null +++ b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapOfListsOfLongs.java @@ -0,0 +1,107 @@ +package org.lucares.pdb.map; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.NoSuchElementException; +import java.util.stream.Stream; + +import org.lucares.collections.LongList; +import org.lucares.pdb.blockstorage.BSFile; +import org.lucares.pdb.diskstorage.DiskStorage; +import org.lucares.pdb.map.PersistentMap.EncoderDecoder; +import org.lucares.utils.Preconditions; +import org.lucares.utils.cache.HotEntryCache; +import org.lucares.utils.cache.HotEntryCache.Event; +import org.lucares.utils.cache.HotEntryCache.EventListener; +import org.lucares.utils.cache.HotEntryCache.EventType; + +/** + * Combines {@link PersistentMap} and {@link BSFile} to represent a map where + * the values are lists of longs. + */ +public class PersistentMapOfListsOfLongs implements AutoCloseable { + + private static final class RemovalListener implements EventListener { + @Override + public void onEvent(final Event event) { + event.getValue().close(); + } + } + + private final PersistentMap map; + private final Path mapPath; + private final DiskStorage diskStore; + private final Path diskStorePath; + + private final HotEntryCache writerCache; + + /** + * Creates a new map that stores indexed streams/lists of longs. + *

+ * This class creates two files on disk. One for the index and one for the lists + * of longs. + * + * @param path the folder where to store the map + * @param filePrefix prefix of the files + * @param keyEncoder {@link EncoderDecoder} for the key + * @throws IOException + */ + public PersistentMapOfListsOfLongs(final Path path, final String filePrefix, final EncoderDecoder keyEncoder) + throws IOException { + Preconditions.checkTrue(Files.isDirectory(path), "must be a directory {0}", path); + mapPath = path.resolve(filePrefix + "_index.bs"); + diskStorePath = path.resolve(filePrefix + "_data.bs"); + map = new PersistentMap<>(mapPath, keyEncoder, PersistentMap.LONG_CODER); + diskStore = new DiskStorage(diskStorePath); + + writerCache = new HotEntryCache<>(Duration.ofMinutes(10), filePrefix + "Cache"); + writerCache.addListener(new RemovalListener(), EventType.EVICTED, EventType.REMOVED); + } + + public synchronized void appendLong(final K key, final long value) throws IOException { + + BSFile cachedWriter = writerCache.get(key); + if (cachedWriter == null) { + final Long bsFileBlockNumber = map.getValue(key); + + if (bsFileBlockNumber == null) { + cachedWriter = BSFile.newFile(diskStore); + map.putValue(key, cachedWriter.getRootBlockOffset()); + } else { + cachedWriter = BSFile.existingFile(bsFileBlockNumber, diskStore); + } + writerCache.put(key, cachedWriter); + } + cachedWriter.append(value); + } + + public synchronized boolean hasKey(final K key) throws IOException { + return map.getValue(key) != null; + } + + public synchronized Stream getLongs(final K key) throws IOException { + final Long bsFileBlockNumber = map.getValue(key); + if (bsFileBlockNumber == null) { + throw new NoSuchElementException("the map at '" + mapPath + "' does not contain the key '" + key + "'"); + } + + final BSFile bsFile = BSFile.existingFile(bsFileBlockNumber, diskStore); + + return bsFile.streamOfLongLists(); + } + + @Override + public void close() throws IOException { + try { + try 
{ + writerCache.forEach(bsFile -> bsFile.close()); + } finally { + map.close(); + } + } finally { + diskStore.close(); + } + } +} diff --git a/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapOfListsOfLongsTest.java b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapOfListsOfLongsTest.java new file mode 100644 index 0000000..f0a7876 --- /dev/null +++ b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapOfListsOfLongsTest.java @@ -0,0 +1,62 @@ +package org.lucares.pdb.map; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.lucares.collections.LongList; +import org.lucares.utils.file.FileUtils; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Test +public class PersistentMapOfListsOfLongsTest { + + private Path dataDirectory; + + @BeforeMethod + public void beforeMethod() throws IOException { + dataDirectory = Files.createTempDirectory("pdb"); + } + + @AfterMethod + public void afterMethod() throws IOException { + FileUtils.delete(dataDirectory); + } + + public void test() throws IOException { + + final String mapFilePrefix = "test"; + final String keyA = "a"; + final String keyB = "b"; + + final int size = 10; + final LongList a = LongList.range(0, size); + a.shuffle(); + final LongList b = LongList.range(0, size); + b.shuffle(); + + try (PersistentMapOfListsOfLongs map = new PersistentMapOfListsOfLongs<>(dataDirectory, mapFilePrefix, + PersistentMap.STRING_CODER)) { + + for (int i = 0; i < size; i++) { + map.appendLong(keyA, a.get(i)); + map.appendLong(keyB, b.get(i)); + } + } + + try (PersistentMapOfListsOfLongs map = new PersistentMapOfListsOfLongs<>(dataDirectory, mapFilePrefix, + PersistentMap.STRING_CODER)) { + + final LongList actualA = new LongList(); + map.getLongs(keyA).forEachOrdered(actualA::addAll); + Assert.assertEquals(actualA, a); + + final LongList actualB = 
new LongList(); + map.getLongs(keyB).forEachOrdered(actualB::addAll); + Assert.assertEquals(actualB, b); + } + } +} diff --git a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 index 70ee164..d00d796 100644 --- a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 +++ b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 @@ -51,7 +51,7 @@ fragment JavaLetter : [a-zA-Z0-9$_] // these are the "java letters" below 0x7F | [\u002a] // asterisk, used for wildcards - | [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area) + | [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area) | // covers all characters above 0x7F which are not a surrogate ~[\u0000-\u007F\uD800-\uDBFF] {Character.isJavaIdentifierStart(_input.LA(-1))}? @@ -64,7 +64,7 @@ fragment JavaLetterOrDigit : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F | [\u002a] // asterisk, used for wildcards - | [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area) + | [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area) | '.' 
| '/' | '-' diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index a3077b9..6e257af 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -161,7 +161,6 @@ public class DataStore implements AutoCloseable { private final DiskStorage diskStorage; private final Path diskStorageFilePath; private final Path storageBasePath; - private final Path queryCompletionIndexFile; public DataStore(final Path dataDirectory) throws IOException { storageBasePath = storageDirectory(dataDirectory); @@ -183,8 +182,7 @@ public class DataStore implements AutoCloseable { final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs"); docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC); - queryCompletionIndexFile = storageBasePath.resolve("queryCompletionIndex.bs"); - queryCompletionIndex = new QueryCompletionIndex(queryCompletionIndexFile); + queryCompletionIndex = new QueryCompletionIndex(storageBasePath); } private Path keyCompressionFile(final Path dataDirectory) throws IOException { @@ -195,39 +193,47 @@ public class DataStore implements AutoCloseable { return dataDirectory.resolve(SUBDIR_STORAGE); } - public long createNewFile(final Tags tags) throws IOException { + // visible for test + QueryCompletionIndex getQueryCompletionIndex() { + return queryCompletionIndex; + } - final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); + public long createNewFile(final Tags tags) { + try { + final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); - final long docId = createUniqueDocId(); - final Doc doc = new Doc(tags, newFilesRootBlockOffset); - docIdToDoc.putValue(docId, doc); + final long docId = createUniqueDocId(); + final Doc doc = new Doc(tags, 
newFilesRootBlockOffset); + docIdToDoc.putValue(docId, doc); - final Long oldDocId = tagsToDocId.putValue(tags, docId); - Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags); + final Long oldDocId = tagsToDocId.putValue(tags, docId); + Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags); - // store mapping from tag to docId, so that we can find all docs for a given tag - final List ts = new ArrayList<>(tags.toTags()); - ts.add(TAG_ALL_DOCS); - for (final Tag tag : ts) { + // store mapping from tag to docId, so that we can find all docs for a given tag + final List ts = new ArrayList<>(tags.toTags()); + ts.add(TAG_ALL_DOCS); + for (final Tag tag : ts) { - Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag); + Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag); - if (diskStoreOffsetForDocIdsOfTag == null) { - diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); - tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag); + if (diskStoreOffsetForDocIdsOfTag == null) { + diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); + tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag); + } + + try (final BSFile docIdsOfTag = BSFile.existingFile(diskStoreOffsetForDocIdsOfTag, diskStorage)) { + docIdsOfTag.append(docId); + } } - try (final BSFile docIdsOfTag = BSFile.existingFile(diskStoreOffsetForDocIdsOfTag, diskStorage)) { - docIdsOfTag.append(docId); - } + // index the tags, so that we can efficiently find all possible values for a + // field in a query + queryCompletionIndex.addTags(tags); + + return newFilesRootBlockOffset; + } catch (final IOException e) { + throw new RuntimeIOException(e); } - - // index the tags, so that we can efficiently find all possible values for a - // field in a query - queryCompletionIndex.addTags(tags); - - return newFilesRootBlockOffset; } private long createUniqueDocId() { @@ -357,11 
+363,10 @@ public class DataStore implements AutoCloseable { public List propose(final String query, final int caretIndex) { - final NewProposerParser newProposerParser = new NewProposerParser(this); + final NewProposerParser newProposerParser = new NewProposerParser(queryCompletionIndex); final List proposals = newProposerParser.propose(query, caretIndex); - System.out.println(proposals); - - return new Proposer(this).propose(query, caretIndex); + LOGGER.debug("Proposals for query {}: {}", query, proposals); + return proposals; } public DiskStorage getDiskStorage() { diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java index dcd66ab..19d51a5 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java @@ -7,11 +7,13 @@ import java.util.SortedSet; import java.util.TreeSet; import org.lucares.collections.LongList; +import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tags; import org.lucares.pdb.map.Empty; import org.lucares.pdb.map.PersistentMap; import org.lucares.pdb.map.PersistentMap.EncoderDecoder; +import org.lucares.utils.Preconditions; import org.lucares.utils.byteencoder.VariableByteEncoder; /** @@ -22,30 +24,42 @@ import org.lucares.utils.byteencoder.VariableByteEncoder; *
* The expensive way is to execute the query for all available lastnames and * keep those that return at least one result.
- A more effiecient way uses an index that lists all lastnames that occurr with + * A more efficient way uses an index that lists all lastnames that occur with * firstname=John. If we write this as table, then it looks like this: * *

  *┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
- *┃ fieldA  ┃ valueA  ┃ fieldB  ┃  valueB ┃
+ *┃ fieldB  ┃ fieldA  ┃ valueA  ┃  valueB ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃lastname ┃ Connor  ┃
+ *┃lastname ┃firstname┃ John    ┃ Connor  ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃lastname ┃Carpenter┃
+ *┃lastname ┃firstname┃ John    ┃Carpenter┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃country  ┃ Germany ┃
+ *┃country  ┃firstname┃ John    ┃ Germany ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃lastname ┃ Nash    ┃
+ *┃lastname ┃firstname┃ John    ┃ Nash    ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ Rick    ┃lastname ┃ Meyer   ┃
+ *┃lastname ┃firstname┃ Rick    ┃ Meyer   ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ Rick    ┃lastname ┃ Castle  ┃
+ *┃lastname ┃firstname┃ Rick    ┃ Castle  ┃
  *┗━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┛
  * 
* - * The lastnames where firstname=John are Connor, Carpenter and Nash. Given such - * a table we can just for all rows with fieldA=firstname and valueA=John and - * fieldB = lastname. + * The lastnames where firstname=John are: Connor, Carpenter and Nash. Given + * such a table we can just search for all rows with fieldA=firstname and + * valueA=John and fieldB = lastname. + *

+ * Please note that the columns for fieldA and fieldB come first. This is to + * make this index more suitable for IN-expressions and wildcard expressions of + * fieldA. We can now find all values for lastname where firstname=J*n* + * by searching for fieldA=firstname and fieldB=lastname, and then do the wildcard + * evaluation while iterating over those hits. We do not have to expand the + * wildcard and then do hundreds or thousands of queries. + *

+ * Please note that fieldB comes before fieldA. This is so that we can run + * inverse searches more efficiently, e.g. finding all values for + * fieldB=lastname where fieldA=firstname has a value != Connor. This is used + * for queries like 'NOT (firstname=Connor) and lastname=|' *

* The values in this index represent such a table. *

@@ -63,6 +77,12 @@ public class QueryCompletionIndex implements AutoCloseable { this.tagB = tagB; } + public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) { + + tagA = new Tag(fieldA, valueA); + tagB = new Tag(fieldB, valueB); + } + public Tag getTagA() { return tagA; } @@ -75,7 +95,29 @@ public class QueryCompletionIndex implements AutoCloseable { public String toString() { return tagA + "::" + tagB; } + } + public static final class FieldField { + private final int fieldA; + private final int fieldB; + + public FieldField(final int fieldA, final int fieldB) { + this.fieldA = fieldA; + this.fieldB = fieldB; + } + + public int getFieldA() { + return fieldA; + } + + public int getFieldB() { + return fieldB; + } + + @Override + public String toString() { + return fieldA + "::" + fieldB; + } } private static final class EncoderTwoTags implements EncoderDecoder { @@ -86,17 +128,22 @@ public class QueryCompletionIndex implements AutoCloseable { final Tag tagA = tagAndField.getTagA(); final Tag tagB = tagAndField.getTagB(); - tmp.add(tagA.getKey()); - tmp.add(tagA.getValue()); - tmp.add(tagB.getKey()); + tmp.add(tagA.getKey()); - // A query for tagA.key and tagA.value and tagB.key is done by setting - // tagB.value==0. - // The query is then executed as a prefix search. Thus tagB.value must not be - // part of the byte array that is returned. - if (tagB.getValue() >= 0) { - tmp.add(tagB.getValue()); + if (tagA.getValue() >= 0) { + tmp.add(tagA.getValue()); + + // A query for tagA.key and tagA.value and tagB.key is done by setting + // tagB.value==-1. + // The query is then executed as a prefix search. Thus tagB.value must not be + // part of the byte array that is returned. 
+ if (tagB.getValue() >= 0) { + tmp.add(tagB.getValue()); + } + } else { + Preconditions.checkSmaller(tagB.getValue(), 0, + "if no value for tagA is given, then tagB must also be empty"); } return VariableByteEncoder.encode(tmp); @@ -106,9 +153,9 @@ public class QueryCompletionIndex implements AutoCloseable { public TwoTags decode(final byte[] bytes) { final LongList tmp = VariableByteEncoder.decode(bytes); - final int tagAKey = (int) tmp.get(0); - final int tagAValue = (int) tmp.get(1); - final int tagBKey = (int) tmp.get(2); + final int tagBKey = (int) tmp.get(0); + final int tagAKey = (int) tmp.get(1); + final int tagAValue = (int) tmp.get(2); final int tagBValue = (int) tmp.get(3); final Tag tagA = new Tag(tagAKey, tagAValue); @@ -118,23 +165,81 @@ public class QueryCompletionIndex implements AutoCloseable { } } - private final PersistentMap tagToTagIndex; + private static final class EncoderTag implements EncoderDecoder { - public QueryCompletionIndex(final Path indexFile) throws IOException { - tagToTagIndex = new PersistentMap<>(indexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER); + @Override + public byte[] encode(final Tag tag) { + + final LongList longList = new LongList(2); + longList.add(tag.getKey()); + + if (tag.getValue() >= 0) { + longList.add(tag.getValue()); + } + return VariableByteEncoder.encode(longList); + } + + @Override + public Tag decode(final byte[] bytes) { + final LongList tmp = VariableByteEncoder.decode(bytes); + final int key = (int) tmp.get(0); + final int value = (int) tmp.get(1); + return new Tag(key, value); + } + + } + + private static final class EncoderField implements EncoderDecoder { + + @Override + public byte[] encode(final String field) { + + if (field.isEmpty()) { + return new byte[0]; + } + + return VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field)); + } + + @Override + public String decode(final byte[] bytes) { + final long compressedString = VariableByteEncoder.decodeFirstValue(bytes); + return 
Tags.STRING_COMPRESSOR.get((int) compressedString); + } + } + + private final PersistentMap tagToTagIndex; + private final PersistentMap fieldToValueIndex; + private final PersistentMap fieldIndex; + + public QueryCompletionIndex(final Path basePath) throws IOException { + final Path tagToTagIndexFile = basePath.resolve("queryCompletionTagToTagIndex.bs"); + tagToTagIndex = new PersistentMap<>(tagToTagIndexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER); + + final Path fieldToValueIndexFile = basePath.resolve("queryCompletionFieldToValueIndex.bs"); + fieldToValueIndex = new PersistentMap<>(fieldToValueIndexFile, new EncoderTag(), PersistentMap.EMPTY_ENCODER); + + final Path fieldIndexFile = basePath.resolve("queryCompletionFieldIndex.bs"); + fieldIndex = new PersistentMap<>(fieldIndexFile, new EncoderField(), PersistentMap.EMPTY_ENCODER); } public void addTags(final Tags tags) throws IOException { final List listOfTagsA = tags.toTags(); final List listOfTagsB = tags.toTags(); - // index all combinations of tagA and tagB + // index all combinations of tagA and tagB and fieldA to fieldB for (final Tag tagA : listOfTagsA) { for (final Tag tagB : listOfTagsB) { final TwoTags key = new TwoTags(tagA, tagB); tagToTagIndex.putValue(key, Empty.INSTANCE); } } + + // create indices of all tags and all fields + for (final Tag tag : listOfTagsA) { + fieldToValueIndex.putValue(tag, Empty.INSTANCE); + fieldIndex.putValue(tag.getKeyAsString(), Empty.INSTANCE); + } } @Override @@ -142,17 +247,76 @@ public class QueryCompletionIndex implements AutoCloseable { tagToTagIndex.close(); } - public SortedSet find(final Tag tag, final String field) throws IOException { - final SortedSet result = new TreeSet<>(); - final int tagBKey = Tags.STRING_COMPRESSOR.put(field); - final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. 
See - // EncoderTwoTags - final TwoTags keyPrefix = new TwoTags(tag, tagB); - tagToTagIndex.visitValues(keyPrefix, (k, v) -> { - result.add(k.getTagB().getValueAsString()); - }); - - return result; - + public SortedSet find(final String property, final String value, final String field) { + final Tag tag = new Tag(property, value); + return find(tag, field); } + + public SortedSet find(final Tag tag, final String field) { + try { + final SortedSet result = new TreeSet<>(); + final int tagBKey = Tags.STRING_COMPRESSOR.put(field); + final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See + // EncoderTwoTags + final TwoTags keyPrefix = new TwoTags(tag, tagB); + tagToTagIndex.visitValues(keyPrefix, (k, v) -> { + result.add(k.getTagB().getValueAsString()); + }); + + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + public SortedSet findAllValuesForField(final String field) { + try { + final SortedSet result = new TreeSet<>(); + final int tagKey = Tags.STRING_COMPRESSOR.put(field); + final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. 
See + + fieldToValueIndex.visitValues(keyPrefix, (k, v) -> { + result.add(k.getValueAsString()); + }); + + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + public SortedSet findAllValuesNotForField(final Tag tag, final String field) { + try { + final SortedSet result = new TreeSet<>(); + + final TwoTags keyPrefix = new TwoTags(field, tag.getKeyAsString(), null, null); + + final int negatedValueA = tag.getValue(); + + tagToTagIndex.visitValues(keyPrefix, (k, v) -> { + + final int valueA = k.getTagA().getValue(); + if (valueA != negatedValueA) { + result.add(k.getTagB().getValueAsString()); + } + }); + + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + public SortedSet findAllFields() { + try { + final SortedSet result = new TreeSet<>(); + fieldIndex.visitValues("", (k, v) -> { + result.add(k); + }); + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java index 9fdb0c1..433024a 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java @@ -5,6 +5,7 @@ import java.util.Arrays; import java.util.List; import org.lucares.utils.CollectionUtils; +import org.lucares.utils.Preconditions; abstract public class Expression { @@ -12,30 +13,8 @@ abstract public class Expression { throw new UnsupportedOperationException(); } - abstract static class UnaryExpression extends Expression { - - private final int line; - private final int startIndex; - private final int stopIndex; - - public UnaryExpression(final int line, final int startIndex, final int stopIndex) { - super(); - this.line = line; - this.startIndex = startIndex; - this.stopIndex = stopIndex; - } - - int getLine() { - return line; 
- } - - int getStartIndex() { - return startIndex; - } - - int getStopIndex() { - return stopIndex; - } + boolean containsCaret() { + throw new UnsupportedOperationException(); } abstract static class TemporaryExpression extends Expression { @@ -93,6 +72,11 @@ abstract public class Expression { return "!" + expression; } + @Override + boolean containsCaret() { + return expression.containsCaret(); + } + @Override public int hashCode() { final int prime = 31; @@ -150,7 +134,12 @@ abstract public class Expression { @Override public String toString() { - return " (" + left + " or " + right + ") "; + return "(" + left + " or " + right + ")"; + } + + @Override + boolean containsCaret() { + return left.containsCaret() || right.containsCaret(); } @Override @@ -191,7 +180,7 @@ abstract public class Expression { return true; } - public static Expression create(final List or) { + public static Expression create(final List or) { if (or.size() == 1) { return or.get(0); @@ -231,7 +220,12 @@ abstract public class Expression { @Override public String toString() { - return " (" + left + " and " + right + ") "; + return "(" + left + " and " + right + ")"; + } + + @Override + boolean containsCaret() { + return left.containsCaret() || right.containsCaret(); } @Override @@ -294,11 +288,11 @@ abstract public class Expression { } } - static class Terminal extends UnaryExpression { + static class Terminal extends Expression { private final String value; - Terminal(final String value, final int line, final int startIndex, final int stopIndex) { - super(line, startIndex, stopIndex); + Terminal(final String value) { + this.value = value; } @@ -313,6 +307,11 @@ abstract public class Expression { return value; } + @Override + boolean containsCaret() { + return value.contains(NewProposerParser.CARET_MARKER); + } + @Override public int hashCode() { final int prime = 31; @@ -365,7 +364,24 @@ abstract public class Expression { @Override public String toString() { - return " " + property + " = " + 
value.getValue() + " "; + return property + " = " + value.getValue(); + } + + @Override + boolean containsCaret() { + return value.containsCaret(); + } + + public String getProperty() { + return property; + } + + public Terminal getValue() { + return value; + } + + public String getValueAsString() { + return value.getValue(); } @Override @@ -398,6 +414,7 @@ abstract public class Expression { return false; return true; } + } static class Parentheses extends Expression { @@ -419,7 +436,12 @@ abstract public class Expression { @Override public String toString() { - return " [ " + expression + " ] "; + return "[ " + expression + " ]"; + } + + @Override + boolean containsCaret() { + return expression.containsCaret(); } @Override @@ -473,6 +495,16 @@ abstract public class Expression { public String toString() { return "(" + String.join(", ", getValues()) + ")"; } + + @Override + boolean containsCaret() { + for (final Terminal terminal : propertyValues) { + if (terminal.containsCaret()) { + return true; + } + } + return false; + } } static class InExpression extends Expression { @@ -506,6 +538,16 @@ abstract public class Expression { return values; } + @Override + boolean containsCaret() { + for (final String value : values) { + if (value.contains(NewProposerParser.CARET_MARKER)) { + return true; + } + } + return false; + } + @Override public int hashCode() { final int prime = 31; @@ -537,4 +579,147 @@ abstract public class Expression { return true; } } + + public static final class AndCaretExpression extends Expression { + Property caretExpression; + Expression expression; + + public AndCaretExpression(final Property caretExpression, final Expression expression) { + Preconditions.checkTrue(caretExpression.containsCaret(), "the expression '{0}' must contain the caret", + caretExpression); + Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret", + caretExpression); + this.caretExpression = caretExpression; + this.expression 
= expression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + @Override + boolean containsCaret() { + + return caretExpression.containsCaret(); + } + + public Property getCaretExpression() { + return caretExpression; + } + + public Expression getExpression() { + return expression; + } + + @Override + public String toString() { + return "(" + caretExpression + " and " + expression + ")"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((caretExpression == null) ? 0 : caretExpression.hashCode()); + result = prime * result + ((expression == null) ? 0 : expression.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final AndCaretExpression other = (AndCaretExpression) obj; + if (caretExpression == null) { + if (other.caretExpression != null) + return false; + } else if (!caretExpression.equals(other.caretExpression)) + return false; + if (expression == null) { + if (other.expression != null) + return false; + } else if (!expression.equals(other.expression)) + return false; + return true; + } + } + + public static final class AndNotCaretExpression extends Expression { + Property negatedCaretExpression; + Expression expression; + + public AndNotCaretExpression(final Property negatedCaretExpression, final Expression expression) { + Preconditions.checkTrue(negatedCaretExpression.containsCaret(), + "the expression '{0}' must contain the caret", negatedCaretExpression); + Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret", + expression); + this.negatedCaretExpression = negatedCaretExpression; + this.expression = expression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + @Override 
+ boolean containsCaret() { + + return negatedCaretExpression.containsCaret(); + } + + public Property getCaretExpression() { + return negatedCaretExpression; + } + + public Expression getExpression() { + return expression; + } + + @Override + public String toString() { + return "(!" + negatedCaretExpression + " and " + expression + ")"; + } + } + + public static final class CaretAndExpression extends Expression { + + private final Property caretExpression; + private final Property otherExpression; + + public CaretAndExpression(final Property caretExpression, final Property otherExpression) { + this.caretExpression = caretExpression; + this.otherExpression = otherExpression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return super.visit(visitor); + } + + @Override + boolean containsCaret() { + Preconditions.checkTrue(caretExpression.containsCaret(), + "CaretAndExpression must contain the caret, but was: {0}", this); + return caretExpression.containsCaret(); + } + + @Override + public String toString() { + return "(caretAnd: " + caretExpression + " and " + otherExpression + ")"; + } + + public Property getCaretExpression() { + return caretExpression; + } + + public Property getOtherExpression() { + return otherExpression; + } + } } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java index b4ff5d1..7447609 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java @@ -32,4 +32,16 @@ public abstract class ExpressionVisitor { public T visit(final Expression.Parentheses parentheses) { throw new UnsupportedOperationException(); } + + public T visit(final Expression.AndCaretExpression expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.AndNotCaretExpression 
expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.CaretAndExpression expression) { + throw new UnsupportedOperationException(); + } } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java new file mode 100644 index 0000000..b25a742 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java @@ -0,0 +1,225 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.Collections; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.lucares.pdb.api.Tag; +import org.lucares.pdb.datastore.internal.QueryCompletionIndex; +import org.lucares.pdb.datastore.lang.Expression.And; +import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.InExpression; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.Or; +import org.lucares.pdb.datastore.lang.Expression.Property; +import org.lucares.utils.CollectionUtils; + +public class FindValuesForQueryCompletion extends ExpressionVisitor> { + + private static final class AndCaretExpressionVisitor extends ExpressionVisitor> { + private final QueryCompletionIndex index; + private final String field; + + public AndCaretExpressionVisitor(final QueryCompletionIndex queryCompletionIndex, final String field) { + index = queryCompletionIndex; + this.field = field; + } + + @Override + public SortedSet visit(final Property property) { + + final String fieldA = property.getProperty(); + final String valueA = property.getValue().getValue(); + + return index.find(fieldA, valueA, field); + } + + @Override + public SortedSet visit(final InExpression expression) { + + final SortedSet result 
= new TreeSet<>(); + final String property = expression.getProperty(); + final List values = expression.getValues(); + for (final String value : values) { + final SortedSet candidates = index.find(property, value, field); + result.addAll(candidates); + } + + return result; + } + + @Override + public SortedSet visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left instanceof Property && right instanceof Not) { + final Property leftProperty = (Property) left; + + final SortedSet allValuesForField = leftProperty.visit(this); + + final Expression rightInnerExpression = ((Not) right).getExpression(); + final SortedSet rightResult = rightInnerExpression.visit(this); + + return CollectionUtils.removeAll(allValuesForField, rightResult, TreeSet::new); + + } else { + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.retainAll(rightResult); + + return result; + } + } + + @Override + public SortedSet visit(final Or expression) { + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.addAll(rightResult); + + return result; + } + + @Override + public SortedSet visit(final Not expression) { + + if (!(expression.getExpression() instanceof Property)) { + throw new UnsupportedOperationException("NOT expressions like '" + expression + + "' are not supported. 
Only 'NOT property=value' expressions are supported."); + } + + final Property property = (Property) expression.getExpression(); + final Tag tag = new Tag(property.getProperty(), property.getValueAsString()); + + final SortedSet valuesNotForField = index.findAllValuesNotForField(tag, field); + final SortedSet valuesForField = index.find(tag, field); + final SortedSet valuesOnlyAvailableInField = CollectionUtils.removeAll(valuesForField, + valuesNotForField, TreeSet::new); + + final SortedSet result = CollectionUtils.removeAll(valuesNotForField, valuesOnlyAvailableInField, + TreeSet::new); + + return result; + } + } + + private final QueryCompletionIndex queryCompletionIndex; + + public FindValuesForQueryCompletion(final QueryCompletionIndex queryCompletionIndex) { + this.queryCompletionIndex = queryCompletionIndex; + } + + @Override + public SortedSet visit(final Property property) { + + final String field = property.getProperty(); + final String value = property.getValue().getValue(); + + final SortedSet allValuesForField = queryCompletionIndex.findAllValuesForField(field); + + final String valuePrefix = value.substring(0, value.indexOf(NewProposerParser.CARET_MARKER)); + + return GloblikePattern.filterValues(allValuesForField, valuePrefix, TreeSet::new); + } + + @Override + public SortedSet visit(final AndCaretExpression expression) { + + final Property caretExpression = expression.getCaretExpression(); + final String field = caretExpression.getProperty(); + final String valueWithCaretMarker = caretExpression.getValue().getValue(); + final String valuePrefix = valueWithCaretMarker.substring(0, + valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); + + final Expression rightHandExpression = expression.getExpression(); + + final SortedSet candidateValues = rightHandExpression + .visit(new AndCaretExpressionVisitor(queryCompletionIndex, field)); + + return GloblikePattern.filterValues(candidateValues, valuePrefix, TreeSet::new); + } + + @Override + public 
SortedSet visit(final AndNotCaretExpression expression) { + + final Property caretExpression = expression.getCaretExpression(); + final String field = caretExpression.getProperty(); + final String valueWithCaretMarker = caretExpression.getValue().getValue(); + final String valuePattern = valueWithCaretMarker.substring(0, + valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); + + final SortedSet allValuesForField = queryCompletionIndex + .findAllValuesForField(caretExpression.getProperty()); + final SortedSet valuesForFieldMatchingCaretExpression = GloblikePattern.filterValues(allValuesForField, + valuePattern, TreeSet::new); + + final Expression rightHandExpression = expression.getExpression(); + + final SortedSet rightHandValues = rightHandExpression + .visit(new AndCaretExpressionVisitor(queryCompletionIndex, field)); + + if (rightHandValues.size() == 1) { + // there is only one alternative and that one must not be chosen + return Collections.emptySortedSet(); + } + final SortedSet result = CollectionUtils.retainAll(rightHandValues, + valuesForFieldMatchingCaretExpression, TreeSet::new); + return result; + } + + @Override + public SortedSet visit(final Not expression) { + + final String field; + final Expression innerExpression = expression.getExpression(); + if (innerExpression instanceof Property) { + field = ((Property) innerExpression).getProperty(); + final SortedSet allValuesForField = queryCompletionIndex.findAllValuesForField(field); + final String valueWithCaretMarker = ((Property) innerExpression).getValue().getValue(); + final String valuePrefix = valueWithCaretMarker.substring(0, + valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); + final TreeSet result = GloblikePattern.filterValues(allValuesForField, valuePrefix + "*", + TreeSet::new); + return result; + } else { + throw new UnsupportedOperationException(); + } + } + + @Override + public SortedSet visit(final Or expression) { + final Expression left = expression.getLeft(); + 
final Expression right = expression.getRight(); + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.addAll(rightResult); + + return result; + } + + @Override + public SortedSet visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.retainAll(rightResult); + + return result; + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java index 3a1688f..c75c377 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java @@ -1,11 +1,14 @@ package org.lucares.pdb.datastore.lang; +import java.util.Collection; +import java.util.function.Supplier; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class GloblikePattern { +public class GloblikePattern { private static final Logger LOGGER = LoggerFactory.getLogger(GloblikePattern.class); @@ -25,4 +28,26 @@ class GloblikePattern { return Pattern.compile(valueRegex); } + + public static > T filterValues(final Collection availableValues, + final String valuePattern, final Supplier generator) { + final T result = generator.get(); + + return filterValues(result, availableValues, valuePattern); + } + + public static > T filterValues(final T result, + final Collection availableValues, final String valuePattern) { + + final Pattern pattern = GloblikePattern.globlikeToRegex(valuePattern); + + for (final String value : availableValues) { + final Matcher matcher = pattern.matcher(value); + if (matcher.find() && !value.equals(valuePattern)) { + result.add(value); + } + } + + return result; + } } diff --git 
a/data-store/src/main/java/org/lucares/pdb/datastore/lang/IdentityExpressionVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/IdentityExpressionVisitor.java new file mode 100644 index 0000000..da7a647 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/IdentityExpressionVisitor.java @@ -0,0 +1,79 @@ +package org.lucares.pdb.datastore.lang; + +import org.lucares.pdb.datastore.lang.Expression.And; +import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.CaretAndExpression; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.Or; +import org.lucares.pdb.datastore.lang.Expression.Parentheses; +import org.lucares.pdb.datastore.lang.Expression.Property; + +/** + * Visitor that returns the expressions without any modifications. Can be used + * as base class for visitors that modify expressions. 
+ */ +public abstract class IdentityExpressionVisitor extends ExpressionVisitor { + @Override + public Expression visit(final And expression) { + + final Expression left = expression.getLeft().visit(this); + final Expression right = expression.getRight().visit(this); + + return new And(left, right); + } + + @Override + public Expression visit(final Or expression) { + final Expression left = expression.getLeft().visit(this); + final Expression right = expression.getRight().visit(this); + + return new Or(left, right); + } + + @Override + public Expression visit(final Not expression) { + return new Not(expression.getExpression().visit(this)); + } + + @Override + public Expression visit(final Property expression) { + return expression; + } + + @Override + public Expression visit(final Expression.Terminal expression) { + return expression; + } + + @Override + public Expression visit(final Expression.MatchAll expression) { + return expression; + } + + @Override + public Expression visit(final Expression.InExpression expression) { + return expression; + } + + @Override + public Expression visit(final Parentheses parentheses) { + return new Parentheses(parentheses.getExpression().visit(this)); + } + + @Override + public Expression visit(final AndCaretExpression expression) { + return expression; + } + + @Override + public Expression visit(final AndNotCaretExpression expression) { + return expression; + } + + @Override + public Expression visit(final CaretAndExpression expression) { + return expression; + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java index 4ab7624..c1b21fe 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java @@ -1,27 +1,195 @@ package org.lucares.pdb.datastore.lang; +import java.util.ArrayList; +import 
java.util.Collection; +import java.util.Collections; import java.util.List; +import java.util.SortedSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; import org.lucares.pdb.datastore.Proposal; -import org.lucares.pdb.datastore.internal.DataStore; +import org.lucares.pdb.datastore.internal.QueryCompletionIndex; +import org.lucares.utils.CollectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class NewProposerParser { - private final static String CARET_MARKER = "\ue001"; // third character in the private use area + private static final Logger LOGGER = LoggerFactory.getLogger(NewProposerParser.class); - private final DataStore dataStore; + public final static String CARET_MARKER = "\ue001"; // character in the private use area - public NewProposerParser(final DataStore dataStore) { - this.dataStore = dataStore; + /* + * Regex matching a java identifier without a caret marker. We define it as a + * blacklist, because this is easer. The regex is only used after the + * query has already been validated with the proper grammar. 
+ */ + private static final String REGEX_IDENTIFIER = "[^\\s,!\\(\\)=" + CARET_MARKER + "]*"; + + private final QueryCompletionIndex queryCompletionIndex; + + public NewProposerParser(final QueryCompletionIndex queryCompletionIndex) { + this.queryCompletionIndex = queryCompletionIndex; } public List propose(final String query, final int caretIndex) { + List proposals; + if (StringUtils.isBlank(query)) { + proposals = proposeForAllKeys(); + } else { - final String queryString = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); + final List foundProposals = proposalsForValues(query, caretIndex); + if (foundProposals.isEmpty()) { + proposals = proposalsForNonValues(query, caretIndex); + } else { + proposals = foundProposals; + } + } - final Expression expression = QueryLanguageParser.parse(queryString); + return proposals; + } + + private List proposalsForNonValues(final String query, final int caretIndex) { + final List proposals = new ArrayList<>(); + + /* + * This method is called when the query could not be parsed. It is likely that + * the next word is either a field or an operator. But is is also possible that + * the next word is a field-value, because the syntax error might be at another + * location in the query (not at the caret position). 
+ */ + + final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); + + final List tokens = QueryLanguage.getTokens(queryWithCaretMarker); + final int indexTokenWithCaret = CollectionUtils.indexOf(tokens, t -> t.contains(CARET_MARKER)); + + if (indexTokenWithCaret > 0) { + final String previousToken = tokens.get(indexTokenWithCaret - 1); + switch (previousToken) { + case "(": + case "and": + case "or": + case "!": + proposals.addAll(proposeForAllKeys(queryWithCaretMarker)); + break; + + case ")": + default: + // proposals.addAll(proposal); + break; + } + } else if (indexTokenWithCaret == 0) { + proposals.addAll(proposeForAllKeys(queryWithCaretMarker)); + } + + return proposals; + } + + private Collection proposeForAllKeys(final String queryWithCaretMarker) { + final List proposals = new ArrayList<>(); + final String wordPrefix = wordPrefix(queryWithCaretMarker); + + if (wordPrefix != null) { + final SortedSet allFields = queryCompletionIndex.findAllFields(); + for (final String field : allFields) { + + if (!field.startsWith(wordPrefix)) { + continue; + } + + final String proposedQuery = queryWithCaretMarker + .replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, field + "=* "); + final String newQueryWithCaretMarker = queryWithCaretMarker + .replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, field + "=" + CARET_MARKER); + final String newQuery = newQueryWithCaretMarker.replace(CARET_MARKER, ""); + final int newCaretPosition = newQueryWithCaretMarker.indexOf(CARET_MARKER); + final Proposal proposal = new Proposal(field, proposedQuery, true, newQuery, newCaretPosition); + proposals.add(proposal); + } + } + + return proposals; + } + + private String wordPrefix(final String queryWithCaretMarker) { + + final Pattern pattern = Pattern.compile("(" + REGEX_IDENTIFIER + CARET_MARKER + ")"); + final Matcher matcher = pattern.matcher(queryWithCaretMarker); + if (matcher.find()) { + final String group = 
matcher.group(); + return group.replace(CARET_MARKER, ""); + } return null; } + private List proposeForAllKeys() { + final List proposals = new ArrayList<>(); + + final SortedSet allFields = queryCompletionIndex.findAllFields(); + for (final String field : allFields) { + final String proposedQuery = field + "=*"; + final String newQuery = field + "="; + final int newCaretPosition = newQuery.length(); + final Proposal proposal = new Proposal(field, proposedQuery, true, newQuery, newCaretPosition); + proposals.add(proposal); + } + + return proposals; + } + + List proposalsForValues(final String query, final int caretIndex) { + try { + // Add caret marker, so that we know where the caret is. + // This also makes sure that a query like "name=|" ('|' is the caret) can be + // parsed. + // Without the caret marker the query would be "name=", which is not a valid + // expression. + final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); + + // parse the query + final Expression expression = QueryLanguageParser.parse(queryWithCaretMarker); + + // normalize it, so that we can use the queryCompletionIndex to search vor + // candidate values + final QueryCompletionExpressionOptimizer optimizer = new QueryCompletionExpressionOptimizer(); + final Expression normalizedExpression = optimizer.normalizeExpression(expression); + + // find all candidate values + final SortedSet candidateValues = normalizedExpression + .visit(new FindValuesForQueryCompletion(queryCompletionIndex)); + + // translate the candidate values to proposals + final List proposals = generateProposals(queryWithCaretMarker, expression, candidateValues); + + return proposals; + } catch (final SyntaxException e) { + LOGGER.debug("Query ({}) is not valid. 
This is expected to happen " + + "unless we are looking for proposals of values.", query, e); + return Collections.emptyList(); + } + } + + private List generateProposals(final String queryWithCaretMarker, final Expression expression, + final SortedSet candidateValues) { + final List proposals = new ArrayList<>(); + + for (final String proposedTag : candidateValues) { + + final String proposedQueryWithCaretMarker = queryWithCaretMarker + .replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, proposedTag + CARET_MARKER); + + final String proposedQuery = proposedQueryWithCaretMarker.replace(CARET_MARKER, ""); + final int newCaretPosition = proposedQueryWithCaretMarker.indexOf(CARET_MARKER); + + final Proposal proposal = new Proposal(proposedTag, proposedQuery, true, proposedQuery, newCaretPosition); + proposals.add(proposal); + } + + return proposals; + } + } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionExpressionOptimizer.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionExpressionOptimizer.java new file mode 100644 index 0000000..3701b74 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionExpressionOptimizer.java @@ -0,0 +1,268 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.ArrayList; +import java.util.List; + +import org.lucares.pdb.datastore.internal.QueryCompletionIndex; +import org.lucares.pdb.datastore.lang.Expression.And; +import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.InExpression; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.Or; +import org.lucares.pdb.datastore.lang.Expression.Property; +import org.lucares.pdb.datastore.lang.Expression.Terminal; + +/** + * Query completion utilizes an index that contains all mappings of + * 
tags+fieldname to values. This index can be used to answer the question what + * the possible values for fields in simple AND-queries are. + *

+ * E.g. Given the query "lastname=Doe and firstname=|" ('|' is the marker for + * the caret position). All possible values for firstname are in the index under + * "tagA.field=lastname and tagA.value=Doe and tagB.field=firstname". See also + * {@link QueryCompletionIndex}. + *

+ * We can use this index for all boolean queries. But we have to normalize the + * queries first. + *

+ * E.g. "(lastname=Doe or country=Atlantis) and firstname=|" will be normalized + * and split into two queries: + *

    + *
  1. "lastname=Doe and firstname=|" + *
  2. "country=Atlantis and firstname=|" + *
+ * Everything that is or'ed with the field for which we are doing the + * completion can be removed. E.g. "lastname=Doe or firstname=|" will be + * normalized to "firstname=|", because the expression lastname=Doe does not + * change which values are possible for firstname. + *

+ * Consequently, IN-expressions are normalized to PROPERTY-expressions. + *

+ * E.g. "firstname=John,|,Frank" will be normalized to "firstname=|". + */ +public class QueryCompletionExpressionOptimizer { + + private static final class ReplaceINExpressionsWithPropertyExpressionsVisitor extends IdentityExpressionVisitor { + + @Override + public Expression visit(final InExpression expression) { + if (expression.containsCaret() || expression.getValues().size() == 1) { + final String property = expression.getProperty(); + final List values = expression.getValues(); + + final List propertyExpressions = new ArrayList<>(); + + for (final String value : values) { + propertyExpressions.add(new Property(property, new Terminal(value))); + } + + return Expression.Or.create(propertyExpressions); + } else { + return super.visit(expression); + } + }; + } + + private static final class RemoveOrEdExpressions extends IdentityExpressionVisitor { + @Override + public Expression visit(final Or expression) { + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left.containsCaret() && !right.containsCaret()) { + return left; + } + if (!left.containsCaret() && right.containsCaret()) { + return right; + } + return super.visit(expression); + }; + } + + private static final class DistributiveNormalization extends IdentityExpressionVisitor { + + @Override + public Expression visit(final And expression) { + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left instanceof Or) { + // (a or b) and c + // becomes + // a and c or b and c + final Expression ac = new And(((Or) left).getLeft(), right); + final Expression bc = new And(((Or) left).getRight(), right); + return new Or(ac, bc); + } + + if (right instanceof Or) { + // a and (b or c) + // becomes + // a and b or a and c + final Expression ab = new And(left, ((Or) right).getLeft()); + final Expression ac = new And(left, ((Or) right).getRight()); + return new Or(ab, ac); + } + return super.visit(expression); + }; 
+ } + + private static final class RotateAndExpressions extends IdentityExpressionVisitor { + @Override + public Expression visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + // (| and a) and b => | and (a and b) + // + // The expression with the caret is moved up + if (left.containsCaret() && left instanceof And) { + final Expression leftLeft = ((And) left).getLeft(); + final Expression leftRight = ((And) left).getRight(); + + if (leftLeft.containsCaret()) { + return new And(leftLeft, new And(leftRight, right)); + } else { + return new And(new And(leftLeft, right), leftRight); + } + } else if (right.containsCaret() && right instanceof And) { + final Expression rightLeft = ((And) right).getLeft(); + final Expression rightRight = ((And) right).getRight(); + + if (rightLeft.containsCaret()) { + return new And(rightLeft, new And(rightRight, left)); + } else { + return new And(new And(rightLeft, left), rightRight); + } + } + + return super.visit(expression); + } + } + + private static final class DoubleNegationExpressions extends IdentityExpressionVisitor { + @Override + public Expression visit(final Not expression) { + if (expression instanceof Not) { + if (expression.getExpression() instanceof Not) { + return ((Not) expression.getExpression()).getExpression(); + } + } + return super.visit(expression); + } + } + + private static final class DeMorgan extends IdentityExpressionVisitor { + @Override + public Expression visit(final Not expression) { + + if (expression.getExpression() instanceof And) { + final And andExpression = (And) expression.getExpression(); + final Expression left = andExpression.getLeft(); + final Expression right = andExpression.getRight(); + + final Expression notLeft = new Not(left); + final Expression notRight = new Not(right); + + return new Or(notLeft, notRight); + } + + return super.visit(expression); + } + } + + private static final class ToAndCaretExpressions 
extends IdentityExpressionVisitor { + @Override + public Expression visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left.containsCaret() && left instanceof Property) { + return new AndCaretExpression((Property) left, right); + } + if (right.containsCaret() && right instanceof Property) { + return new AndCaretExpression((Property) right, left); + } + + if (left.containsCaret()// + && left instanceof Not// + && ((Not) left).getExpression() instanceof Property) { + return new AndNotCaretExpression((Property) ((Not) left).getExpression(), right); + } + if (right.containsCaret()// + && right instanceof Not// + && ((Not) right).getExpression() instanceof Property) { + return new AndNotCaretExpression((Property) ((Not) right).getExpression(), left); + } + + return super.visit(expression); + } + } + + public Expression normalizeExpression(final Expression expression) { + + Expression normalizingExpression = expression; + Expression previousExpression = normalizingExpression; + do { + previousExpression = normalizingExpression; + // replace all IN-expression, because they are just syntactic sugar for + // OR-expressions, but only for those that include the caret + normalizingExpression = normalizingExpression + .visit(new ReplaceINExpressionsWithPropertyExpressionsVisitor()); + + // Remove expressions that are OR'ed with the one that contains the caret. + // Everything that is OR'ed with the 'caret'-expression cannot change the + // possible values. + normalizingExpression = visitRepeatedly(normalizingExpression, new RemoveOrEdExpressions()); + + // In the end we want to have expressions like "firstname=Jane and lastname=|". 
+ // To reach that goal we use the distributive law to modify expressions like + // "(firstname=Jane or firstname=John) and lastname=|" to "(firstname=Jane and + // lastname=|) or (firstname=John and lastname=|)" + normalizingExpression = visitRepeatedly(normalizingExpression, new DistributiveNormalization()); + + // (fn=John and (fn=John and ln=|) + // normalized to + // (fn=John and ln=|) and (fn=Jane and ln=|) + // or normalized to + // (fn=John and fn=Jane) and ln=| + normalizingExpression = visitRepeatedly(normalizingExpression, new RotateAndExpressions()); + + // normalize a NAND-expression into an OR with DeMorgan, the OR-Expression might + // later be removed + // not ( a and b) => (not a) or (not b) + normalizingExpression = visitRepeatedly(normalizingExpression, new DeMorgan()); + + // remove double negation + // not not a => a + normalizingExpression = visitRepeatedly(normalizingExpression, new DoubleNegationExpressions()); + } while (!normalizingExpression.equals(previousExpression)); + + // Replaces all (a and |) expressions with a special expression that represents + // it. + // This special expression will then be used during evaluation. 
+ return visitRepeatedly(normalizingExpression, new ToAndCaretExpressions()); + } + + private static Expression visitRepeatedly(final Expression expression, + final ExpressionVisitor visitor) { + Expression previousExpression; + Expression result = expression; + + do { + previousExpression = result; + result = previousExpression.visit(visitor); + if (!previousExpression.equals(result)) { + System.out.println(" translate: " + visitor.getClass().getSimpleName()); + System.out.println(" in: " + previousExpression); + System.out.println(" out: " + result); + } + } while (!previousExpression.equals(result)); + + return result; + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java index e09cd34..cdf7ca5 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java @@ -1,10 +1,12 @@ package org.lucares.pdb.datastore.lang; +import java.util.List; import java.util.Stack; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.tree.ParseTree; import org.antlr.v4.runtime.tree.ParseTreeListener; import org.antlr.v4.runtime.tree.ParseTreeWalker; @@ -22,6 +24,7 @@ import org.lucares.pdb.datastore.lang.PdbLangParser.IdentifierExpressionContext; import org.lucares.pdb.datastore.lang.PdbLangParser.ListOfPropValuesContext; import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext; import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyTerminalExpressionContext; +import org.lucares.utils.CollectionUtils; public class QueryLanguage { @@ -51,11 +54,7 @@ public class QueryLanguage { throw new SyntaxException(ctx, "token too long"); } - final int line = ctx.getStart().getLine(); - final int startIndex = 
ctx.getStart().getStartIndex(); - final int stopIndex = ctx.getStart().getStopIndex(); - - stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex)); + stack.push(new Terminal(ctx.getText())); } @Override @@ -64,11 +63,7 @@ public class QueryLanguage { throw new SyntaxException(ctx, "token too long"); } - final int line = ctx.getStart().getLine(); - final int startIndex = ctx.getStart().getStartIndex(); - final int stopIndex = ctx.getStart().getStopIndex(); - - stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex)); + stack.push(new Terminal(ctx.getText())); } @Override @@ -145,4 +140,15 @@ public class QueryLanguage { return stack.pop(); } + + public static List getTokens(final String input) { + final CharStream in = CharStreams.fromString(input); + + final PdbLangLexer lexer = new PdbLangLexer(in); + + final CommonTokenStream tokens = new CommonTokenStream(lexer); + tokens.fill(); + final List tokenList = tokens.getTokens(); + return CollectionUtils.map(tokenList, Token::getText); + } } diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java index 5a5e197..beae4b6 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java @@ -1,21 +1,36 @@ package org.lucares.pdb.datastore.internal; +import java.awt.BorderLayout; +import java.awt.event.KeyAdapter; +import java.awt.event.KeyEvent; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import javax.swing.JFrame; +import javax.swing.JTextArea; 
+import javax.swing.JTextField; import org.lucares.pdb.api.Tags; import org.lucares.pdb.blockstorage.BSFile; import org.lucares.pdb.datastore.Doc; +import org.lucares.pdb.datastore.Proposal; import org.lucares.utils.CollectionUtils; import org.lucares.utils.file.FileUtils; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @Test @@ -107,6 +122,162 @@ public class DataStoreTest { Assert.assertEquals(eagleTimBlockOffset % BSFile.BLOCK_SIZE, 0); } + @DataProvider(name = "providerProposals") + public Iterator providerProposals() { + + final List result = new ArrayList<>(); + + result.add(new Object[] { "type=bird and subtype=eagle and name=|", "name", Arrays.asList("Tim") }); + + // returns Tim, because it is the only dog's name starting with 'Ti' + result.add(new Object[] { "!name=Ti| and type=dog", "name", Arrays.asList("Tim") }); + + // all cats + result.add(new Object[] { "type=cat and !name=|", "name", + Arrays.asList("Jane", "John", "Paul", "Sam", "Timothy") }); + + // finds nothing, because there are no dogs whose name is neither Jenny nor Ti* + result.add(new Object[] { "!name=Ti| and type=dog and !name=Jenny", "name", Arrays.asList() }); + + result.add(new Object[] { "(type=bird and age=three or type=dog and age=three) and name=|", "name", + Arrays.asList("Jenny", "Tim") }); + + // all but Jennifer + result.add(new Object[] { "!(type=bird) and name=|", "name", + Arrays.asList("Jane", "Jenny", "John", "Paul", "Sam", "Tim", "Timothy") }); + + result.add(new Object[] { "type=bird and !subtype=eagle and name=|", "name", Arrays.asList("Jennifer") }); + + // DeMorgan + // TODO should only match "Jenny", because Jenny is the only non-bird name + // starting with 'Jen' + result.add(new Object[] { "!(type=bird and name=Jen|)", "name", Arrays.asList("Jennifer", "Jenny") }); + result.add(new Object[] { "!(type=dog and name=|) and
!type=cat", "name", + Arrays.asList("Jennifer", "Jenny", "Tim") }); + + return result.iterator(); + } + + @Test(dataProvider = "providerProposals") + public void testProposals(final String queryWithCaret, final String field, + final List expectedProposedValues) throws Exception { + + dataStore = new DataStore(dataDirectory); + + final List tags = Arrays.asList( + Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"), + Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"), + Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"), + + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"), + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"), + + Tags.create("type", "cat", "subtype", "tiger", "age", "one", "name", "Timothy"), + Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"), + Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John")); + + tags.forEach(dataStore::createNewFile); + + assertProposals(queryWithCaret, field, expectedProposedValues); + } + + public static void main(final String[] args) throws IOException, InterruptedException { + final Path dir = Files.createTempDirectory("pdb"); + try (DataStore dataStore = new DataStore(dir)) { + + final List tags = Arrays.asList( + Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"), + Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"), + Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"), + + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"), + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"), + + Tags.create("type", 
"cat", "subtype", "tiger", "age", "one", "name", "Timothy"), + Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"), + Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John")); + + tags.forEach(dataStore::createNewFile); + + final JFrame frame = new JFrame(); + final JTextField input = new JTextField(); + final JTextArea output = new JTextArea(); + final JTextArea info = new JTextArea(); + + frame.add(input, BorderLayout.NORTH); + frame.add(output, BorderLayout.CENTER); + frame.add(info, BorderLayout.SOUTH); + + input.setText("type=bird and !subtype=eagle and name="); + + input.addKeyListener(new KeyAdapter() { + + @Override + public void keyReleased(final KeyEvent e) { + + final String query = input.getText(); + final int caretIndex = input.getCaretPosition(); + final List proposals = dataStore.propose(query, caretIndex); + + final StringBuilder out = new StringBuilder(); + + for (final Proposal proposal : proposals) { + out.append(proposal.getProposedTag()); + out.append(" "); + out.append(proposal.getProposedQuery()); + out.append("\n"); + } + + final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, "|").toString(); + + out.append("\n"); + out.append("\n"); + out.append("input: " + queryWithCaretMarker); + + output.setText(out.toString()); + + } + }); + final List docs = dataStore.search(""); + final StringBuilder out = new StringBuilder(); + out.append("info\n"); + for (final Doc doc : docs) { + out.append(doc.getTags()); + out.append("\n"); + } + info.setText(out.toString()); + + frame.setSize(800, 600); + frame.setVisible(true); + TimeUnit.HOURS.sleep(1000); + } + } + + private void assertProposals(final String queryWithCaret, final String field, + final List expectedProposedValues) { + final String query = 
queryWithCaret.replace("|", ""); + final int caretIndex = queryWithCaret.indexOf("|"); + final List proposals = dataStore.propose(query, caretIndex); + System.out.println( + "proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList())); + + proposals.forEach(p -> assertQueryFindsResults(p.getNewQuery())); + + final List proposedValues = CollectionUtils.map(proposals, Proposal::getProposedTag); + Collections.sort(proposedValues); + Collections.sort(expectedProposedValues); + Assert.assertEquals(proposedValues.toString(), expectedProposedValues.toString(), "proposed values:"); + } + + private void assertQueryFindsResults(final String query) { + final List result = dataStore.search(query); + Assert.assertFalse(result.isEmpty(), "The query '" + query + "' must return a result, but didn't."); + } + private void assertSearch(final String query, final Tags... tags) { final List actualDocs = dataStore.search(query); final List actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber); diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java index aad9399..b077d80 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java @@ -141,6 +141,19 @@ public class ProposerTest { ); } + public void testProposalWithAndExpression() throws Exception { + assertProposals("name=*im and bird=eagle", 8, // + new Proposal("Tim", "name=Tim and bird=eagle", true, "name=Tim and bird=eagle", 8), // + new Proposal("Timothy", "name=Timothy and bird=eagle", true, "name=Timothy and bird=eagle", 12)// + ); + + assertProposals("name=*im and bird=eagle,pigeon", 8, // + new Proposal("Tim", "name=Tim and bird=eagle,pigeon", true, "name=Tim and bird=eagle,pigeon", 8), // + new Proposal("Timothy", "name=Timothy and bird=eagle,pigeon", 
true, + "name=Timothy and bird=eagle,pigeon", 12)// + ); + } + private void assertProposals(final String query, final int caretIndex, final Proposal... expected) throws InterruptedException { diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java index 7c74fe7..9204a22 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java @@ -41,7 +41,7 @@ public class QueryCompletionIndexTest { Tags.create("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C ); - try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory.resolve("qci.bs"))) { + try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) { for (final Tags t : tags) { index.addTags(t); } @@ -55,6 +55,13 @@ public class QueryCompletionIndexTest { // tags A and C match firstname=John, but both have country=Atlantis final SortedSet countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country"); Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis")); + + // findAllValuesForField sorts alphabetically + final SortedSet firstnames = index.findAllValuesForField("firstname"); + Assert.assertEquals(firstnames, Arrays.asList("Jane", "John"), "found: " + firstnames); + + final SortedSet countries = index.findAllValuesForField("country"); + Assert.assertEquals(countries, Arrays.asList("Atlantis", "ElDorado")); } } } diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java index 5a9ca11..838c614 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java @@ -11,8 +11,8 @@ public class Tag implements Comparable { } public Tag(final String key, final 
String value) { - this.key = Tags.STRING_COMPRESSOR.put(key); - this.value = Tags.STRING_COMPRESSOR.put(value); + this.key = key != null ? Tags.STRING_COMPRESSOR.put(key) : -1; + this.value = value != null ? Tags.STRING_COMPRESSOR.put(value) : -1; } @Override diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java index 6e5c57f..44ad3c3 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java @@ -72,6 +72,13 @@ public class Tags implements Comparable { return result; } + public static Tags create(final String key1, final String value1, final String key2, final String value2, + final String key3, final String value3, final String key4, final String value4) { + final Tags result = TagsBuilder.create().add(key1, value1).add(key2, value2).add(key3, value3).add(key4, value4) + .build(); + return result; + } + public static Tags fromBytes(final byte[] bytes) { final List result = new ArrayList<>(); @@ -188,7 +195,7 @@ public class Tags implements Comparable { @Override public String toString() { - return "Tags [tags=" + tags + "]"; + return String.valueOf(tags); } @Override diff --git a/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java b/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java index e54b70a..e3beff9 100644 --- a/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java +++ b/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java @@ -7,6 +7,7 @@ import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -62,4 +63,31 @@ public class CollectionUtils { return collection.stream().filter(predicate).collect(Collectors.toList()); } + public static int indexOf(final List list, final Predicate predicate) { + for (int i = 0; i < list.size(); 
i++) { + if (predicate.test(list.get(i))) { + return i; + } + } + return -1; + } + + public static > T removeAll(final T collection, final T remove, + final Supplier generator) { + + final T result = generator.get(); + result.addAll(collection); + result.removeAll(remove); + return result; + } + + public static > T retainAll(final T collection, final T retain, + final Supplier generator) { + + final T result = generator.get(); + result.addAll(collection); + result.retainAll(retain); + return result; + } + } diff --git a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java index 265bf27..45b59e5 100644 --- a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java +++ b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java @@ -39,6 +39,12 @@ public class Preconditions { } } + public static void checkSmaller(final long a, final long b, final String message, final Object... args) { + if (a >= b) { + throw new IllegalStateException(MessageFormat.format(message, args) + " Expected: " + a + " < " + b); + } + } + public static void checkEqual(final Object actual, final Object expected) { checkEqual(actual, expected, "expected {0} is equal to {1}", actual, expected); } @@ -74,6 +80,18 @@ public class Preconditions { checkEqual(actual, true, message, args); } + /** + * Check that the given value is false. + * + * @param actual must be false + * @param message formatted with {@link MessageFormat} + * @param args arguments for the message + * @throws IllegalStateException if {@code actual} is not false + */ + public static void checkFalse(final boolean actual, final String message, final Object... args) { + checkEqual(actual, false, message, args); + } + public static void checkNull(final Object actual, final String message, final Object... args) { if (actual != null) { throw new IllegalStateException(MessageFormat.format(message, args));