diff --git a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapOfListsOfLongs.java b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapOfListsOfLongs.java new file mode 100644 index 0000000..03f0ba6 --- /dev/null +++ b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapOfListsOfLongs.java @@ -0,0 +1,107 @@ +package org.lucares.pdb.map; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.NoSuchElementException; +import java.util.stream.Stream; + +import org.lucares.collections.LongList; +import org.lucares.pdb.blockstorage.BSFile; +import org.lucares.pdb.diskstorage.DiskStorage; +import org.lucares.pdb.map.PersistentMap.EncoderDecoder; +import org.lucares.utils.Preconditions; +import org.lucares.utils.cache.HotEntryCache; +import org.lucares.utils.cache.HotEntryCache.Event; +import org.lucares.utils.cache.HotEntryCache.EventListener; +import org.lucares.utils.cache.HotEntryCache.EventType; + +/** + * Combines {@link PersistentMap} and {@link BSFile} to represent a map where + * the values are lists of longs. + */ +public class PersistentMapOfListsOfLongs implements AutoCloseable { + + private static final class RemovalListener implements EventListener { + @Override + public void onEvent(final Event event) { + event.getValue().close(); + } + } + + private final PersistentMap map; + private final Path mapPath; + private final DiskStorage diskStore; + private final Path diskStorePath; + + private final HotEntryCache writerCache; + + /** + * Creates a new map that stores indexed streams/lists of longs. + *

+ * This class creates two files on disk. One for the index and one for the lists + * of longs. + * + * @param path the folder where to store the map + * @param filePrefix prefix of the files + * @param keyEncoder {@link EncoderDecoder} for the key + * @throws IOException + */ + public PersistentMapOfListsOfLongs(final Path path, final String filePrefix, final EncoderDecoder keyEncoder) + throws IOException { + Preconditions.checkTrue(Files.isDirectory(path), "must be a directory {0}", path); + mapPath = path.resolve(filePrefix + "_index.bs"); + diskStorePath = path.resolve(filePrefix + "_data.bs"); + map = new PersistentMap<>(mapPath, keyEncoder, PersistentMap.LONG_CODER); + diskStore = new DiskStorage(diskStorePath); + + writerCache = new HotEntryCache<>(Duration.ofMinutes(10), filePrefix + "Cache"); + writerCache.addListener(new RemovalListener(), EventType.EVICTED, EventType.REMOVED); + } + + public synchronized void appendLong(final K key, final long value) throws IOException { + + BSFile cachedWriter = writerCache.get(key); + if (cachedWriter == null) { + final Long bsFileBlockNumber = map.getValue(key); + + if (bsFileBlockNumber == null) { + cachedWriter = BSFile.newFile(diskStore); + map.putValue(key, cachedWriter.getRootBlockOffset()); + } else { + cachedWriter = BSFile.existingFile(bsFileBlockNumber, diskStore); + } + writerCache.put(key, cachedWriter); + } + cachedWriter.append(value); + } + + public synchronized boolean hasKey(final K key) throws IOException { + return map.getValue(key) != null; + } + + public synchronized Stream getLongs(final K key) throws IOException { + final Long bsFileBlockNumber = map.getValue(key); + if (bsFileBlockNumber == null) { + throw new NoSuchElementException("the map at '" + mapPath + "' does not contain the key '" + key + "'"); + } + + final BSFile bsFile = BSFile.existingFile(bsFileBlockNumber, diskStore); + + return bsFile.streamOfLongLists(); + } + + @Override + public void close() throws IOException { + try { + try 
{ + writerCache.forEach(bsFile -> bsFile.close()); + } finally { + map.close(); + } + } finally { + diskStore.close(); + } + } +} diff --git a/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapOfListsOfLongsTest.java b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapOfListsOfLongsTest.java new file mode 100644 index 0000000..f0a7876 --- /dev/null +++ b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapOfListsOfLongsTest.java @@ -0,0 +1,62 @@ +package org.lucares.pdb.map; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.lucares.collections.LongList; +import org.lucares.utils.file.FileUtils; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Test +public class PersistentMapOfListsOfLongsTest { + + private Path dataDirectory; + + @BeforeMethod + public void beforeMethod() throws IOException { + dataDirectory = Files.createTempDirectory("pdb"); + } + + @AfterMethod + public void afterMethod() throws IOException { + FileUtils.delete(dataDirectory); + } + + public void test() throws IOException { + + final String mapFilePrefix = "test"; + final String keyA = "a"; + final String keyB = "b"; + + final int size = 10; + final LongList a = LongList.range(0, size); + a.shuffle(); + final LongList b = LongList.range(0, size); + b.shuffle(); + + try (PersistentMapOfListsOfLongs map = new PersistentMapOfListsOfLongs<>(dataDirectory, mapFilePrefix, + PersistentMap.STRING_CODER)) { + + for (int i = 0; i < size; i++) { + map.appendLong(keyA, a.get(i)); + map.appendLong(keyB, b.get(i)); + } + } + + try (PersistentMapOfListsOfLongs map = new PersistentMapOfListsOfLongs<>(dataDirectory, mapFilePrefix, + PersistentMap.STRING_CODER)) { + + final LongList actualA = new LongList(); + map.getLongs(keyA).forEachOrdered(actualA::addAll); + Assert.assertEquals(actualA, a); + + final LongList actualB = 
new LongList(); + map.getLongs(keyB).forEachOrdered(actualB::addAll); + Assert.assertEquals(actualB, b); + } + } +} diff --git a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 index 70ee164..d00d796 100644 --- a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 +++ b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 @@ -51,7 +51,7 @@ fragment JavaLetter : [a-zA-Z0-9$_] // these are the "java letters" below 0x7F | [\u002a] // asterisk, used for wildcards - | [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area) + | [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area) | // covers all characters above 0x7F which are not a surrogate ~[\u0000-\u007F\uD800-\uDBFF] {Character.isJavaIdentifierStart(_input.LA(-1))}? @@ -64,7 +64,7 @@ fragment JavaLetterOrDigit : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F | [\u002a] // asterisk, used for wildcards - | [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area) + | [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area) | '.' 
| '/' | '-' diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index a3077b9..6e257af 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -161,7 +161,6 @@ public class DataStore implements AutoCloseable { private final DiskStorage diskStorage; private final Path diskStorageFilePath; private final Path storageBasePath; - private final Path queryCompletionIndexFile; public DataStore(final Path dataDirectory) throws IOException { storageBasePath = storageDirectory(dataDirectory); @@ -183,8 +182,7 @@ public class DataStore implements AutoCloseable { final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs"); docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC); - queryCompletionIndexFile = storageBasePath.resolve("queryCompletionIndex.bs"); - queryCompletionIndex = new QueryCompletionIndex(queryCompletionIndexFile); + queryCompletionIndex = new QueryCompletionIndex(storageBasePath); } private Path keyCompressionFile(final Path dataDirectory) throws IOException { @@ -195,39 +193,47 @@ public class DataStore implements AutoCloseable { return dataDirectory.resolve(SUBDIR_STORAGE); } - public long createNewFile(final Tags tags) throws IOException { + // visible for test + QueryCompletionIndex getQueryCompletionIndex() { + return queryCompletionIndex; + } - final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); + public long createNewFile(final Tags tags) { + try { + final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); - final long docId = createUniqueDocId(); - final Doc doc = new Doc(tags, newFilesRootBlockOffset); - docIdToDoc.putValue(docId, doc); + final long docId = createUniqueDocId(); + final Doc doc = new Doc(tags, 
newFilesRootBlockOffset); + docIdToDoc.putValue(docId, doc); - final Long oldDocId = tagsToDocId.putValue(tags, docId); - Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags); + final Long oldDocId = tagsToDocId.putValue(tags, docId); + Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags); - // store mapping from tag to docId, so that we can find all docs for a given tag - final List ts = new ArrayList<>(tags.toTags()); - ts.add(TAG_ALL_DOCS); - for (final Tag tag : ts) { + // store mapping from tag to docId, so that we can find all docs for a given tag + final List ts = new ArrayList<>(tags.toTags()); + ts.add(TAG_ALL_DOCS); + for (final Tag tag : ts) { - Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag); + Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag); - if (diskStoreOffsetForDocIdsOfTag == null) { - diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); - tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag); + if (diskStoreOffsetForDocIdsOfTag == null) { + diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); + tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag); + } + + try (final BSFile docIdsOfTag = BSFile.existingFile(diskStoreOffsetForDocIdsOfTag, diskStorage)) { + docIdsOfTag.append(docId); + } } - try (final BSFile docIdsOfTag = BSFile.existingFile(diskStoreOffsetForDocIdsOfTag, diskStorage)) { - docIdsOfTag.append(docId); - } + // index the tags, so that we can efficiently find all possible values for a + // field in a query + queryCompletionIndex.addTags(tags); + + return newFilesRootBlockOffset; + } catch (final IOException e) { + throw new RuntimeIOException(e); } - - // index the tags, so that we can efficiently find all possible values for a - // field in a query - queryCompletionIndex.addTags(tags); - - return newFilesRootBlockOffset; } private long createUniqueDocId() { @@ -357,11 
+363,10 @@ public class DataStore implements AutoCloseable { public List propose(final String query, final int caretIndex) { - final NewProposerParser newProposerParser = new NewProposerParser(this); + final NewProposerParser newProposerParser = new NewProposerParser(queryCompletionIndex); final List proposals = newProposerParser.propose(query, caretIndex); - System.out.println(proposals); - - return new Proposer(this).propose(query, caretIndex); + LOGGER.debug("Proposals for query {}: {}", query, proposals); + return proposals; } public DiskStorage getDiskStorage() { diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java index dcd66ab..19d51a5 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java @@ -7,11 +7,13 @@ import java.util.SortedSet; import java.util.TreeSet; import org.lucares.collections.LongList; +import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tags; import org.lucares.pdb.map.Empty; import org.lucares.pdb.map.PersistentMap; import org.lucares.pdb.map.PersistentMap.EncoderDecoder; +import org.lucares.utils.Preconditions; import org.lucares.utils.byteencoder.VariableByteEncoder; /** @@ -22,30 +24,42 @@ import org.lucares.utils.byteencoder.VariableByteEncoder; *
* The expensive way is to execute the query for all available lastnames and * keep those that return at least one result.
- * A more effiecient way uses an index that lists all lastnames that occurr with + * A more efficient way uses an index that lists all lastnames that occur with * firstname=John. If we write this as table, then it looks like this: * *

  *┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
- *┃ fieldA  ┃ valueA  ┃ fieldB  ┃  valueB ┃
+ *┃ fieldB  ┃ fieldA  ┃ valueA  ┃  valueB ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃lastname ┃ Connor  ┃
+ *┃lastname ┃firstname┃ John    ┃ Connor  ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃lastname ┃Carpenter┃
+ *┃lastname ┃firstname┃ John    ┃Carpenter┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃country  ┃ Germany ┃
+ *┃country  ┃firstname┃ John    ┃ Germany ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ John    ┃lastname ┃ Nash    ┃
+ *┃lastname ┃firstname┃ John    ┃ Nash    ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ Rick    ┃lastname ┃ Meyer   ┃
+ *┃lastname ┃firstname┃ Rick    ┃ Meyer   ┃
  *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
- *┃firstname┃ Rick    ┃lastname ┃ Castle  ┃
+ *┃lastname ┃firstname┃ Rick    ┃ Castle  ┃
  *┗━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┛
  * 
* - * The lastnames where firstname=John are Connor, Carpenter and Nash. Given such - * a table we can just for all rows with fieldA=firstname and valueA=John and - * fieldB = lastname. + * The lastnames where firstname=John are: Connor, Carpenter and Nash. Given + * such a table we can just search for all rows with fieldA=firstname and valueA=John + * and fieldB = lastname. + *

+ * Please note, that the columns for fieldA and fieldB come first. This is to + * make this index more suitable for IN-expressions and wildcard expressions of + * fieldA. Because we can now find all values for lastname where firstname=J*n* + * by searching for fieldA=firstname and fieldB=lastname, then do the wildcard + * evaluation while iterating over those hits. We do not have to expand the + * wildcard and then do hundreds or thousands of queries. + *

+ * Please note, that fieldB comes before fieldA. This is so that we can run + * inverse searches more efficiently. E.g. finding all values for + * fieldB=lastname where fieldA=firstname has a value != Connor. This is used + * for queries like 'NOT (firstname=Connor) and lastname=|' *

* The values in this index represent such a table. *

@@ -63,6 +77,12 @@ public class QueryCompletionIndex implements AutoCloseable { this.tagB = tagB; } + public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) { + + tagA = new Tag(fieldA, valueA); + tagB = new Tag(fieldB, valueB); + } + public Tag getTagA() { return tagA; } @@ -75,7 +95,29 @@ public class QueryCompletionIndex implements AutoCloseable { public String toString() { return tagA + "::" + tagB; } + } + public static final class FieldField { + private final int fieldA; + private final int fieldB; + + public FieldField(final int fieldA, final int fieldB) { + this.fieldA = fieldA; + this.fieldB = fieldB; + } + + public int getFieldA() { + return fieldA; + } + + public int getFieldB() { + return fieldB; + } + + @Override + public String toString() { + return fieldA + "::" + fieldB; + } } private static final class EncoderTwoTags implements EncoderDecoder { @@ -86,17 +128,22 @@ public class QueryCompletionIndex implements AutoCloseable { final Tag tagA = tagAndField.getTagA(); final Tag tagB = tagAndField.getTagB(); - tmp.add(tagA.getKey()); - tmp.add(tagA.getValue()); - tmp.add(tagB.getKey()); + tmp.add(tagA.getKey()); - // A query for tagA.key and tagA.value and tagB.key is done by setting - // tagB.value==0. - // The query is then executed as a prefix search. Thus tagB.value must not be - // part of the byte array that is returned. - if (tagB.getValue() >= 0) { - tmp.add(tagB.getValue()); + if (tagA.getValue() >= 0) { + tmp.add(tagA.getValue()); + + // A query for tagA.key and tagA.value and tagB.key is done by setting + // tagB.value==-1. + // The query is then executed as a prefix search. Thus tagB.value must not be + // part of the byte array that is returned. 
+ if (tagB.getValue() >= 0) { + tmp.add(tagB.getValue()); + } + } else { + Preconditions.checkSmaller(tagB.getValue(), 0, + "if no value for tagA is given, then tagB must also be empty"); } return VariableByteEncoder.encode(tmp); @@ -106,9 +153,9 @@ public class QueryCompletionIndex implements AutoCloseable { public TwoTags decode(final byte[] bytes) { final LongList tmp = VariableByteEncoder.decode(bytes); - final int tagAKey = (int) tmp.get(0); - final int tagAValue = (int) tmp.get(1); - final int tagBKey = (int) tmp.get(2); + final int tagBKey = (int) tmp.get(0); + final int tagAKey = (int) tmp.get(1); + final int tagAValue = (int) tmp.get(2); final int tagBValue = (int) tmp.get(3); final Tag tagA = new Tag(tagAKey, tagAValue); @@ -118,23 +165,81 @@ public class QueryCompletionIndex implements AutoCloseable { } } - private final PersistentMap tagToTagIndex; + private static final class EncoderTag implements EncoderDecoder { - public QueryCompletionIndex(final Path indexFile) throws IOException { - tagToTagIndex = new PersistentMap<>(indexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER); + @Override + public byte[] encode(final Tag tag) { + + final LongList longList = new LongList(2); + longList.add(tag.getKey()); + + if (tag.getValue() >= 0) { + longList.add(tag.getValue()); + } + return VariableByteEncoder.encode(longList); + } + + @Override + public Tag decode(final byte[] bytes) { + final LongList tmp = VariableByteEncoder.decode(bytes); + final int key = (int) tmp.get(0); + final int value = (int) tmp.get(1); + return new Tag(key, value); + } + + } + + private static final class EncoderField implements EncoderDecoder { + + @Override + public byte[] encode(final String field) { + + if (field.isEmpty()) { + return new byte[0]; + } + + return VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field)); + } + + @Override + public String decode(final byte[] bytes) { + final long compressedString = VariableByteEncoder.decodeFirstValue(bytes); + return 
Tags.STRING_COMPRESSOR.get((int) compressedString); + } + } + + private final PersistentMap tagToTagIndex; + private final PersistentMap fieldToValueIndex; + private final PersistentMap fieldIndex; + + public QueryCompletionIndex(final Path basePath) throws IOException { + final Path tagToTagIndexFile = basePath.resolve("queryCompletionTagToTagIndex.bs"); + tagToTagIndex = new PersistentMap<>(tagToTagIndexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER); + + final Path fieldToValueIndexFile = basePath.resolve("queryCompletionFieldToValueIndex.bs"); + fieldToValueIndex = new PersistentMap<>(fieldToValueIndexFile, new EncoderTag(), PersistentMap.EMPTY_ENCODER); + + final Path fieldIndexFile = basePath.resolve("queryCompletionFieldIndex.bs"); + fieldIndex = new PersistentMap<>(fieldIndexFile, new EncoderField(), PersistentMap.EMPTY_ENCODER); } public void addTags(final Tags tags) throws IOException { final List listOfTagsA = tags.toTags(); final List listOfTagsB = tags.toTags(); - // index all combinations of tagA and tagB + // index all combinations of tagA and tagB and fieldA to fieldB for (final Tag tagA : listOfTagsA) { for (final Tag tagB : listOfTagsB) { final TwoTags key = new TwoTags(tagA, tagB); tagToTagIndex.putValue(key, Empty.INSTANCE); } } + + // create indices of all tags and all fields + for (final Tag tag : listOfTagsA) { + fieldToValueIndex.putValue(tag, Empty.INSTANCE); + fieldIndex.putValue(tag.getKeyAsString(), Empty.INSTANCE); + } } @Override @@ -142,17 +247,76 @@ public class QueryCompletionIndex implements AutoCloseable { tagToTagIndex.close(); } - public SortedSet find(final Tag tag, final String field) throws IOException { - final SortedSet result = new TreeSet<>(); - final int tagBKey = Tags.STRING_COMPRESSOR.put(field); - final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. 
See - // EncoderTwoTags - final TwoTags keyPrefix = new TwoTags(tag, tagB); - tagToTagIndex.visitValues(keyPrefix, (k, v) -> { - result.add(k.getTagB().getValueAsString()); - }); - - return result; - + public SortedSet find(final String property, final String value, final String field) { + final Tag tag = new Tag(property, value); + return find(tag, field); } + + public SortedSet find(final Tag tag, final String field) { + try { + final SortedSet result = new TreeSet<>(); + final int tagBKey = Tags.STRING_COMPRESSOR.put(field); + final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See + // EncoderTwoTags + final TwoTags keyPrefix = new TwoTags(tag, tagB); + tagToTagIndex.visitValues(keyPrefix, (k, v) -> { + result.add(k.getTagB().getValueAsString()); + }); + + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + public SortedSet findAllValuesForField(final String field) { + try { + final SortedSet result = new TreeSet<>(); + final int tagKey = Tags.STRING_COMPRESSOR.put(field); + final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. 
See + + fieldToValueIndex.visitValues(keyPrefix, (k, v) -> { + result.add(k.getValueAsString()); + }); + + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + public SortedSet findAllValuesNotForField(final Tag tag, final String field) { + try { + final SortedSet result = new TreeSet<>(); + + final TwoTags keyPrefix = new TwoTags(field, tag.getKeyAsString(), null, null); + + final int negatedValueA = tag.getValue(); + + tagToTagIndex.visitValues(keyPrefix, (k, v) -> { + + final int valueA = k.getTagA().getValue(); + if (valueA != negatedValueA) { + result.add(k.getTagB().getValueAsString()); + } + }); + + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + public SortedSet findAllFields() { + try { + final SortedSet result = new TreeSet<>(); + fieldIndex.visitValues("", (k, v) -> { + result.add(k); + }); + return result; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java index 9fdb0c1..433024a 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java @@ -5,6 +5,7 @@ import java.util.Arrays; import java.util.List; import org.lucares.utils.CollectionUtils; +import org.lucares.utils.Preconditions; abstract public class Expression { @@ -12,30 +13,8 @@ abstract public class Expression { throw new UnsupportedOperationException(); } - abstract static class UnaryExpression extends Expression { - - private final int line; - private final int startIndex; - private final int stopIndex; - - public UnaryExpression(final int line, final int startIndex, final int stopIndex) { - super(); - this.line = line; - this.startIndex = startIndex; - this.stopIndex = stopIndex; - } - - int getLine() { - return line; 
- } - - int getStartIndex() { - return startIndex; - } - - int getStopIndex() { - return stopIndex; - } + boolean containsCaret() { + throw new UnsupportedOperationException(); } abstract static class TemporaryExpression extends Expression { @@ -93,6 +72,11 @@ abstract public class Expression { return "!" + expression; } + @Override + boolean containsCaret() { + return expression.containsCaret(); + } + @Override public int hashCode() { final int prime = 31; @@ -150,7 +134,12 @@ abstract public class Expression { @Override public String toString() { - return " (" + left + " or " + right + ") "; + return "(" + left + " or " + right + ")"; + } + + @Override + boolean containsCaret() { + return left.containsCaret() || right.containsCaret(); } @Override @@ -191,7 +180,7 @@ abstract public class Expression { return true; } - public static Expression create(final List or) { + public static Expression create(final List or) { if (or.size() == 1) { return or.get(0); @@ -231,7 +220,12 @@ abstract public class Expression { @Override public String toString() { - return " (" + left + " and " + right + ") "; + return "(" + left + " and " + right + ")"; + } + + @Override + boolean containsCaret() { + return left.containsCaret() || right.containsCaret(); } @Override @@ -294,11 +288,11 @@ abstract public class Expression { } } - static class Terminal extends UnaryExpression { + static class Terminal extends Expression { private final String value; - Terminal(final String value, final int line, final int startIndex, final int stopIndex) { - super(line, startIndex, stopIndex); + Terminal(final String value) { + this.value = value; } @@ -313,6 +307,11 @@ abstract public class Expression { return value; } + @Override + boolean containsCaret() { + return value.contains(NewProposerParser.CARET_MARKER); + } + @Override public int hashCode() { final int prime = 31; @@ -365,7 +364,24 @@ abstract public class Expression { @Override public String toString() { - return " " + property + " = " + 
value.getValue() + " "; + return property + " = " + value.getValue(); + } + + @Override + boolean containsCaret() { + return value.containsCaret(); + } + + public String getProperty() { + return property; + } + + public Terminal getValue() { + return value; + } + + public String getValueAsString() { + return value.getValue(); } @Override @@ -398,6 +414,7 @@ abstract public class Expression { return false; return true; } + } static class Parentheses extends Expression { @@ -419,7 +436,12 @@ abstract public class Expression { @Override public String toString() { - return " [ " + expression + " ] "; + return "[ " + expression + " ]"; + } + + @Override + boolean containsCaret() { + return expression.containsCaret(); } @Override @@ -473,6 +495,16 @@ abstract public class Expression { public String toString() { return "(" + String.join(", ", getValues()) + ")"; } + + @Override + boolean containsCaret() { + for (final Terminal terminal : propertyValues) { + if (terminal.containsCaret()) { + return true; + } + } + return false; + } } static class InExpression extends Expression { @@ -506,6 +538,16 @@ abstract public class Expression { return values; } + @Override + boolean containsCaret() { + for (final String value : values) { + if (value.contains(NewProposerParser.CARET_MARKER)) { + return true; + } + } + return false; + } + @Override public int hashCode() { final int prime = 31; @@ -537,4 +579,147 @@ abstract public class Expression { return true; } } + + public static final class AndCaretExpression extends Expression { + Property caretExpression; + Expression expression; + + public AndCaretExpression(final Property caretExpression, final Expression expression) { + Preconditions.checkTrue(caretExpression.containsCaret(), "the expression '{0}' must contain the caret", + caretExpression); + Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret", + caretExpression); + this.caretExpression = caretExpression; + this.expression 
= expression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + @Override + boolean containsCaret() { + + return caretExpression.containsCaret(); + } + + public Property getCaretExpression() { + return caretExpression; + } + + public Expression getExpression() { + return expression; + } + + @Override + public String toString() { + return "(" + caretExpression + " and " + expression + ")"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((caretExpression == null) ? 0 : caretExpression.hashCode()); + result = prime * result + ((expression == null) ? 0 : expression.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final AndCaretExpression other = (AndCaretExpression) obj; + if (caretExpression == null) { + if (other.caretExpression != null) + return false; + } else if (!caretExpression.equals(other.caretExpression)) + return false; + if (expression == null) { + if (other.expression != null) + return false; + } else if (!expression.equals(other.expression)) + return false; + return true; + } + } + + public static final class AndNotCaretExpression extends Expression { + Property negatedCaretExpression; + Expression expression; + + public AndNotCaretExpression(final Property negatedCaretExpression, final Expression expression) { + Preconditions.checkTrue(negatedCaretExpression.containsCaret(), + "the expression '{0}' must contain the caret", negatedCaretExpression); + Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret", + expression); + this.negatedCaretExpression = negatedCaretExpression; + this.expression = expression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + @Override 
+ boolean containsCaret() { + + return negatedCaretExpression.containsCaret(); + } + + public Property getCaretExpression() { + return negatedCaretExpression; + } + + public Expression getExpression() { + return expression; + } + + @Override + public String toString() { + return "(!" + negatedCaretExpression + " and " + expression + ")"; + } + } + + public static final class CaretAndExpression extends Expression { + + private final Property caretExpression; + private final Property otherExpression; + + public CaretAndExpression(final Property caretExpression, final Property otherExpression) { + this.caretExpression = caretExpression; + this.otherExpression = otherExpression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return super.visit(visitor); + } + + @Override + boolean containsCaret() { + Preconditions.checkTrue(caretExpression.containsCaret(), + "CaretAndExpression must contain the caret, but was: {0}", this); + return caretExpression.containsCaret(); + } + + @Override + public String toString() { + return "(caretAnd: " + caretExpression + " and " + otherExpression + ")"; + } + + public Property getCaretExpression() { + return caretExpression; + } + + public Property getOtherExpression() { + return otherExpression; + } + } } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java index b4ff5d1..7447609 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java @@ -32,4 +32,16 @@ public abstract class ExpressionVisitor { public T visit(final Expression.Parentheses parentheses) { throw new UnsupportedOperationException(); } + + public T visit(final Expression.AndCaretExpression expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.AndNotCaretExpression 
expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.CaretAndExpression expression) { + throw new UnsupportedOperationException(); + } } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java new file mode 100644 index 0000000..b25a742 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java @@ -0,0 +1,225 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.Collections; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.lucares.pdb.api.Tag; +import org.lucares.pdb.datastore.internal.QueryCompletionIndex; +import org.lucares.pdb.datastore.lang.Expression.And; +import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.InExpression; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.Or; +import org.lucares.pdb.datastore.lang.Expression.Property; +import org.lucares.utils.CollectionUtils; + +public class FindValuesForQueryCompletion extends ExpressionVisitor> { + + private static final class AndCaretExpressionVisitor extends ExpressionVisitor> { + private final QueryCompletionIndex index; + private final String field; + + public AndCaretExpressionVisitor(final QueryCompletionIndex queryCompletionIndex, final String field) { + index = queryCompletionIndex; + this.field = field; + } + + @Override + public SortedSet visit(final Property property) { + + final String fieldA = property.getProperty(); + final String valueA = property.getValue().getValue(); + + return index.find(fieldA, valueA, field); + } + + @Override + public SortedSet visit(final InExpression expression) { + + final SortedSet result 
= new TreeSet<>(); + final String property = expression.getProperty(); + final List values = expression.getValues(); + for (final String value : values) { + final SortedSet candidates = index.find(property, value, field); + result.addAll(candidates); + } + + return result; + } + + @Override + public SortedSet visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left instanceof Property && right instanceof Not) { + final Property leftProperty = (Property) left; + + final SortedSet allValuesForField = leftProperty.visit(this); + + final Expression rightInnerExpression = ((Not) right).getExpression(); + final SortedSet rightResult = rightInnerExpression.visit(this); + + return CollectionUtils.removeAll(allValuesForField, rightResult, TreeSet::new); + + } else { + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.retainAll(rightResult); + + return result; + } + } + + @Override + public SortedSet visit(final Or expression) { + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.addAll(rightResult); + + return result; + } + + @Override + public SortedSet visit(final Not expression) { + + if (!(expression.getExpression() instanceof Property)) { + throw new UnsupportedOperationException("NOT expressions like '" + expression + + "' are not supported. 
Only 'NOT property=value' expressions are supported."); + } + + final Property property = (Property) expression.getExpression(); + final Tag tag = new Tag(property.getProperty(), property.getValueAsString()); + + final SortedSet valuesNotForField = index.findAllValuesNotForField(tag, field); + final SortedSet valuesForField = index.find(tag, field); + final SortedSet valuesOnlyAvailableInField = CollectionUtils.removeAll(valuesForField, + valuesNotForField, TreeSet::new); + + final SortedSet result = CollectionUtils.removeAll(valuesNotForField, valuesOnlyAvailableInField, + TreeSet::new); + + return result; + } + } + + private final QueryCompletionIndex queryCompletionIndex; + + public FindValuesForQueryCompletion(final QueryCompletionIndex queryCompletionIndex) { + this.queryCompletionIndex = queryCompletionIndex; + } + + @Override + public SortedSet visit(final Property property) { + + final String field = property.getProperty(); + final String value = property.getValue().getValue(); + + final SortedSet allValuesForField = queryCompletionIndex.findAllValuesForField(field); + + final String valuePrefix = value.substring(0, value.indexOf(NewProposerParser.CARET_MARKER)); + + return GloblikePattern.filterValues(allValuesForField, valuePrefix, TreeSet::new); + } + + @Override + public SortedSet visit(final AndCaretExpression expression) { + + final Property caretExpression = expression.getCaretExpression(); + final String field = caretExpression.getProperty(); + final String valueWithCaretMarker = caretExpression.getValue().getValue(); + final String valuePrefix = valueWithCaretMarker.substring(0, + valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); + + final Expression rightHandExpression = expression.getExpression(); + + final SortedSet candidateValues = rightHandExpression + .visit(new AndCaretExpressionVisitor(queryCompletionIndex, field)); + + return GloblikePattern.filterValues(candidateValues, valuePrefix, TreeSet::new); + } + + @Override + public 
SortedSet visit(final AndNotCaretExpression expression) { + + final Property caretExpression = expression.getCaretExpression(); + final String field = caretExpression.getProperty(); + final String valueWithCaretMarker = caretExpression.getValue().getValue(); + final String valuePattern = valueWithCaretMarker.substring(0, + valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); + + final SortedSet allValuesForField = queryCompletionIndex + .findAllValuesForField(caretExpression.getProperty()); + final SortedSet valuesForFieldMatchingCaretExpression = GloblikePattern.filterValues(allValuesForField, + valuePattern, TreeSet::new); + + final Expression rightHandExpression = expression.getExpression(); + + final SortedSet rightHandValues = rightHandExpression + .visit(new AndCaretExpressionVisitor(queryCompletionIndex, field)); + + if (rightHandValues.size() == 1) { + // there is only one alternative and that one must not be chosen + return Collections.emptySortedSet(); + } + final SortedSet result = CollectionUtils.retainAll(rightHandValues, + valuesForFieldMatchingCaretExpression, TreeSet::new); + return result; + } + + @Override + public SortedSet visit(final Not expression) { + + final String field; + final Expression innerExpression = expression.getExpression(); + if (innerExpression instanceof Property) { + field = ((Property) innerExpression).getProperty(); + final SortedSet allValuesForField = queryCompletionIndex.findAllValuesForField(field); + final String valueWithCaretMarker = ((Property) innerExpression).getValue().getValue(); + final String valuePrefix = valueWithCaretMarker.substring(0, + valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER)); + final TreeSet result = GloblikePattern.filterValues(allValuesForField, valuePrefix + "*", + TreeSet::new); + return result; + } else { + throw new UnsupportedOperationException(); + } + } + + @Override + public SortedSet visit(final Or expression) { + final Expression left = expression.getLeft(); + 
final Expression right = expression.getRight(); + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.addAll(rightResult); + + return result; + } + + @Override + public SortedSet visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + final SortedSet result = left.visit(this); + final SortedSet rightResult = right.visit(this); + + result.retainAll(rightResult); + + return result; + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java index 3a1688f..c75c377 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/GloblikePattern.java @@ -1,11 +1,14 @@ package org.lucares.pdb.datastore.lang; +import java.util.Collection; +import java.util.function.Supplier; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class GloblikePattern { +public class GloblikePattern { private static final Logger LOGGER = LoggerFactory.getLogger(GloblikePattern.class); @@ -25,4 +28,26 @@ class GloblikePattern { return Pattern.compile(valueRegex); } + + public static > T filterValues(final Collection availableValues, + final String valuePattern, final Supplier generator) { + final T result = generator.get(); + + return filterValues(result, availableValues, valuePattern); + } + + public static > T filterValues(final T result, + final Collection availableValues, final String valuePattern) { + + final Pattern pattern = GloblikePattern.globlikeToRegex(valuePattern); + + for (final String value : availableValues) { + final Matcher matcher = pattern.matcher(value); + if (matcher.find() && !value.equals(valuePattern)) { + result.add(value); + } + } + + return result; + } } diff --git 
a/data-store/src/main/java/org/lucares/pdb/datastore/lang/IdentityExpressionVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/IdentityExpressionVisitor.java new file mode 100644 index 0000000..da7a647 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/IdentityExpressionVisitor.java @@ -0,0 +1,79 @@ +package org.lucares.pdb.datastore.lang; + +import org.lucares.pdb.datastore.lang.Expression.And; +import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.CaretAndExpression; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.Or; +import org.lucares.pdb.datastore.lang.Expression.Parentheses; +import org.lucares.pdb.datastore.lang.Expression.Property; + +/** + * Visitor that returns the expressions without any modifications. Can be used + * as base class for visitors that modify expressions. 
+ */ +public abstract class IdentityExpressionVisitor extends ExpressionVisitor { + @Override + public Expression visit(final And expression) { + + final Expression left = expression.getLeft().visit(this); + final Expression right = expression.getRight().visit(this); + + return new And(left, right); + } + + @Override + public Expression visit(final Or expression) { + final Expression left = expression.getLeft().visit(this); + final Expression right = expression.getRight().visit(this); + + return new Or(left, right); + } + + @Override + public Expression visit(final Not expression) { + return new Not(expression.getExpression().visit(this)); + } + + @Override + public Expression visit(final Property expression) { + return expression; + } + + @Override + public Expression visit(final Expression.Terminal expression) { + return expression; + } + + @Override + public Expression visit(final Expression.MatchAll expression) { + return expression; + } + + @Override + public Expression visit(final Expression.InExpression expression) { + return expression; + } + + @Override + public Expression visit(final Parentheses parentheses) { + return new Parentheses(parentheses.getExpression().visit(this)); + } + + @Override + public Expression visit(final AndCaretExpression expression) { + return expression; + } + + @Override + public Expression visit(final AndNotCaretExpression expression) { + return expression; + } + + @Override + public Expression visit(final CaretAndExpression expression) { + return expression; + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java index 4ab7624..c1b21fe 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java @@ -1,27 +1,195 @@ package org.lucares.pdb.datastore.lang; +import java.util.ArrayList; +import 
java.util.Collection; +import java.util.Collections; import java.util.List; +import java.util.SortedSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; import org.lucares.pdb.datastore.Proposal; -import org.lucares.pdb.datastore.internal.DataStore; +import org.lucares.pdb.datastore.internal.QueryCompletionIndex; +import org.lucares.utils.CollectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class NewProposerParser { - private final static String CARET_MARKER = "\ue001"; // third character in the private use area + private static final Logger LOGGER = LoggerFactory.getLogger(NewProposerParser.class); - private final DataStore dataStore; + public final static String CARET_MARKER = "\ue001"; // character in the private use area - public NewProposerParser(final DataStore dataStore) { - this.dataStore = dataStore; + /* + * Regex matching a java identifier without a caret marker. We define it as a + * blacklist, because this is easier. The regex is only used after the + * query has already been validated with the proper grammar. 
+ */ + private static final String REGEX_IDENTIFIER = "[^\\s,!\\(\\)=" + CARET_MARKER + "]*"; + + private final QueryCompletionIndex queryCompletionIndex; + + public NewProposerParser(final QueryCompletionIndex queryCompletionIndex) { + this.queryCompletionIndex = queryCompletionIndex; } public List propose(final String query, final int caretIndex) { + List proposals; + if (StringUtils.isBlank(query)) { + proposals = proposeForAllKeys(); + } else { - final String queryString = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); + final List foundProposals = proposalsForValues(query, caretIndex); + if (foundProposals.isEmpty()) { + proposals = proposalsForNonValues(query, caretIndex); + } else { + proposals = foundProposals; + } + } - final Expression expression = QueryLanguageParser.parse(queryString); + return proposals; + } + + private List proposalsForNonValues(final String query, final int caretIndex) { + final List proposals = new ArrayList<>(); + + /* + * This method is called when the query could not be parsed. It is likely that + * the next word is either a field or an operator. But it is also possible that + * the next word is a field-value, because the syntax error might be at another + * location in the query (not at the caret position). 
+ */ + + final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); + + final List tokens = QueryLanguage.getTokens(queryWithCaretMarker); + final int indexTokenWithCaret = CollectionUtils.indexOf(tokens, t -> t.contains(CARET_MARKER)); + + if (indexTokenWithCaret > 0) { + final String previousToken = tokens.get(indexTokenWithCaret - 1); + switch (previousToken) { + case "(": + case "and": + case "or": + case "!": + proposals.addAll(proposeForAllKeys(queryWithCaretMarker)); + break; + + case ")": + default: + // proposals.addAll(proposal); + break; + } + } else if (indexTokenWithCaret == 0) { + proposals.addAll(proposeForAllKeys(queryWithCaretMarker)); + } + + return proposals; + } + + /** + * Proposes every known field whose name starts with the word in front of the + * caret. The word around the caret marker is replaced by the field name. + * The replacement is passed through {@link Matcher#quoteReplacement(String)}, + * because {@link String#replaceAll(String, String)} would otherwise interpret + * '$' and '\' in a field name as group reference or escape character. + * + * @param queryWithCaretMarker the query with the caret marker inserted at the + * caret position + * @return one proposal per matching field; empty if no word prefix could be + * determined + */ + private Collection proposeForAllKeys(final String queryWithCaretMarker) { + final List proposals = new ArrayList<>(); + final String wordPrefix = wordPrefix(queryWithCaretMarker); + + if (wordPrefix != null) { + final SortedSet allFields = queryCompletionIndex.findAllFields(); + for (final String field : allFields) { + + if (!field.startsWith(wordPrefix)) { + continue; + } + + final String proposedQuery = queryWithCaretMarker + .replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, Matcher.quoteReplacement(field + "=* ")); + final String newQueryWithCaretMarker = queryWithCaretMarker + .replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, Matcher.quoteReplacement(field + "=" + CARET_MARKER)); + final String newQuery = newQueryWithCaretMarker.replace(CARET_MARKER, ""); + final int newCaretPosition = newQueryWithCaretMarker.indexOf(CARET_MARKER); + final Proposal proposal = new Proposal(field, proposedQuery, true, newQuery, newCaretPosition); + proposals.add(proposal); + } + } + + return proposals; + } + + private String wordPrefix(final String queryWithCaretMarker) { + + final Pattern pattern = Pattern.compile("(" + REGEX_IDENTIFIER + CARET_MARKER + ")"); + final Matcher matcher = pattern.matcher(queryWithCaretMarker); + if (matcher.find()) { + final String group = 
matcher.group(); + return group.replace(CARET_MARKER, ""); + } return null; } + private List proposeForAllKeys() { + final List proposals = new ArrayList<>(); + + final SortedSet allFields = queryCompletionIndex.findAllFields(); + for (final String field : allFields) { + final String proposedQuery = field + "=*"; + final String newQuery = field + "="; + final int newCaretPosition = newQuery.length(); + final Proposal proposal = new Proposal(field, proposedQuery, true, newQuery, newCaretPosition); + proposals.add(proposal); + } + + return proposals; + } + + List proposalsForValues(final String query, final int caretIndex) { + try { + // Add caret marker, so that we know where the caret is. + // This also makes sure that a query like "name=|" ('|' is the caret) can be + // parsed. + // Without the caret marker the query would be "name=", which is not a valid + // expression. + final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString(); + + // parse the query + final Expression expression = QueryLanguageParser.parse(queryWithCaretMarker); + + // normalize it, so that we can use the queryCompletionIndex to search for + // candidate values + final QueryCompletionExpressionOptimizer optimizer = new QueryCompletionExpressionOptimizer(); + final Expression normalizedExpression = optimizer.normalizeExpression(expression); + + // find all candidate values + final SortedSet candidateValues = normalizedExpression + .visit(new FindValuesForQueryCompletion(queryCompletionIndex)); + + // translate the candidate values to proposals + final List proposals = generateProposals(queryWithCaretMarker, expression, candidateValues); + + return proposals; + } catch (final SyntaxException e) { + LOGGER.debug("Query ({}) is not valid. 
This is expected to happen " + + "unless we are looking for proposals of values.", query, e); + return Collections.emptyList(); + } + } + + /** + * Translates candidate values into proposals by replacing the word around the + * caret marker with each candidate value. The replacement is passed through + * {@link Matcher#quoteReplacement(String)}, because + * {@link String#replaceAll(String, String)} would otherwise interpret '$' and + * '\' in a value as group reference or escape character. + * + * @param queryWithCaretMarker the query with the caret marker inserted at the + * caret position + * @param expression the parsed query (currently not used) + * @param candidateValues the values to propose + * @return one proposal per candidate value + */ + private List generateProposals(final String queryWithCaretMarker, final Expression expression, + final SortedSet candidateValues) { + final List proposals = new ArrayList<>(); + + for (final String proposedTag : candidateValues) { + + final String proposedQueryWithCaretMarker = queryWithCaretMarker + .replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, Matcher.quoteReplacement(proposedTag + CARET_MARKER)); + + final String proposedQuery = proposedQueryWithCaretMarker.replace(CARET_MARKER, ""); + final int newCaretPosition = proposedQueryWithCaretMarker.indexOf(CARET_MARKER); + + final Proposal proposal = new Proposal(proposedTag, proposedQuery, true, proposedQuery, newCaretPosition); + proposals.add(proposal); + } + + return proposals; + } + } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionExpressionOptimizer.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionExpressionOptimizer.java new file mode 100644 index 0000000..3701b74 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryCompletionExpressionOptimizer.java @@ -0,0 +1,268 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.ArrayList; +import java.util.List; + +import org.lucares.pdb.datastore.internal.QueryCompletionIndex; +import org.lucares.pdb.datastore.lang.Expression.And; +import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression; +import org.lucares.pdb.datastore.lang.Expression.InExpression; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.Or; +import org.lucares.pdb.datastore.lang.Expression.Property; +import org.lucares.pdb.datastore.lang.Expression.Terminal; + +/** + * Query completion utilizes an index that contains all mappings of + * 
tags+fieldname to values. This index can be used to answer the question what + * the possible values for fields in simple AND-queries are. + *

+ * E.g. Given the query "lastname=Doe and firstname=|" ('|' is the marker for + * the caret position). All possible values for firstname are in the index under + * "tagA.field=lastname and tagA.value=Doe and tagB.field=firstname". See also + * {@link QueryCompletionIndex}. + *

+ * We can use this index for all boolean queries. But we have to normalize the + * queries first. + *

+ * E.g. "(lastname=Doe or country=Atlantis) and firstname=|" will be normalized + * and split into two queries: + *

    + *
  1. "lastname=Doe and firstname=|" + *
  2. "country=Atlantis and firstname=|" + *
+ * Everything that is or'ed with the field for which we are doing the + * completion can be removed. E.g. "lastname=Doe or firstname=|" will be + * normalized to "firstname=|", because the expression lastname=Doe does not + * change which values are possible for firstname. + *

+ * Consequently, IN-expressions are normalized to PROPERTY-expressions. + *

+ * E.g. "firstname=John,|,Frank" will be normalized to "firstname=|". + */ +public class QueryCompletionExpressionOptimizer { + + private static final class ReplaceINExpressionsWithPropertyExpressionsVisitor extends IdentityExpressionVisitor { + + @Override + public Expression visit(final InExpression expression) { + if (expression.containsCaret() || expression.getValues().size() == 1) { + final String property = expression.getProperty(); + final List values = expression.getValues(); + + final List propertyExpressions = new ArrayList<>(); + + for (final String value : values) { + propertyExpressions.add(new Property(property, new Terminal(value))); + } + + return Expression.Or.create(propertyExpressions); + } else { + return super.visit(expression); + } + }; + } + + private static final class RemoveOrEdExpressions extends IdentityExpressionVisitor { + @Override + public Expression visit(final Or expression) { + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left.containsCaret() && !right.containsCaret()) { + return left; + } + if (!left.containsCaret() && right.containsCaret()) { + return right; + } + return super.visit(expression); + }; + } + + private static final class DistributiveNormalization extends IdentityExpressionVisitor { + + @Override + public Expression visit(final And expression) { + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left instanceof Or) { + // (a or b) and c + // becomes + // a and c or b and c + final Expression ac = new And(((Or) left).getLeft(), right); + final Expression bc = new And(((Or) left).getRight(), right); + return new Or(ac, bc); + } + + if (right instanceof Or) { + // a and (b or c) + // becomes + // a and b or a and c + final Expression ab = new And(left, ((Or) right).getLeft()); + final Expression ac = new And(left, ((Or) right).getRight()); + return new Or(ab, ac); + } + return super.visit(expression); + }; 
+ } + + private static final class RotateAndExpressions extends IdentityExpressionVisitor { + @Override + public Expression visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + // (| and a) and b => | and (a and b) + // + // The expression with the caret is moved up + if (left.containsCaret() && left instanceof And) { + final Expression leftLeft = ((And) left).getLeft(); + final Expression leftRight = ((And) left).getRight(); + + if (leftLeft.containsCaret()) { + return new And(leftLeft, new And(leftRight, right)); + } else { + return new And(new And(leftLeft, right), leftRight); + } + } else if (right.containsCaret() && right instanceof And) { + final Expression rightLeft = ((And) right).getLeft(); + final Expression rightRight = ((And) right).getRight(); + + if (rightLeft.containsCaret()) { + return new And(rightLeft, new And(rightRight, left)); + } else { + return new And(new And(rightLeft, left), rightRight); + } + } + + return super.visit(expression); + } + } + + /** + * Removes double negations: {@code not not a} becomes {@code a}. + */ + private static final class DoubleNegationExpressions extends IdentityExpressionVisitor { + @Override + public Expression visit(final Not expression) { + // the parameter is statically a Not, so only the inner expression needs a check + if (expression.getExpression() instanceof Not) { + return ((Not) expression.getExpression()).getExpression(); + } + return super.visit(expression); + } + } + + private static final class DeMorgan extends IdentityExpressionVisitor { + @Override + public Expression visit(final Not expression) { + + if (expression.getExpression() instanceof And) { + final And andExpression = (And) expression.getExpression(); + final Expression left = andExpression.getLeft(); + final Expression right = andExpression.getRight(); + + final Expression notLeft = new Not(left); + final Expression notRight = new Not(right); + + return new Or(notLeft, notRight); + } + + return super.visit(expression); + } + } + + private static final class ToAndCaretExpressions 
extends IdentityExpressionVisitor { + @Override + public Expression visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + if (left.containsCaret() && left instanceof Property) { + return new AndCaretExpression((Property) left, right); + } + if (right.containsCaret() && right instanceof Property) { + return new AndCaretExpression((Property) right, left); + } + + if (left.containsCaret()// + && left instanceof Not// + && ((Not) left).getExpression() instanceof Property) { + return new AndNotCaretExpression((Property) ((Not) left).getExpression(), right); + } + if (right.containsCaret()// + && right instanceof Not// + && ((Not) right).getExpression() instanceof Property) { + return new AndNotCaretExpression((Property) ((Not) right).getExpression(), left); + } + + return super.visit(expression); + } + } + + public Expression normalizeExpression(final Expression expression) { + + Expression normalizingExpression = expression; + Expression previousExpression = normalizingExpression; + do { + previousExpression = normalizingExpression; + // replace all IN-expression, because they are just syntactic sugar for + // OR-expressions, but only for those that include the caret + normalizingExpression = normalizingExpression + .visit(new ReplaceINExpressionsWithPropertyExpressionsVisitor()); + + // Remove expressions that are OR'ed with the one that contains the caret. + // Everything that is OR'ed with the 'caret'-expression cannot change the + // possible values. + normalizingExpression = visitRepeatedly(normalizingExpression, new RemoveOrEdExpressions()); + + // In the end we want to have expressions like "firstname=Jane and lastname=|". 
+ // To reach that goal we use the distributive law to modify expressions like + // "(firstname=Jane or firstname=John) and lastname=|" to "(firstname=Jane and + // lastname=|) or (firstname=John and lastname=|)" + normalizingExpression = visitRepeatedly(normalizingExpression, new DistributiveNormalization()); + + // (fn=John and (fn=John and ln=|) + // normalized to + // (fn=John and ln=|) and (fn=Jane and ln=|) + // or normalized to + // (fn=John and fn=Jane) and ln=| + normalizingExpression = visitRepeatedly(normalizingExpression, new RotateAndExpressions()); + + // normalize a NAND-expression into an OR with DeMorgan, the OR-Expression might + // later be removed + // not ( a and b) => (not a) or (not b) + normalizingExpression = visitRepeatedly(normalizingExpression, new DeMorgan()); + + // remove double negation + // not not a => a + normalizingExpression = visitRepeatedly(normalizingExpression, new DoubleNegationExpressions()); + } while (!normalizingExpression.equals(previousExpression)); + + // Replaces all (a and |) expressions with a special expression that represents + // it. + // This special expression will then be used during evaluation. 
+ return visitRepeatedly(normalizingExpression, new ToAndCaretExpressions()); + } + + /** + * Applies the visitor to the expression again and again until the result is + * equal to the input of the last pass, i.e. until the visitor no longer + * changes anything. + * + * @param expression the expression to transform + * @param visitor the transformation to apply + * @return the first result that the visitor leaves unchanged + */ + private static Expression visitRepeatedly(final Expression expression, + final ExpressionVisitor visitor) { + Expression previousExpression; + Expression result = expression; + + do { + previousExpression = result; + result = previousExpression.visit(visitor); + } while (!previousExpression.equals(result)); + + return result; + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java index e09cd34..cdf7ca5 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java @@ -1,10 +1,12 @@ package org.lucares.pdb.datastore.lang; +import java.util.List; import java.util.Stack; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.tree.ParseTree; import org.antlr.v4.runtime.tree.ParseTreeListener; import org.antlr.v4.runtime.tree.ParseTreeWalker; @@ -22,6 +24,7 @@ import org.lucares.pdb.datastore.lang.PdbLangParser.IdentifierExpressionContext; import org.lucares.pdb.datastore.lang.PdbLangParser.ListOfPropValuesContext; import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext; import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyTerminalExpressionContext; +import org.lucares.utils.CollectionUtils; public class QueryLanguage { @@ -51,11 +54,7 @@ public class QueryLanguage { throw new SyntaxException(ctx, "token too long"); } - final int line = ctx.getStart().getLine(); - final int startIndex = 
ctx.getStart().getStartIndex(); - final int stopIndex = ctx.getStart().getStopIndex(); - - stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex)); + stack.push(new Terminal(ctx.getText())); } @Override @@ -64,11 +63,7 @@ public class QueryLanguage { throw new SyntaxException(ctx, "token too long"); } - final int line = ctx.getStart().getLine(); - final int startIndex = ctx.getStart().getStartIndex(); - final int stopIndex = ctx.getStart().getStopIndex(); - - stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex)); + stack.push(new Terminal(ctx.getText())); } @Override @@ -145,4 +140,15 @@ public class QueryLanguage { return stack.pop(); } + + public static List getTokens(final String input) { + final CharStream in = CharStreams.fromString(input); + + final PdbLangLexer lexer = new PdbLangLexer(in); + + final CommonTokenStream tokens = new CommonTokenStream(lexer); + tokens.fill(); + final List tokenList = tokens.getTokens(); + return CollectionUtils.map(tokenList, Token::getText); + } } diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java index 5a5e197..beae4b6 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java @@ -1,21 +1,36 @@ package org.lucares.pdb.datastore.internal; +import java.awt.BorderLayout; +import java.awt.event.KeyAdapter; +import java.awt.event.KeyEvent; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import javax.swing.JFrame; +import javax.swing.JTextArea; 
+import javax.swing.JTextField; import org.lucares.pdb.api.Tags; import org.lucares.pdb.blockstorage.BSFile; import org.lucares.pdb.datastore.Doc; +import org.lucares.pdb.datastore.Proposal; import org.lucares.utils.CollectionUtils; import org.lucares.utils.file.FileUtils; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @Test @@ -107,6 +122,162 @@ public class DataStoreTest { Assert.assertEquals(eagleTimBlockOffset % BSFile.BLOCK_SIZE, 0); } + /** Rows for testProposals: the query with a pipe character marking the caret, the field name, and the expected proposed values. */ @DataProvider(name = "providerProposals") + public Iterator providerProposals() { + + final List result = new ArrayList<>(); + + result.add(new Object[] { "type=bird and subtype=eagle and name=|", "name", Arrays.asList("Tim") }); + + // returns Tim, because it is the only dog's name starting with 'Ti' + result.add(new Object[] { "!name=Ti| and type=dog", "name", Arrays.asList("Tim") }); + + // all cats + result.add(new Object[] { "type=cat and !name=|", "name", + Arrays.asList("Jane", "John", "Paul", "Sam", "Timothy") }); + + // finds nothing, because there are no dogs whose name is neither Jenny nor Ti* + result.add(new Object[] { "!name=Ti| and type=dog and !name=Jenny", "name", Arrays.asList() }); + + result.add(new Object[] { "(type=bird and age=three or type=dog and age=three) and name=|", "name", + Arrays.asList("Jenny", "Tim") }); + + // all but Jennifer + result.add(new Object[] { "!(type=bird) and name=|", "name", + Arrays.asList("Jane", "Jenny", "John", "Paul", "Sam", "Tim", "Timothy") }); + + result.add(new Object[] { "type=bird and !subtype=eagle and name=|", "name", Arrays.asList("Jennifer") }); + + // DeMorgan + // TODO should only match "Jenny", because Jenny is the only non-bird name + // starting with 'Jen' + result.add(new Object[] { "!(type=bird and name=Jen|)", "name", Arrays.asList("Jennifer", "Jenny") }); + result.add(new Object[] { "!(type=dog and name=|) and 
!type=cat", "name", + Arrays.asList("Jennifer", "Jenny", "Tim") }); + + return result.iterator(); + } + + @Test(dataProvider = "providerProposals") + public void testProposals(final String queryWithCaret, final String field, + final List expectedProposedValues) throws Exception { + + dataStore = new DataStore(dataDirectory); + + final List tags = Arrays.asList( + Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"), + Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"), + Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"), + + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"), + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"), + + Tags.create("type", "cat", "subtype", "tiger", "age", "one", "name", "Timothy"), + Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"), + Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John")); + + tags.forEach(dataStore::createNewFile); + + assertProposals(queryWithCaret, field, expectedProposedValues); + } + + public static void main(final String[] args) throws IOException, InterruptedException { + final Path dir = Files.createTempDirectory("pdb"); + try (DataStore dataStore = new DataStore(dir)) { + + final List tags = Arrays.asList( + Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"), + Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"), + Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"), + + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"), + Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"), + + Tags.create("type", 
"cat", "subtype", "tiger", "age", "one", "name", "Timothy"), + Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"), + Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"), + Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John")); + + tags.forEach(dataStore::createNewFile); + + /* window layout: query field on top, proposals in the centre, info area at the bottom */ final JFrame frame = new JFrame(); + final JTextField input = new JTextField(); + final JTextArea output = new JTextArea(); + final JTextArea info = new JTextArea(); + + frame.add(input, BorderLayout.NORTH); + frame.add(output, BorderLayout.CENTER); + frame.add(info, BorderLayout.SOUTH); + + input.setText("type=bird and !subtype=eagle and name="); + + input.addKeyListener(new KeyAdapter() { + + @Override + public void keyReleased(final KeyEvent e) { + + /* runs after each key release: recompute the proposals for the current text and caret position */ final String query = input.getText(); + final int caretIndex = input.getCaretPosition(); + final List proposals = dataStore.propose(query, caretIndex); + + final StringBuilder out = new StringBuilder(); + + for (final Proposal proposal : proposals) { + out.append(proposal.getProposedTag()); + out.append(" "); + out.append(proposal.getProposedQuery()); + out.append("\n"); + } + + final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, "|").toString(); + + out.append("\n"); + out.append("\n"); + out.append("input: " + queryWithCaretMarker); + + output.setText(out.toString()); + + } + }); + final List docs = dataStore.search(""); + final StringBuilder out = new StringBuilder(); + out.append("info\n"); + for (final Doc doc : docs) { + out.append(doc.getTags()); + out.append("\n"); + } + info.setText(out.toString()); + + frame.setSize(800, 600); + frame.setVisible(true); + /* block the main thread so that the enclosing try-with-resources keeps dataStore open while the window is shown */ TimeUnit.HOURS.sleep(1000); + } + } + + /** Strips the pipe caret marker from {@code queryWithCaret}, requests proposals at the marker position, checks that the new query of every proposal finds results, and compares the sorted proposed tags with the sorted expected values. */ private void assertProposals(final String queryWithCaret, final String field, + final List expectedProposedValues) { + final String query = 
queryWithCaret.replace("|", ""); + final int caretIndex = queryWithCaret.indexOf("|"); + final List proposals = dataStore.propose(query, caretIndex); + System.out.println( + "proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList())); + + proposals.forEach(p -> assertQueryFindsResults(p.getNewQuery())); + + final List proposedValues = CollectionUtils.map(proposals, Proposal::getProposedTag); + /* sort both lists so that the comparison is independent of the proposal order */ Collections.sort(proposedValues); + Collections.sort(expectedProposedValues); + Assert.assertEquals(proposedValues.toString(), expectedProposedValues.toString(), "proposed values:"); + } + + /** Asserts that searching the data store with {@code query} returns a non-empty result. */ private void assertQueryFindsResults(final String query) { + final List result = dataStore.search(query); + Assert.assertFalse(result.isEmpty(), "The query '" + query + "' must return a result, but didn't."); + } + private void assertSearch(final String query, final Tags... tags) { final List actualDocs = dataStore.search(query); final List actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber); diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java index aad9399..b077d80 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java @@ -141,6 +141,19 @@ public class ProposerTest { ); } + /** Checks the proposals at caret index 8, directly after the name term, when the query continues with an and-expression. */ public void testProposalWithAndExpression() throws Exception { + assertProposals("name=*im and bird=eagle", 8, // + new Proposal("Tim", "name=Tim and bird=eagle", true, "name=Tim and bird=eagle", 8), // + new Proposal("Timothy", "name=Timothy and bird=eagle", true, "name=Timothy and bird=eagle", 12)// + ); + + assertProposals("name=*im and bird=eagle,pigeon", 8, // + new Proposal("Tim", "name=Tim and bird=eagle,pigeon", true, "name=Tim and bird=eagle,pigeon", 8), // + new Proposal("Timothy", "name=Timothy and bird=eagle,pigeon", 
true, + "name=Timothy and bird=eagle,pigeon", 12)// + ); + } + private void assertProposals(final String query, final int caretIndex, final Proposal... expected) throws InterruptedException { diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java index 7c74fe7..9204a22 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java @@ -41,7 +41,7 @@ public class QueryCompletionIndexTest { Tags.create("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C ); - try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory.resolve("qci.bs"))) { + try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) { for (final Tags t : tags) { index.addTags(t); } @@ -55,6 +55,13 @@ public class QueryCompletionIndexTest { // tags A and C match firstname=John, but both have country=Atlantis final SortedSet countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country"); Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis")); + + // findAllValuesForField sorts alphabetically + final SortedSet firstnames = index.findAllValuesForField("firstname"); + Assert.assertEquals(firstnames, Arrays.asList("Jane", "John"), "found: " + firstnames); + + final SortedSet countries = index.findAllValuesForField("country"); + Assert.assertEquals(countries, Arrays.asList("Atlantis", "ElDorado")); } } } diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java index 5a9ca11..838c614 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java @@ -11,8 +11,8 @@ public class Tag implements Comparable { } public Tag(final String key, final 
String value) { - this.key = Tags.STRING_COMPRESSOR.put(key); - this.value = Tags.STRING_COMPRESSOR.put(value); + /* a null key or value is stored as -1 instead of being put into the compressor */ this.key = key != null ? Tags.STRING_COMPRESSOR.put(key) : -1; + this.value = value != null ? Tags.STRING_COMPRESSOR.put(value) : -1; } @Override diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java index 6e5c57f..44ad3c3 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java @@ -72,6 +72,13 @@ public class Tags implements Comparable { return result; } + /** Builds {@link Tags} from four key/value pairs. */ public static Tags create(final String key1, final String value1, final String key2, final String value2, + final String key3, final String value3, final String key4, final String value4) { + final Tags result = TagsBuilder.create().add(key1, value1).add(key2, value2).add(key3, value3).add(key4, value4) + .build(); + return result; + } + public static Tags fromBytes(final byte[] bytes) { final List result = new ArrayList<>(); @@ -188,7 +195,7 @@ public class Tags implements Comparable { @Override public String toString() { - return "Tags [tags=" + tags + "]"; + return String.valueOf(tags); } @Override diff --git a/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java b/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java index e54b70a..e3beff9 100644 --- a/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java +++ b/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java @@ -7,6 +7,7 @@ import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -62,4 +63,31 @@ public class CollectionUtils { return collection.stream().filter(predicate).collect(Collectors.toList()); } + /** Returns the index of the first element of {@code list} accepted by {@code predicate}, or -1 if there is none. */ public static int indexOf(final List list, final Predicate predicate) { + for (int i = 0; i < list.size(); 
i++) { + if (predicate.test(list.get(i))) { + return i; + } + } + return -1; + } + + /** Copies {@code collection} into the collection obtained from {@code generator} and removes all elements of {@code remove} from that copy. */ public static > T removeAll(final T collection, final T remove, + final Supplier generator) { + + final T result = generator.get(); + result.addAll(collection); + result.removeAll(remove); + return result; + } + + /** Copies {@code collection} into the collection obtained from {@code generator} and keeps only the elements also contained in {@code retain}. */ public static > T retainAll(final T collection, final T retain, + final Supplier generator) { + + final T result = generator.get(); + result.addAll(collection); + result.retainAll(retain); + return result; + } + } diff --git a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java index 265bf27..45b59e5 100644 --- a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java +++ b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java @@ -39,6 +39,12 @@ public class Preconditions { } } + /** Throws an {@link IllegalStateException} unless {@code a < b}; {@code message} is formatted with {@link MessageFormat}. */ public static void checkSmaller(final long a, final long b, final String message, final Object... args) { + if (a >= b) { + throw new IllegalStateException(MessageFormat.format(message, args) + " Expected: " + a + " < " + b); + } + } + public static void checkEqual(final Object actual, final Object expected) { checkEqual(actual, expected, "expected {0} is equal to {1}", actual, expected); } @@ -74,6 +80,18 @@ public class Preconditions { checkEqual(actual, true, message, args); } + /** + * Check that the given value is false. + * + * @param actual must be false + * @param message formatted with {@link MessageFormat} + * @param args arguments for the message + * @throws IllegalStateException if {@code actual} is not false + */ + public static void checkFalse(final boolean actual, final String message, final Object... args) { + checkEqual(actual, false, message, args); + } + public static void checkNull(final Object actual, final String message, final Object... args) { if (actual != null) { throw new IllegalStateException(MessageFormat.format(message, args));