diff --git a/block-storage/src/main/java/org/lucares/pdb/map/Empty.java b/block-storage/src/main/java/org/lucares/pdb/map/Empty.java
new file mode 100644
index 0000000..7627887
--- /dev/null
+++ b/block-storage/src/main/java/org/lucares/pdb/map/Empty.java
@@ -0,0 +1,26 @@
+package org.lucares.pdb.map;
+
+import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
+
+/**
+ * Used to denote empty values in {@link PersistentMap}.
+ *
+ * Use {@link PersistentMap#EMPTY_ENCODER} as {@link EncoderDecoder}.
+ *
+ * Implementation note: We cannot use {@link Void}, because {@link Void} cannot
+ * be instantiated. A {@link PersistentMap PersistentMap&lt;String, Void&gt;}
+ * would have to return {@code null} for {@link PersistentMap#getValue(Object)}
+ * which would make it impossible to know whether the key existed or not.
+ * {@link Empty} solves this by providing a single unmodifiable value.
+ */
+public final class Empty {
+ public static final Empty INSTANCE = new Empty();
+ // private constructor: INSTANCE is the only instance created by this class
+ private Empty() {
+ }
+ // the empty value is rendered as the empty string
+ @Override
+ public String toString() {
+ return "";
+ }
+}
diff --git a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java
index 7cf4a69..69dc042 100644
--- a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java
+++ b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java
@@ -8,6 +8,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Objects;
import java.util.Stack;
import java.util.UUID;
@@ -87,9 +88,28 @@ public class PersistentMap implements AutoCloseable {
}
}
+ private static final class EmptyCoder implements EncoderDecoder<Empty> {
+     // Empty carries no information, so its encoded form is the zero-length byte array.
+     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+
+     @Override
+     public byte[] encode(final Empty __) {
+         return EMPTY_BYTE_ARRAY; // safe to share: a zero-length array has nothing to modify
+     }
+
+     @Override
+     public Empty decode(final byte[] bytes) {
+         // anything but zero bytes means the value was not written by this coder
+         Preconditions.checkEqual(bytes.length, 0,
+                 "An Empty value must be encoded as zero bytes");
+         return Empty.INSTANCE;
+     }
+ }
+
public static final EncoderDecoder LONG_CODER = new LongCoder();
public static final EncoderDecoder UUID_ENCODER = new UUIDCoder();
public static final EncoderDecoder STRING_CODER = new StringCoder();
+ public static final EncoderDecoder EMPTY_ENCODER = new EmptyCoder();
static final int BLOCK_SIZE = 4096;
static final long NODE_OFFSET_TO_ROOT_NODE = 8;
@@ -180,17 +200,27 @@ public class PersistentMap implements AutoCloseable {
final byte[] value) throws IOException {
final PersistentMapDiskNode node = getNode(nodeOffest);
- final var entry = node.getNodeEntryTo(key);
+ final NodeEntry entry = node.getNodeEntryTo(key);
if (entry == null || entry.isDataNode()) {
final byte[] oldValue;
if (entry == null) {
oldValue = null;
} else {
+ // found a NodeEntry that is either equal to key, or it is at the insertion
+ // point
final boolean entryIsForKey = entry.equal(key);
oldValue = entryIsForKey ? entry.getValue() : null;
+ // Early exit if the stored bytes already equal the new bytes: rewriting them
+ // would only cause unnecessary write operations. We still return the oldValue
+ // so that the caller thinks we replaced the value. The arrays are compared by
+ // content (Arrays.equals); Objects.equals would only compare the references.
+ if (Arrays.equals(oldValue, value)) {
+     return oldValue;
+ }
+
if (entryIsForKey) {
node.removeKey(key);
}
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
index 1c1eefb..a3077b9 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
@@ -24,6 +24,7 @@ import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.Proposal;
import org.lucares.pdb.datastore.lang.Expression;
import org.lucares.pdb.datastore.lang.ExpressionToDocIdVisitor;
+import org.lucares.pdb.datastore.lang.NewProposerParser;
import org.lucares.pdb.datastore.lang.QueryLanguageParser;
import org.lucares.pdb.diskstorage.DiskStorage;
import org.lucares.pdb.map.PersistentMap;
@@ -150,14 +151,17 @@ public class DataStore implements AutoCloseable {
private final PersistentMap tagToDocsId;
+ private final QueryCompletionIndex queryCompletionIndex;
+
// A Doc will never be changed once it is created. Therefore we can cache them
// easily.
- private final HotEntryCache docIdToDocCache = new HotEntryCache<>(Duration.ofMinutes(10),
+ private final HotEntryCache docIdToDocCache = new HotEntryCache<>(Duration.ofSeconds(5),
"docIdToDocCache");
private final DiskStorage diskStorage;
private final Path diskStorageFilePath;
private final Path storageBasePath;
+ private final Path queryCompletionIndexFile;
public DataStore(final Path dataDirectory) throws IOException {
storageBasePath = storageDirectory(dataDirectory);
@@ -178,6 +182,9 @@ public class DataStore implements AutoCloseable {
final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs");
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC);
+
+ queryCompletionIndexFile = storageBasePath.resolve("queryCompletionIndex.bs");
+ queryCompletionIndex = new QueryCompletionIndex(queryCompletionIndexFile);
}
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
@@ -199,6 +206,7 @@ public class DataStore implements AutoCloseable {
final Long oldDocId = tagsToDocId.putValue(tags, docId);
Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags);
+ // store mapping from tag to docId, so that we can find all docs for a given tag
final List ts = new ArrayList<>(tags.toTags());
ts.add(TAG_ALL_DOCS);
for (final Tag tag : ts) {
@@ -215,6 +223,10 @@ public class DataStore implements AutoCloseable {
}
}
+ // index the tags, so that we can efficiently find all possible values for a
+ // field in a query
+ queryCompletionIndex.addTags(tags);
+
return newFilesRootBlockOffset;
}
@@ -295,7 +307,7 @@ public class DataStore implements AutoCloseable {
final List result = new ArrayList<>(docIdsList.size());
synchronized (docIdToDoc) {
-
+ final long start = System.nanoTime();
for (int i = 0; i < docIdsList.size(); i++) {
final long docId = docIdsList.get(i);
@@ -304,6 +316,8 @@ public class DataStore implements AutoCloseable {
result.add(doc);
}
+ System.out.println(
+ "mapDocIdsToDocs(" + docIdsList.size() + "): " + (System.nanoTime() - start) / 1_000_000.0 + "ms");
}
return result;
}
@@ -342,6 +356,11 @@ public class DataStore implements AutoCloseable {
}
public List propose(final String query, final int caretIndex) {
+ // NOTE(review): the result of the new parser is only printed to stdout here
+ final NewProposerParser newProposerParser = new NewProposerParser(this);
+ final List proposals = newProposerParser.propose(query, caretIndex);
+ System.out.println(proposals);
+ // the proposals handed to the caller still come from the existing Proposer
return new Proposer(this).propose(query, caretIndex);
}
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java
new file mode 100644
index 0000000..dcd66ab
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java
@@ -0,0 +1,158 @@
+package org.lucares.pdb.datastore.internal;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.lucares.collections.LongList;
+import org.lucares.pdb.api.Tag;
+import org.lucares.pdb.api.Tags;
+import org.lucares.pdb.map.Empty;
+import org.lucares.pdb.map.PersistentMap;
+import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
+import org.lucares.utils.byteencoder.VariableByteEncoder;
+
+/**
+ * This index supports query completion.
+ *
+ * E.g. Given the query "firstname=John and lastname=|" ('|' denotes the
+ * position of the caret). How do we find all lastnames that match this query?
+ *
+ * The expensive way is to execute the query for all available lastnames and
+ * keep those that return at least one result.
+ * A more efficient way uses an index that lists all lastnames that occur with
+ * firstname=John. If we write this as a table, then it looks like this:
+ *
+ * <pre>
+ *┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
+ *┃ fieldA ┃ valueA ┃ fieldB ┃ valueB ┃
+ *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
+ *┃firstname┃ John ┃lastname ┃ Connor ┃
+ *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
+ *┃firstname┃ John ┃lastname ┃Carpenter┃
+ *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
+ *┃firstname┃ John ┃country ┃ Germany ┃
+ *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
+ *┃firstname┃ John ┃lastname ┃ Nash ┃
+ *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
+ *┃firstname┃ Rick ┃lastname ┃ Meyer ┃
+ *┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
+ *┃firstname┃ Rick ┃lastname ┃ Castle ┃
+ *┗━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┛
+ * </pre>
+ *
+ * The lastnames where firstname=John are Connor, Carpenter and Nash. Given such
+ * a table we can just search for all rows with fieldA=firstname and
+ * valueA=John and fieldB=lastname.
+ *
+ * The keys in this index represent the rows of such a table.
+ *
+ * Note: the index contains all four columns, but when searching we only use the
+ * first three.
+ *
+ */
+public class QueryCompletionIndex implements AutoCloseable {
+ private static final class TwoTags { // one table row: tagA=(fieldA, valueA), tagB=(fieldB, valueB)
+ private final Tag tagA;
+ private final Tag tagB;
+
+ public TwoTags(final Tag tagA, final Tag tagB) {
+ this.tagA = tagA;
+ this.tagB = tagB;
+ }
+
+ public Tag getTagA() {
+ return tagA;
+ }
+
+ public Tag getTagB() {
+ return tagB;
+ }
+
+ @Override
+ public String toString() {
+ return tagA + "::" + tagB;
+ }
+
+ }
+ // Converts TwoTags to/from the variable-byte encoded list (tagA.key, tagA.value, tagB.key[, tagB.value]).
+ private static final class EncoderTwoTags implements EncoderDecoder {
+
+ @Override
+ public byte[] encode(final TwoTags tagAndField) {
+ final LongList tmp = new LongList(4);
+ final Tag tagA = tagAndField.getTagA();
+ final Tag tagB = tagAndField.getTagB();
+
+ tmp.add(tagA.getKey());
+ tmp.add(tagA.getValue());
+
+ tmp.add(tagB.getKey());
+
+ // A query for tagA.key and tagA.value and tagB.key is done by setting
+ // tagB.value to a negative number (find uses -1).
+ // The query is then executed as a prefix search. Thus tagB.value must not be
+ // part of the byte array that is returned.
+ if (tagB.getValue() >= 0) {
+ tmp.add(tagB.getValue());
+ }
+
+ return VariableByteEncoder.encode(tmp);
+ }
+
+ @Override
+ public TwoTags decode(final byte[] bytes) {
+ // NOTE(review): reads four values unconditionally - presumably only complete keys are decoded; confirm
+ final LongList tmp = VariableByteEncoder.decode(bytes);
+ final int tagAKey = (int) tmp.get(0);
+ final int tagAValue = (int) tmp.get(1);
+ final int tagBKey = (int) tmp.get(2);
+ final int tagBValue = (int) tmp.get(3);
+
+ final Tag tagA = new Tag(tagAKey, tagAValue);
+ final Tag tagB = new Tag(tagBKey, tagBValue);
+
+ return new TwoTags(tagA, tagB);
+ }
+ }
+ // the keys hold all the information; every value is Empty.INSTANCE
+ private final PersistentMap tagToTagIndex;
+ /** Backs the index with a {@link PersistentMap} that uses the given file. */
+ public QueryCompletionIndex(final Path indexFile) throws IOException {
+ tagToTagIndex = new PersistentMap<>(indexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER);
+ }
+ /** Adds a key for every ordered pair of the given tags, including each tag paired with itself. */
+ public void addTags(final Tags tags) throws IOException {
+ final List listOfTagsA = tags.toTags();
+ final List listOfTagsB = tags.toTags();
+
+ // index all combinations of tagA and tagB
+ for (final Tag tagA : listOfTagsA) {
+ for (final Tag tagB : listOfTagsB) {
+ final TwoTags key = new TwoTags(tagA, tagB);
+ tagToTagIndex.putValue(key, Empty.INSTANCE);
+ }
+ }
+ }
+ /** Closes the underlying map. */
+ @Override
+ public void close() throws IOException {
+ tagToTagIndex.close();
+ }
+ /** Collects, sorted, the string values of tagB from the entries visited for the prefix (tag, field). */
+ public SortedSet find(final Tag tag, final String field) throws IOException {
+ final SortedSet result = new TreeSet<>();
+ final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
+ final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
+ // EncoderTwoTags
+ final TwoTags keyPrefix = new TwoTags(tag, tagB);
+ tagToTagIndex.visitValues(keyPrefix, (k, v) -> {
+ result.add(k.getTagB().getValueAsString());
+ });
+
+ return result;
+
+ }
+}
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java
new file mode 100644
index 0000000..4ab7624
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/NewProposerParser.java
@@ -0,0 +1,27 @@
+package org.lucares.pdb.datastore.lang;
+
+import java.util.List;
+
+import org.lucares.pdb.datastore.Proposal;
+import org.lucares.pdb.datastore.internal.DataStore;
+
+public class NewProposerParser {
+    // marks the caret inside the query text; U+E001 is the second code point of the private use area
+    private final static String CARET_MARKER = "\ue001";
+
+    private final DataStore dataStore;
+
+    public NewProposerParser(final DataStore dataStore) {
+        this.dataStore = dataStore;
+    }
+
+    /** Work in progress: parses the query with the caret marked; returns no proposals yet, never null. */
+    public List<Proposal> propose(final String query, final int caretIndex) {
+        // insert the marker at the caret position before handing the text to the parser
+        final String queryString = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();
+        // the expression is not evaluated yet; the parse call is kept so that its failures still surface
+        final Expression expression = QueryLanguageParser.parse(queryString);
+        return java.util.Collections.emptyList();
+    }
+
+}
diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java
index 0fc1a5a..aad9399 100644
--- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java
+++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java
@@ -76,6 +76,9 @@ public class ProposerTest {
assertProposals("bird", 4, //
new Proposal("bird", "bird=* ", true, "bird=", 5) //
);
+ assertProposals("bird=eagle and n", 16, //
+ new Proposal("name", "bird=eagle and name=* ", true, "bird=eagle and name=", 20) //
+ );
}
public void testPrefixOfValue() throws Exception {
@@ -86,9 +89,9 @@ public class ProposerTest {
new Proposal("Jennifer", "name =Jennifer", true, "name =Jennifer", 14), //
new Proposal("Jenny", "name =Jenny", true, "name =Jenny", 11) //
);
-
- assertProposals("bird=eagle and n", 16, //
- new Proposal("name", "bird=eagle and name=* ", true, "bird=eagle and name=", 20) //
+ assertProposals("name =Tim,Je", 12, //
+ new Proposal("Jennifer", "name =Tim,Jennifer", true, "name =Tim,Jennifer", 18), //
+ new Proposal("Jenny", "name =Tim,Jenny", true, "name =Tim,Jenny", 15) //
);
/*
*/
diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java
new file mode 100644
index 0000000..7c74fe7
--- /dev/null
+++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java
@@ -0,0 +1,60 @@
+package org.lucares.pdb.datastore.internal;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedSet;
+
+import org.lucares.pdb.api.StringCompressor;
+import org.lucares.pdb.api.Tag;
+import org.lucares.pdb.api.Tags;
+import org.lucares.pdb.api.UniqueStringIntegerPairs;
+import org.lucares.utils.file.FileUtils;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+@Test
+public class QueryCompletionIndexTest {
+ // temporary directory that holds the index file; created and deleted around every test method
+ private Path dataDirectory;
+
+ @BeforeMethod
+ public void beforeMethod() throws IOException {
+ dataDirectory = Files.createTempDirectory("pdb");
+ }
+
+ @AfterMethod
+ public void afterMethod() throws IOException {
+ FileUtils.delete(dataDirectory);
+ }
+ // Indexes three tag sets and checks that find() returns sorted values without duplicates.
+ public void test() throws Exception {
+ Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs());
+ // the letters A, B and C are referenced by the comments on the assertions below
+ final List tags = Arrays.asList(//
+ Tags.create("firstname", "John", "lastname", "Doe", "country", "Atlantis"), // A
+ Tags.create("firstname", "Jane", "lastname", "Doe", "country", "ElDorado"), // B
+ Tags.create("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C
+ );
+
+ try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory.resolve("qci.bs"))) {
+ for (final Tags t : tags) {
+ index.addTags(t);
+ }
+
+ // all firstnames where lastname=Doe are returned sorted alphabetically.
+ // tags A and B match
+ final SortedSet firstnamesWithLastnameDoe = index.find(new Tag("lastname", "Doe"), "firstname");
+ Assert.assertEquals(firstnamesWithLastnameDoe, Arrays.asList("Jane", "John"));
+
+ // no duplicates are returned:
+ // tags A and C match firstname=John, but both have country=Atlantis
+ final SortedSet countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country");
+ Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis"));
+ }
+ }
+}
diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java
index f3d9d62..6e5c57f 100644
--- a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java
+++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java
@@ -8,6 +8,7 @@ import java.util.TreeSet;
import java.util.function.BiConsumer;
import java.util.function.Function;
+import org.lucares.collections.IntList;
import org.lucares.collections.LongList;
import org.lucares.utils.byteencoder.VariableByteEncoder;
@@ -156,6 +157,14 @@ public class Tags implements Comparable {
return result;
}
+ public IntList getKeysAsInt() {
+     // Collects the integer key of every tag, in the iteration order of the
+     // tags; the values of the tags are ignored.
+     final IntList keys = new IntList();
+     tags.forEach(tag -> keys.add(tag.getKey()));
+     return keys;
+ }
+
public List toTags() {
return Collections.unmodifiableList(tags); // read-only view of the internal list, not a copy
}