prepare more efficient query completion
Adds an index that answers the question: given a query "a=b and c=", what are the possible values for c?
This commit is contained in:
26
block-storage/src/main/java/org/lucares/pdb/map/Empty.java
Normal file
26
block-storage/src/main/java/org/lucares/pdb/map/Empty.java
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package org.lucares.pdb.map;
|
||||||
|
|
||||||
|
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
|
||||||
|
|
||||||
|
/**
 * Placeholder value for a {@link PersistentMap} that only needs its keys.
 * <p>
 * Pair it with {@link PersistentMap#EMPTY_ENCODER} as the value
 * {@link EncoderDecoder}.
 * <p>
 * Implementation note: {@link Void} is not an option, because {@link Void}
 * cannot be instantiated. A {@code PersistentMap<String, Void>} would have to
 * return {@code null} from {@link PersistentMap#getValue(Object)}, which would
 * make it impossible to tell an existing key from a missing one.<br>
 * {@link Empty} avoids that by offering exactly one unmodifiable value.
 */
public final class Empty {

    /** Text returned by {@link #toString()}. */
    private static final String TEXT = "<empty>";

    /** The one and only instance of this class. */
    public static final Empty INSTANCE = new Empty();

    private Empty() {
        // not instantiable from outside; use INSTANCE
    }

    @Override
    public String toString() {
        return TEXT;
    }
}
|
||||||
@@ -8,6 +8,7 @@ import java.util.Arrays;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Stack;
|
import java.util.Stack;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
@@ -87,9 +88,28 @@ public class PersistentMap<K, V> implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final class EmptyCoder implements EncoderDecoder<Empty> {
|
||||||
|
|
||||||
|
private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] encode(final Empty __) {
|
||||||
|
return EMPTY_BYTE_ARRAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Empty decode(final byte[] bytes) {
|
||||||
|
|
||||||
|
Preconditions.checkEqual(bytes.length, 0, "");
|
||||||
|
|
||||||
|
return Empty.INSTANCE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static final EncoderDecoder<Long> LONG_CODER = new LongCoder();
|
public static final EncoderDecoder<Long> LONG_CODER = new LongCoder();
|
||||||
public static final EncoderDecoder<UUID> UUID_ENCODER = new UUIDCoder();
|
public static final EncoderDecoder<UUID> UUID_ENCODER = new UUIDCoder();
|
||||||
public static final EncoderDecoder<String> STRING_CODER = new StringCoder();
|
public static final EncoderDecoder<String> STRING_CODER = new StringCoder();
|
||||||
|
public static final EncoderDecoder<Empty> EMPTY_ENCODER = new EmptyCoder();
|
||||||
|
|
||||||
static final int BLOCK_SIZE = 4096;
|
static final int BLOCK_SIZE = 4096;
|
||||||
static final long NODE_OFFSET_TO_ROOT_NODE = 8;
|
static final long NODE_OFFSET_TO_ROOT_NODE = 8;
|
||||||
@@ -180,17 +200,27 @@ public class PersistentMap<K, V> implements AutoCloseable {
|
|||||||
final byte[] value) throws IOException {
|
final byte[] value) throws IOException {
|
||||||
final PersistentMapDiskNode node = getNode(nodeOffest);
|
final PersistentMapDiskNode node = getNode(nodeOffest);
|
||||||
|
|
||||||
final var entry = node.getNodeEntryTo(key);
|
final NodeEntry entry = node.getNodeEntryTo(key);
|
||||||
if (entry == null || entry.isDataNode()) {
|
if (entry == null || entry.isDataNode()) {
|
||||||
|
|
||||||
final byte[] oldValue;
|
final byte[] oldValue;
|
||||||
if (entry == null) {
|
if (entry == null) {
|
||||||
oldValue = null;
|
oldValue = null;
|
||||||
} else {
|
} else {
|
||||||
|
// found a NodeEntry that is either equal to key, or it is at the insertion
|
||||||
|
// point
|
||||||
final boolean entryIsForKey = entry.equal(key);
|
final boolean entryIsForKey = entry.equal(key);
|
||||||
|
|
||||||
oldValue = entryIsForKey ? entry.getValue() : null;
|
oldValue = entryIsForKey ? entry.getValue() : null;
|
||||||
|
|
||||||
|
// Early exit, if the old value equals the new value.
// We do not have to replace the value, because it would not change anything
// (just cause unnecessary write operations). But we return the old value so
// that the caller thinks we replaced the value.
// Both values are byte arrays, so the content must be compared with
// Arrays.equals. Objects.equals falls back to Object.equals for arrays, which
// only compares references and would practically never match here.
if (Arrays.equals(oldValue, value)) {
    return oldValue;
}
|
||||||
|
|
||||||
if (entryIsForKey) {
|
if (entryIsForKey) {
|
||||||
node.removeKey(key);
|
node.removeKey(key);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import org.lucares.pdb.datastore.Doc;
|
|||||||
import org.lucares.pdb.datastore.Proposal;
|
import org.lucares.pdb.datastore.Proposal;
|
||||||
import org.lucares.pdb.datastore.lang.Expression;
|
import org.lucares.pdb.datastore.lang.Expression;
|
||||||
import org.lucares.pdb.datastore.lang.ExpressionToDocIdVisitor;
|
import org.lucares.pdb.datastore.lang.ExpressionToDocIdVisitor;
|
||||||
|
import org.lucares.pdb.datastore.lang.NewProposerParser;
|
||||||
import org.lucares.pdb.datastore.lang.QueryLanguageParser;
|
import org.lucares.pdb.datastore.lang.QueryLanguageParser;
|
||||||
import org.lucares.pdb.diskstorage.DiskStorage;
|
import org.lucares.pdb.diskstorage.DiskStorage;
|
||||||
import org.lucares.pdb.map.PersistentMap;
|
import org.lucares.pdb.map.PersistentMap;
|
||||||
@@ -150,14 +151,17 @@ public class DataStore implements AutoCloseable {
|
|||||||
|
|
||||||
private final PersistentMap<Tag, Long> tagToDocsId;
|
private final PersistentMap<Tag, Long> tagToDocsId;
|
||||||
|
|
||||||
|
private final QueryCompletionIndex queryCompletionIndex;
|
||||||
|
|
||||||
// A Doc will never be changed once it is created. Therefore we can cache them
|
// A Doc will never be changed once it is created. Therefore we can cache them
|
||||||
// easily.
|
// easily.
|
||||||
private final HotEntryCache<Long, Doc> docIdToDocCache = new HotEntryCache<>(Duration.ofMinutes(10),
|
private final HotEntryCache<Long, Doc> docIdToDocCache = new HotEntryCache<>(Duration.ofSeconds(5),
|
||||||
"docIdToDocCache");
|
"docIdToDocCache");
|
||||||
|
|
||||||
private final DiskStorage diskStorage;
|
private final DiskStorage diskStorage;
|
||||||
private final Path diskStorageFilePath;
|
private final Path diskStorageFilePath;
|
||||||
private final Path storageBasePath;
|
private final Path storageBasePath;
|
||||||
|
private final Path queryCompletionIndexFile;
|
||||||
|
|
||||||
public DataStore(final Path dataDirectory) throws IOException {
|
public DataStore(final Path dataDirectory) throws IOException {
|
||||||
storageBasePath = storageDirectory(dataDirectory);
|
storageBasePath = storageDirectory(dataDirectory);
|
||||||
@@ -178,6 +182,9 @@ public class DataStore implements AutoCloseable {
|
|||||||
|
|
||||||
final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs");
|
final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs");
|
||||||
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC);
|
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC);
|
||||||
|
|
||||||
|
queryCompletionIndexFile = storageBasePath.resolve("queryCompletionIndex.bs");
|
||||||
|
queryCompletionIndex = new QueryCompletionIndex(queryCompletionIndexFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
||||||
@@ -199,6 +206,7 @@ public class DataStore implements AutoCloseable {
|
|||||||
final Long oldDocId = tagsToDocId.putValue(tags, docId);
|
final Long oldDocId = tagsToDocId.putValue(tags, docId);
|
||||||
Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags);
|
Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags);
|
||||||
|
|
||||||
|
// store mapping from tag to docId, so that we can find all docs for a given tag
|
||||||
final List<Tag> ts = new ArrayList<>(tags.toTags());
|
final List<Tag> ts = new ArrayList<>(tags.toTags());
|
||||||
ts.add(TAG_ALL_DOCS);
|
ts.add(TAG_ALL_DOCS);
|
||||||
for (final Tag tag : ts) {
|
for (final Tag tag : ts) {
|
||||||
@@ -215,6 +223,10 @@ public class DataStore implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// index the tags, so that we can efficiently find all possible values for a
|
||||||
|
// field in a query
|
||||||
|
queryCompletionIndex.addTags(tags);
|
||||||
|
|
||||||
return newFilesRootBlockOffset;
|
return newFilesRootBlockOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -295,7 +307,7 @@ public class DataStore implements AutoCloseable {
|
|||||||
final List<Doc> result = new ArrayList<>(docIdsList.size());
|
final List<Doc> result = new ArrayList<>(docIdsList.size());
|
||||||
|
|
||||||
synchronized (docIdToDoc) {
|
synchronized (docIdToDoc) {
|
||||||
|
final long start = System.nanoTime();
|
||||||
for (int i = 0; i < docIdsList.size(); i++) {
|
for (int i = 0; i < docIdsList.size(); i++) {
|
||||||
final long docId = docIdsList.get(i);
|
final long docId = docIdsList.get(i);
|
||||||
|
|
||||||
@@ -304,6 +316,8 @@ public class DataStore implements AutoCloseable {
|
|||||||
|
|
||||||
result.add(doc);
|
result.add(doc);
|
||||||
}
|
}
|
||||||
|
System.out.println(
|
||||||
|
"mapDocIdsToDocs(" + docIdsList.size() + "): " + (System.nanoTime() - start) / 1_000_000.0 + "ms");
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -342,6 +356,11 @@ public class DataStore implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<Proposal> propose(final String query, final int caretIndex) {
|
public List<Proposal> propose(final String query, final int caretIndex) {
|
||||||
|
|
||||||
|
final NewProposerParser newProposerParser = new NewProposerParser(this);
|
||||||
|
final List<Proposal> proposals = newProposerParser.propose(query, caretIndex);
|
||||||
|
System.out.println(proposals);
|
||||||
|
|
||||||
return new Proposer(this).propose(query, caretIndex);
|
return new Proposer(this).propose(query, caretIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,158 @@
|
|||||||
|
package org.lucares.pdb.datastore.internal;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
import org.lucares.collections.LongList;
|
||||||
|
import org.lucares.pdb.api.Tag;
|
||||||
|
import org.lucares.pdb.api.Tags;
|
||||||
|
import org.lucares.pdb.map.Empty;
|
||||||
|
import org.lucares.pdb.map.PersistentMap;
|
||||||
|
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
|
||||||
|
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This index supports query completion.
|
||||||
|
* <p>
|
||||||
|
* E.g. Given the query "firstname=John and lastname=|" ('|' denotes the
|
||||||
|
* position of the caret). How do we find all lastnames that match this query?
|
||||||
|
* <br>
|
||||||
|
* The expensive way is to execute the query for all available lastnames and
|
||||||
|
* keep those that return at least one result.<br>
|
||||||
|
* A more efficient way uses an index that lists all lastnames that occur with
|
||||||
|
* firstname=John. If we write this as table, then it looks like this:
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
*┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
|
||||||
|
*┃ fieldA ┃ valueA ┃ fieldB ┃ valueB ┃
|
||||||
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
|
*┃firstname┃ John ┃lastname ┃ Connor ┃
|
||||||
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
|
*┃firstname┃ John ┃lastname ┃Carpenter┃
|
||||||
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
|
*┃firstname┃ John ┃country ┃ Germany ┃
|
||||||
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
|
*┃firstname┃ John ┃lastname ┃ Nash ┃
|
||||||
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
|
*┃firstname┃ Rick ┃lastname ┃ Meyer ┃
|
||||||
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
|
*┃firstname┃ Rick ┃lastname ┃ Castle ┃
|
||||||
|
*┗━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┛
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* The lastnames where firstname=John are Connor, Carpenter and Nash. Given such
|
||||||
|
* a table we can just search for all rows with fieldA=firstname and valueA=John and
|
||||||
|
* fieldB = lastname.
|
||||||
|
* <p>
|
||||||
|
* The values in this index represent such a table.
|
||||||
|
* <p>
|
||||||
|
* Note: the index contains all four columns, but when searching we only use the
|
||||||
|
* first three.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class QueryCompletionIndex implements AutoCloseable {
|
||||||
|
private static final class TwoTags {
|
||||||
|
private final Tag tagA;
|
||||||
|
private final Tag tagB;
|
||||||
|
|
||||||
|
public TwoTags(final Tag tagA, final Tag tagB) {
|
||||||
|
this.tagA = tagA;
|
||||||
|
this.tagB = tagB;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Tag getTagA() {
|
||||||
|
return tagA;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Tag getTagB() {
|
||||||
|
return tagB;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return tagA + "::" + tagB;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class EncoderTwoTags implements EncoderDecoder<TwoTags> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] encode(final TwoTags tagAndField) {
|
||||||
|
final LongList tmp = new LongList(4);
|
||||||
|
final Tag tagA = tagAndField.getTagA();
|
||||||
|
final Tag tagB = tagAndField.getTagB();
|
||||||
|
|
||||||
|
tmp.add(tagA.getKey());
|
||||||
|
tmp.add(tagA.getValue());
|
||||||
|
|
||||||
|
tmp.add(tagB.getKey());
|
||||||
|
|
||||||
|
// A query for tagA.key and tagA.value and tagB.key is done by setting
|
||||||
|
// tagB.value==0.
|
||||||
|
// The query is then executed as a prefix search. Thus tagB.value must not be
|
||||||
|
// part of the byte array that is returned.
|
||||||
|
if (tagB.getValue() >= 0) {
|
||||||
|
tmp.add(tagB.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
return VariableByteEncoder.encode(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TwoTags decode(final byte[] bytes) {
|
||||||
|
|
||||||
|
final LongList tmp = VariableByteEncoder.decode(bytes);
|
||||||
|
final int tagAKey = (int) tmp.get(0);
|
||||||
|
final int tagAValue = (int) tmp.get(1);
|
||||||
|
final int tagBKey = (int) tmp.get(2);
|
||||||
|
final int tagBValue = (int) tmp.get(3);
|
||||||
|
|
||||||
|
final Tag tagA = new Tag(tagAKey, tagAValue);
|
||||||
|
final Tag tagB = new Tag(tagBKey, tagBValue);
|
||||||
|
|
||||||
|
return new TwoTags(tagA, tagB);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final PersistentMap<TwoTags, Empty> tagToTagIndex;
|
||||||
|
|
||||||
|
public QueryCompletionIndex(final Path indexFile) throws IOException {
|
||||||
|
tagToTagIndex = new PersistentMap<>(indexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addTags(final Tags tags) throws IOException {
|
||||||
|
final List<Tag> listOfTagsA = tags.toTags();
|
||||||
|
final List<Tag> listOfTagsB = tags.toTags();
|
||||||
|
|
||||||
|
// index all combinations of tagA and tagB
|
||||||
|
for (final Tag tagA : listOfTagsA) {
|
||||||
|
for (final Tag tagB : listOfTagsB) {
|
||||||
|
final TwoTags key = new TwoTags(tagA, tagB);
|
||||||
|
tagToTagIndex.putValue(key, Empty.INSTANCE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
tagToTagIndex.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SortedSet<String> find(final Tag tag, final String field) throws IOException {
|
||||||
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
|
final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
|
||||||
|
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
|
||||||
|
// EncoderTwoTags
|
||||||
|
final TwoTags keyPrefix = new TwoTags(tag, tagB);
|
||||||
|
tagToTagIndex.visitValues(keyPrefix, (k, v) -> {
|
||||||
|
result.add(k.getTagB().getValueAsString());
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
package org.lucares.pdb.datastore.lang;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.lucares.pdb.datastore.Proposal;
|
||||||
|
import org.lucares.pdb.datastore.internal.DataStore;
|
||||||
|
|
||||||
|
public class NewProposerParser {
|
||||||
|
|
||||||
|
private final static String CARET_MARKER = "\ue001"; // third character in the private use area
|
||||||
|
|
||||||
|
private final DataStore dataStore;
|
||||||
|
|
||||||
|
public NewProposerParser(final DataStore dataStore) {
|
||||||
|
this.dataStore = dataStore;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Proposal> propose(final String query, final int caretIndex) {
|
||||||
|
|
||||||
|
final String queryString = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();
|
||||||
|
|
||||||
|
final Expression expression = QueryLanguageParser.parse(queryString);
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -76,6 +76,9 @@ public class ProposerTest {
|
|||||||
assertProposals("bird", 4, //
|
assertProposals("bird", 4, //
|
||||||
new Proposal("bird", "bird=* ", true, "bird=", 5) //
|
new Proposal("bird", "bird=* ", true, "bird=", 5) //
|
||||||
);
|
);
|
||||||
|
assertProposals("bird=eagle and n", 16, //
|
||||||
|
new Proposal("name", "bird=eagle and name=* ", true, "bird=eagle and name=", 20) //
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPrefixOfValue() throws Exception {
|
public void testPrefixOfValue() throws Exception {
|
||||||
@@ -86,9 +89,9 @@ public class ProposerTest {
|
|||||||
new Proposal("Jennifer", "name =Jennifer", true, "name =Jennifer", 14), //
|
new Proposal("Jennifer", "name =Jennifer", true, "name =Jennifer", 14), //
|
||||||
new Proposal("Jenny", "name =Jenny", true, "name =Jenny", 11) //
|
new Proposal("Jenny", "name =Jenny", true, "name =Jenny", 11) //
|
||||||
);
|
);
|
||||||
|
assertProposals("name =Tim,Je", 12, //
|
||||||
assertProposals("bird=eagle and n", 16, //
|
new Proposal("Jennifer", "name =Tim,Jennifer", true, "name =Tim,Jennifer", 18), //
|
||||||
new Proposal("name", "bird=eagle and name=* ", true, "bird=eagle and name=", 20) //
|
new Proposal("Jenny", "name =Tim,Jenny", true, "name =Tim,Jenny", 15) //
|
||||||
);
|
);
|
||||||
/*
|
/*
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
package org.lucares.pdb.datastore.internal;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
|
||||||
|
import org.lucares.pdb.api.StringCompressor;
|
||||||
|
import org.lucares.pdb.api.Tag;
|
||||||
|
import org.lucares.pdb.api.Tags;
|
||||||
|
import org.lucares.pdb.api.UniqueStringIntegerPairs;
|
||||||
|
import org.lucares.utils.file.FileUtils;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public class QueryCompletionIndexTest {
|
||||||
|
|
||||||
|
private Path dataDirectory;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void beforeMethod() throws IOException {
|
||||||
|
dataDirectory = Files.createTempDirectory("pdb");
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterMethod
|
||||||
|
public void afterMethod() throws IOException {
|
||||||
|
FileUtils.delete(dataDirectory);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test() throws Exception {
|
||||||
|
Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs());
|
||||||
|
|
||||||
|
final List<Tags> tags = Arrays.asList(//
|
||||||
|
Tags.create("firstname", "John", "lastname", "Doe", "country", "Atlantis"), // A
|
||||||
|
Tags.create("firstname", "Jane", "lastname", "Doe", "country", "ElDorado"), // B
|
||||||
|
Tags.create("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C
|
||||||
|
);
|
||||||
|
|
||||||
|
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory.resolve("qci.bs"))) {
|
||||||
|
for (final Tags t : tags) {
|
||||||
|
index.addTags(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// all firstnames where lastname=Doe are returned sorted alphabetically.
|
||||||
|
// tags A and B match
|
||||||
|
final SortedSet<String> firstnamesWithLastnameDoe = index.find(new Tag("lastname", "Doe"), "firstname");
|
||||||
|
Assert.assertEquals(firstnamesWithLastnameDoe, Arrays.asList("Jane", "John"));
|
||||||
|
|
||||||
|
// no duplicates are returned:
|
||||||
|
// tags A and C match firstname=John, but both have country=Atlantis
|
||||||
|
final SortedSet<String> countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country");
|
||||||
|
Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,6 +8,7 @@ import java.util.TreeSet;
|
|||||||
import java.util.function.BiConsumer;
|
import java.util.function.BiConsumer;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import org.lucares.collections.IntList;
|
||||||
import org.lucares.collections.LongList;
|
import org.lucares.collections.LongList;
|
||||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||||
|
|
||||||
@@ -156,6 +157,14 @@ public class Tags implements Comparable<Tags> {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public IntList getKeysAsInt() {
|
||||||
|
final IntList result = new IntList();
|
||||||
|
for (final Tag tag : tags) {
|
||||||
|
result.add(tag.getKey());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
public List<Tag> toTags() {
|
public List<Tag> toTags() {
|
||||||
return Collections.unmodifiableList(tags);
|
return Collections.unmodifiableList(tags);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user