remove static string compressor from QueryCompletionIndex
This commit is contained in:
@@ -137,7 +137,7 @@ public class DataStore implements AutoCloseable {
|
|||||||
docIdToDoc = new PartitionPersistentMap<>(storageBasePath, "docIdToDocIndex.bs", PersistentMap.LONG_CODER,
|
docIdToDoc = new PartitionPersistentMap<>(storageBasePath, "docIdToDocIndex.bs", PersistentMap.LONG_CODER,
|
||||||
new DocEncoderDecoder());
|
new DocEncoderDecoder());
|
||||||
|
|
||||||
queryCompletionIndex = new QueryCompletionIndex(storageBasePath);
|
queryCompletionIndex = new QueryCompletionIndex(storageBasePath, stringCompressor);
|
||||||
|
|
||||||
writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000);
|
writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000);
|
||||||
writerCache.addListener((key, value) -> value.close());
|
writerCache.addListener((key, value) -> value.close());
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||||||
|
|
||||||
import org.lucares.collections.LongList;
|
import org.lucares.collections.LongList;
|
||||||
import org.lucares.pdb.api.DateTimeRange;
|
import org.lucares.pdb.api.DateTimeRange;
|
||||||
|
import org.lucares.pdb.api.StringCompressor;
|
||||||
import org.lucares.pdb.api.Tag;
|
import org.lucares.pdb.api.Tag;
|
||||||
import org.lucares.pdb.api.Tags;
|
import org.lucares.pdb.api.Tags;
|
||||||
import org.lucares.pdb.datastore.lang.QueryCompletionExpressionOptimizer;
|
import org.lucares.pdb.datastore.lang.QueryCompletionExpressionOptimizer;
|
||||||
@@ -152,12 +153,6 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
this.tagB = tagB;
|
this.tagB = tagB;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) {
|
|
||||||
|
|
||||||
tagA = Tags.STRING_COMPRESSOR.createTag(fieldA, valueA);
|
|
||||||
tagB = Tags.STRING_COMPRESSOR.createTag(fieldB, valueB);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Tag getTagA() {
|
public Tag getTagA() {
|
||||||
return tagA;
|
return tagA;
|
||||||
}
|
}
|
||||||
@@ -275,6 +270,12 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
|
|
||||||
private static final class EncoderField implements EncoderDecoder<String> {
|
private static final class EncoderField implements EncoderDecoder<String> {
|
||||||
|
|
||||||
|
private final StringCompressor stringCompressor;
|
||||||
|
|
||||||
|
public EncoderField(final StringCompressor stringCompressor) {
|
||||||
|
this.stringCompressor = stringCompressor;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public byte[] encode(final String field) {
|
public byte[] encode(final String field) {
|
||||||
|
|
||||||
@@ -282,13 +283,13 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
return new byte[0];
|
return new byte[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
return VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field));
|
return VariableByteEncoder.encode(stringCompressor.put(field));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String decode(final byte[] bytes) {
|
public String decode(final byte[] bytes) {
|
||||||
final long compressedString = VariableByteEncoder.decodeFirstValue(bytes);
|
final long compressedString = VariableByteEncoder.decodeFirstValue(bytes);
|
||||||
return Tags.STRING_COMPRESSOR.get((int) compressedString);
|
return stringCompressor.get((int) compressedString);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -300,16 +301,18 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
private final PartitionPersistentMap<TwoTags, Empty, Empty> tagToTagIndex;
|
private final PartitionPersistentMap<TwoTags, Empty, Empty> tagToTagIndex;
|
||||||
private final PartitionPersistentMap<Tag, Empty, Empty> fieldToValueIndex;
|
private final PartitionPersistentMap<Tag, Empty, Empty> fieldToValueIndex;
|
||||||
private final PartitionPersistentMap<String, Empty, Empty> fieldIndex;
|
private final PartitionPersistentMap<String, Empty, Empty> fieldIndex;
|
||||||
|
private final StringCompressor stringCompressor;
|
||||||
|
|
||||||
public QueryCompletionIndex(final Path basePath) throws IOException {
|
public QueryCompletionIndex(final Path basePath, final StringCompressor stringCompressor) throws IOException {
|
||||||
|
this.stringCompressor = stringCompressor;
|
||||||
tagToTagIndex = new PartitionPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(),
|
tagToTagIndex = new PartitionPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(),
|
||||||
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
|
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
|
||||||
|
|
||||||
fieldToValueIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldToValueIndex.bs",
|
fieldToValueIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldToValueIndex.bs",
|
||||||
new EncoderTag(), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
|
new EncoderTag(), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
|
||||||
|
|
||||||
fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs", new EncoderField(),
|
fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs",
|
||||||
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
|
new EncoderField(stringCompressor), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addTags(final ParititionId partitionId, final Tags tags) throws IOException {
|
public void addTags(final ParititionId partitionId, final Tags tags) throws IOException {
|
||||||
@@ -328,7 +331,7 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
// create indices of all tags and all fields
|
// create indices of all tags and all fields
|
||||||
for (final Tag tag : listOfTagsA) {
|
for (final Tag tag : listOfTagsA) {
|
||||||
fieldToValueIndex.putValue(partitionId, tag, Empty.INSTANCE);
|
fieldToValueIndex.putValue(partitionId, tag, Empty.INSTANCE);
|
||||||
fieldIndex.putValue(partitionId, Tags.STRING_COMPRESSOR.getKeyAsString(tag), Empty.INSTANCE);
|
fieldIndex.putValue(partitionId, stringCompressor.getKeyAsString(tag), Empty.INSTANCE);
|
||||||
}
|
}
|
||||||
final double d = (System.nanoTime() - start) / 1_000_000.0;
|
final double d = (System.nanoTime() - start) / 1_000_000.0;
|
||||||
if (d > 1) {
|
if (d > 1) {
|
||||||
@@ -356,15 +359,16 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
|
|
||||||
final SortedSet<String> result = new TreeSet<>();
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
|
|
||||||
final TwoTags keyPrefix = new TwoTags(fieldB, fieldA, null, null);
|
final TwoTags keyPrefix = new TwoTags(stringCompressor.createTag(fieldA, null),
|
||||||
|
stringCompressor.createTag(fieldB, null));
|
||||||
|
|
||||||
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
||||||
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
|
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
|
||||||
|
|
||||||
final String vA = Tags.STRING_COMPRESSOR.getValueAsString(k.getTagA());
|
final String vA = stringCompressor.getValueAsString(k.getTagA());
|
||||||
|
|
||||||
if (valueA.matches(vA)) {
|
if (valueA.matches(vA)) {
|
||||||
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB()));
|
result.add(stringCompressor.getValueAsString(k.getTagB()));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -383,14 +387,14 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
public SortedSet<String> find(final DateTimeRange dateRange, final Tag tag, final String field) {
|
public SortedSet<String> find(final DateTimeRange dateRange, final Tag tag, final String field) {
|
||||||
|
|
||||||
final SortedSet<String> result = new TreeSet<>();
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
|
final int tagBKey = stringCompressor.put(field);
|
||||||
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
|
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
|
||||||
// EncoderTwoTags
|
// EncoderTwoTags
|
||||||
final TwoTags keyPrefix = new TwoTags(tag, tagB);
|
final TwoTags keyPrefix = new TwoTags(tag, tagB);
|
||||||
|
|
||||||
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
||||||
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
|
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
|
||||||
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB()));
|
result.add(stringCompressor.getValueAsString(k.getTagB()));
|
||||||
});
|
});
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@@ -406,12 +410,12 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
public SortedSet<String> findAllValuesForField(final DateTimeRange dateRange, final String field) {
|
public SortedSet<String> findAllValuesForField(final DateTimeRange dateRange, final String field) {
|
||||||
|
|
||||||
final SortedSet<String> result = new TreeSet<>();
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
final int tagKey = Tags.STRING_COMPRESSOR.put(field);
|
final int tagKey = stringCompressor.put(field);
|
||||||
final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See
|
final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See
|
||||||
|
|
||||||
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
||||||
fieldToValueIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
|
fieldToValueIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
|
||||||
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k));
|
result.add(stringCompressor.getValueAsString(k));
|
||||||
});
|
});
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@@ -431,7 +435,7 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
final String field) {
|
final String field) {
|
||||||
final SortedSet<String> result = new TreeSet<>();
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
|
|
||||||
final TwoTags keyPrefix = new TwoTags(field, Tags.STRING_COMPRESSOR.getKeyAsString(tag), null, null);
|
final TwoTags keyPrefix = new TwoTags(tag.unsetValue(), stringCompressor.createTag(field, null));
|
||||||
|
|
||||||
final int negatedValueA = tag.getValue();
|
final int negatedValueA = tag.getValue();
|
||||||
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
|
||||||
@@ -439,7 +443,7 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
|
|
||||||
final int valueA = k.getTagA().getValue();
|
final int valueA = k.getTagA().getValue();
|
||||||
if (valueA != negatedValueA) {
|
if (valueA != negatedValueA) {
|
||||||
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB()));
|
result.add(stringCompressor.getValueAsString(k.getTagB()));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -35,21 +35,22 @@ public class QueryCompletionIndexTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void test() throws Exception {
|
public void test() throws Exception {
|
||||||
Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs());
|
final StringCompressor stringCompressor = new StringCompressor(new UniqueStringIntegerPairs());
|
||||||
|
Tags.STRING_COMPRESSOR = stringCompressor;
|
||||||
|
|
||||||
final List<Tags> tags = Arrays.asList(//
|
final List<Tags> tags = Arrays.asList(//
|
||||||
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country",
|
stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country",
|
||||||
"Atlantis"), // A
|
"Atlantis"), // A
|
||||||
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country",
|
stringCompressor.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country",
|
||||||
"ElDorado"), // B
|
"ElDorado"), // B
|
||||||
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country",
|
stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country",
|
||||||
"Atlantis")// C
|
"Atlantis")// C
|
||||||
);
|
);
|
||||||
|
|
||||||
final DateTimeRange dateRange = DateTimeRange.relativeMillis(1);
|
final DateTimeRange dateRange = DateTimeRange.relativeMillis(1);
|
||||||
final ParititionId partitionId = DateIndexExtension.toPartitionIds(dateRange).get(0);
|
final ParititionId partitionId = DateIndexExtension.toPartitionIds(dateRange).get(0);
|
||||||
|
|
||||||
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) {
|
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory, stringCompressor)) {
|
||||||
for (final Tags t : tags) {
|
for (final Tags t : tags) {
|
||||||
index.addTags(partitionId, t);
|
index.addTags(partitionId, t);
|
||||||
}
|
}
|
||||||
@@ -57,13 +58,13 @@ public class QueryCompletionIndexTest {
|
|||||||
// all firstnames where lastname=Doe are returned sorted alphabetically.
|
// all firstnames where lastname=Doe are returned sorted alphabetically.
|
||||||
// tags A and B match
|
// tags A and B match
|
||||||
final SortedSet<String> firstnamesWithLastnameDoe = index.find(dateRange,
|
final SortedSet<String> firstnamesWithLastnameDoe = index.find(dateRange,
|
||||||
Tags.STRING_COMPRESSOR.createTag("lastname", "Doe"), "firstname");
|
stringCompressor.createTag("lastname", "Doe"), "firstname");
|
||||||
Assertions.assertEquals(new TreeSet<>(Set.of("Jane", "John")), firstnamesWithLastnameDoe);
|
Assertions.assertEquals(new TreeSet<>(Set.of("Jane", "John")), firstnamesWithLastnameDoe);
|
||||||
|
|
||||||
// no duplicates are returned:
|
// no duplicates are returned:
|
||||||
// tags A and C match firstname=John, but both have country=Atlantis
|
// tags A and C match firstname=John, but both have country=Atlantis
|
||||||
final SortedSet<String> countryWithFirstnameJohn = index.find(dateRange,
|
final SortedSet<String> countryWithFirstnameJohn = index.find(dateRange,
|
||||||
Tags.STRING_COMPRESSOR.createTag("firstname", "John"), "country");
|
stringCompressor.createTag("firstname", "John"), "country");
|
||||||
Assertions.assertEquals(new TreeSet<>(Arrays.asList("Atlantis")), countryWithFirstnameJohn);
|
Assertions.assertEquals(new TreeSet<>(Arrays.asList("Atlantis")), countryWithFirstnameJohn);
|
||||||
|
|
||||||
// findAllValuesForField sorts alphabetically
|
// findAllValuesForField sorts alphabetically
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ package org.lucares.pdb.api;
|
|||||||
* 'Sam' is the value.
|
* 'Sam' is the value.
|
||||||
*/
|
*/
|
||||||
public class Tag implements Comparable<Tag> {
|
public class Tag implements Comparable<Tag> {
|
||||||
|
|
||||||
private final int field;
|
private final int field;
|
||||||
|
|
||||||
private final int value;
|
private final int value;
|
||||||
@@ -42,6 +43,10 @@ public class Tag implements Comparable<Tag> {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Tag unsetValue() {
|
||||||
|
return new Tag(field, -1);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return field + "=" + value;
|
return field + "=" + value;
|
||||||
|
|||||||
Reference in New Issue
Block a user