remove static string compressor from QueryCompletionIndex

This commit is contained in:
2021-09-18 19:24:30 +02:00
parent 311af4b9e9
commit 3e1002a99d
4 changed files with 39 additions and 29 deletions

View File

@@ -137,7 +137,7 @@ public class DataStore implements AutoCloseable {
docIdToDoc = new PartitionPersistentMap<>(storageBasePath, "docIdToDocIndex.bs", PersistentMap.LONG_CODER,
new DocEncoderDecoder());
queryCompletionIndex = new QueryCompletionIndex(storageBasePath);
queryCompletionIndex = new QueryCompletionIndex(storageBasePath, stringCompressor);
writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000);
writerCache.addListener((key, value) -> value.close());

View File

@@ -9,6 +9,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.lang.QueryCompletionExpressionOptimizer;
@@ -152,12 +153,6 @@ public class QueryCompletionIndex implements AutoCloseable {
this.tagB = tagB;
}
/**
 * Builds both tags from plain strings by passing each field/value pair to
 * {@code Tags.STRING_COMPRESSOR.createTag}.
 * <p>
 * NOTE(review): the parameter order is (fieldB, fieldA, valueA, valueB) - the
 * fields are given B first, the values A first. Easy to mix up at call sites.
 * NOTE(review): this looks like the constructor the change removes, because it
 * depends on the static compressor - confirm against the real diff.
 */
public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) {
tagA = Tags.STRING_COMPRESSOR.createTag(fieldA, valueA);
tagB = Tags.STRING_COMPRESSOR.createTag(fieldB, valueB);
}
/**
 * @return the first tag (tagA) of this pair
 */
public Tag getTagA() {
return tagA;
}
@@ -275,6 +270,12 @@ public class QueryCompletionIndex implements AutoCloseable {
private static final class EncoderField implements EncoderDecoder<String> {
private final StringCompressor stringCompressor;
/**
 * Creates a field encoder bound to the given compressor instance, so the
 * encoder does not need a static compressor.
 *
 * @param stringCompressor the compressor this encoder stores for translating
 *                         field names to and from their integer ids
 */
public EncoderField(final StringCompressor stringCompressor) {
this.stringCompressor = stringCompressor;
}
@Override
public byte[] encode(final String field) {
@@ -282,13 +283,13 @@ public class QueryCompletionIndex implements AutoCloseable {
return new byte[0];
}
return VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field));
return VariableByteEncoder.encode(stringCompressor.put(field));
}
/**
 * Decodes a field name: reads the first variable-byte encoded value from
 * {@code bytes} and resolves it to a string through the string compressor.
 *
 * @param bytes the encoded field
 * @return the string the compressor returns for the decoded id
 */
@Override
public String decode(final byte[] bytes) {
final long compressedString = VariableByteEncoder.decodeFirstValue(bytes);
// The compressor is looked up by int, so the decoded long is narrowed.
// NOTE(review): the two consecutive return statements look like the
// before/after variants of one line in this diff; only one can be live.
return Tags.STRING_COMPRESSOR.get((int) compressedString);
return stringCompressor.get((int) compressedString);
}
@Override
@@ -300,16 +301,18 @@ public class QueryCompletionIndex implements AutoCloseable {
private final PartitionPersistentMap<TwoTags, Empty, Empty> tagToTagIndex;
private final PartitionPersistentMap<Tag, Empty, Empty> fieldToValueIndex;
private final PartitionPersistentMap<String, Empty, Empty> fieldIndex;
private final StringCompressor stringCompressor;
/**
 * Creates the three partitioned persistent maps backing the query completion
 * index, all rooted at {@code basePath}:
 * "queryCompletionTagToTagIndex.bs", "queryCompletionFieldToValueIndex.bs"
 * and "queryCompletionFieldIndex.bs". All three store {@code Empty} values
 * using {@code PersistentMap.EMPTY_ENCODER}.
 * <p>
 * NOTE(review): the two signature lines and the two {@code fieldIndex}
 * assignments look like the before/after variants of this change. The later
 * variant takes the compressor as a parameter and passes it to
 * {@code EncoderField}.
 *
 * @param basePath         directory passed to each map
 * @param stringCompressor compressor stored in this index and handed to the
 *                         field encoder
 * @throws IOException declared by the constructor; presumably raised while
 *                     creating the maps - confirm
 */
public QueryCompletionIndex(final Path basePath) throws IOException {
public QueryCompletionIndex(final Path basePath, final StringCompressor stringCompressor) throws IOException {
this.stringCompressor = stringCompressor;
tagToTagIndex = new PartitionPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(),
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
fieldToValueIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldToValueIndex.bs",
new EncoderTag(), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs", new EncoderField(),
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs",
new EncoderField(stringCompressor), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
}
public void addTags(final ParititionId partitionId, final Tags tags) throws IOException {
@@ -328,7 +331,7 @@ public class QueryCompletionIndex implements AutoCloseable {
// create indices of all tags and all fields
for (final Tag tag : listOfTagsA) {
fieldToValueIndex.putValue(partitionId, tag, Empty.INSTANCE);
fieldIndex.putValue(partitionId, Tags.STRING_COMPRESSOR.getKeyAsString(tag), Empty.INSTANCE);
fieldIndex.putValue(partitionId, stringCompressor.getKeyAsString(tag), Empty.INSTANCE);
}
final double d = (System.nanoTime() - start) / 1_000_000.0;
if (d > 1) {
@@ -356,15 +359,16 @@ public class QueryCompletionIndex implements AutoCloseable {
final SortedSet<String> result = new TreeSet<>();
final TwoTags keyPrefix = new TwoTags(fieldB, fieldA, null, null);
final TwoTags keyPrefix = new TwoTags(stringCompressor.createTag(fieldA, null),
stringCompressor.createTag(fieldB, null));
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
final String vA = Tags.STRING_COMPRESSOR.getValueAsString(k.getTagA());
final String vA = stringCompressor.getValueAsString(k.getTagA());
if (valueA.matches(vA)) {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB()));
result.add(stringCompressor.getValueAsString(k.getTagB()));
}
});
@@ -383,14 +387,14 @@ public class QueryCompletionIndex implements AutoCloseable {
public SortedSet<String> find(final DateTimeRange dateRange, final Tag tag, final String field) {
final SortedSet<String> result = new TreeSet<>();
final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
final int tagBKey = stringCompressor.put(field);
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
// EncoderTwoTags
final TwoTags keyPrefix = new TwoTags(tag, tagB);
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB()));
result.add(stringCompressor.getValueAsString(k.getTagB()));
});
return result;
@@ -406,12 +410,12 @@ public class QueryCompletionIndex implements AutoCloseable {
public SortedSet<String> findAllValuesForField(final DateTimeRange dateRange, final String field) {
final SortedSet<String> result = new TreeSet<>();
final int tagKey = Tags.STRING_COMPRESSOR.put(field);
final int tagKey = stringCompressor.put(field);
final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
fieldToValueIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k));
result.add(stringCompressor.getValueAsString(k));
});
return result;
@@ -431,7 +435,7 @@ public class QueryCompletionIndex implements AutoCloseable {
final String field) {
final SortedSet<String> result = new TreeSet<>();
final TwoTags keyPrefix = new TwoTags(field, Tags.STRING_COMPRESSOR.getKeyAsString(tag), null, null);
final TwoTags keyPrefix = new TwoTags(tag.unsetValue(), stringCompressor.createTag(field, null));
final int negatedValueA = tag.getValue();
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
@@ -439,7 +443,7 @@ public class QueryCompletionIndex implements AutoCloseable {
final int valueA = k.getTagA().getValue();
if (valueA != negatedValueA) {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB()));
result.add(stringCompressor.getValueAsString(k.getTagB()));
}
});

View File

@@ -35,21 +35,22 @@ public class QueryCompletionIndexTest {
@Test
public void test() throws Exception {
Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs());
final StringCompressor stringCompressor = new StringCompressor(new UniqueStringIntegerPairs());
Tags.STRING_COMPRESSOR = stringCompressor;
final List<Tags> tags = Arrays.asList(//
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country",
stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country",
"Atlantis"), // A
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country",
stringCompressor.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country",
"ElDorado"), // B
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country",
stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country",
"Atlantis")// C
);
final DateTimeRange dateRange = DateTimeRange.relativeMillis(1);
final ParititionId partitionId = DateIndexExtension.toPartitionIds(dateRange).get(0);
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) {
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory, stringCompressor)) {
for (final Tags t : tags) {
index.addTags(partitionId, t);
}
@@ -57,13 +58,13 @@ public class QueryCompletionIndexTest {
// all firstnames where lastname=Doe are returned sorted alphabetically.
// tags A and B match
final SortedSet<String> firstnamesWithLastnameDoe = index.find(dateRange,
Tags.STRING_COMPRESSOR.createTag("lastname", "Doe"), "firstname");
stringCompressor.createTag("lastname", "Doe"), "firstname");
Assertions.assertEquals(new TreeSet<>(Set.of("Jane", "John")), firstnamesWithLastnameDoe);
// no duplicates are returned:
// tags A and C match firstname=John, but both have country=Atlantis
final SortedSet<String> countryWithFirstnameJohn = index.find(dateRange,
Tags.STRING_COMPRESSOR.createTag("firstname", "John"), "country");
stringCompressor.createTag("firstname", "John"), "country");
Assertions.assertEquals(new TreeSet<>(Arrays.asList("Atlantis")), countryWithFirstnameJohn);
// findAllValuesForField sorts alphabetically

View File

@@ -6,6 +6,7 @@ package org.lucares.pdb.api;
* 'Sam' is the value.
*/
public class Tag implements Comparable<Tag> {
private final int field;
private final int value;
@@ -42,6 +43,10 @@ public class Tag implements Comparable<Tag> {
return value;
}
/**
 * Returns a new tag with the same field and the value replaced by -1; this
 * instance is not changed.
 * <p>
 * A negative value is what the query completion index uses when a tag serves
 * as a prefix-search key.
 *
 * @return a new {@code Tag} with this field and value -1
 */
public Tag unsetValue() {
return new Tag(field, -1);
}
@Override
public String toString() {
return field + "=" + value;