remove static string compressor from QueryCompletionIndex

This commit is contained in:
2021-09-18 19:24:30 +02:00
parent 311af4b9e9
commit 3e1002a99d
4 changed files with 39 additions and 29 deletions

View File

@@ -137,7 +137,7 @@ public class DataStore implements AutoCloseable {
docIdToDoc = new PartitionPersistentMap<>(storageBasePath, "docIdToDocIndex.bs", PersistentMap.LONG_CODER, docIdToDoc = new PartitionPersistentMap<>(storageBasePath, "docIdToDocIndex.bs", PersistentMap.LONG_CODER,
new DocEncoderDecoder()); new DocEncoderDecoder());
queryCompletionIndex = new QueryCompletionIndex(storageBasePath); queryCompletionIndex = new QueryCompletionIndex(storageBasePath, stringCompressor);
writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000); writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000);
writerCache.addListener((key, value) -> value.close()); writerCache.addListener((key, value) -> value.close());

View File

@@ -9,6 +9,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.lucares.collections.LongList; import org.lucares.collections.LongList;
import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.lang.QueryCompletionExpressionOptimizer; import org.lucares.pdb.datastore.lang.QueryCompletionExpressionOptimizer;
@@ -152,12 +153,6 @@ public class QueryCompletionIndex implements AutoCloseable {
this.tagB = tagB; this.tagB = tagB;
} }
/**
 * Creates a pair of tags from raw field names and values. Both tags are
 * built through the static {@code Tags.STRING_COMPRESSOR}.
 *
 * <p>Beware of the parameter order: the field names are passed as
 * (fieldB, fieldA) while the values are passed as (valueA, valueB).
 *
 * @param fieldB field name used for tagB
 * @param fieldA field name used for tagA
 * @param valueA value used for tagA
 * @param valueB value used for tagB
 */
public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) {
tagA = Tags.STRING_COMPRESSOR.createTag(fieldA, valueA);
tagB = Tags.STRING_COMPRESSOR.createTag(fieldB, valueB);
}
public Tag getTagA() { public Tag getTagA() {
return tagA; return tagA;
} }
@@ -275,6 +270,12 @@ public class QueryCompletionIndex implements AutoCloseable {
private static final class EncoderField implements EncoderDecoder<String> { private static final class EncoderField implements EncoderDecoder<String> {
private final StringCompressor stringCompressor;
/**
 * Creates a field-name encoder that is bound to the given compressor
 * instead of a static, globally shared one.
 *
 * @param stringCompressor the compressor stored in this encoder; it is used
 *                         by {@code encode} (via {@code put}) and by
 *                         {@code decode} (via {@code get})
 */
public EncoderField(final StringCompressor stringCompressor) {
this.stringCompressor = stringCompressor;
}
@Override @Override
public byte[] encode(final String field) { public byte[] encode(final String field) {
@@ -282,13 +283,13 @@ public class QueryCompletionIndex implements AutoCloseable {
return new byte[0]; return new byte[0];
} }
return VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field)); return VariableByteEncoder.encode(stringCompressor.put(field));
} }
@Override @Override
public String decode(final byte[] bytes) { public String decode(final byte[] bytes) {
final long compressedString = VariableByteEncoder.decodeFirstValue(bytes); final long compressedString = VariableByteEncoder.decodeFirstValue(bytes);
return Tags.STRING_COMPRESSOR.get((int) compressedString); return stringCompressor.get((int) compressedString);
} }
@Override @Override
@@ -300,16 +301,18 @@ public class QueryCompletionIndex implements AutoCloseable {
private final PartitionPersistentMap<TwoTags, Empty, Empty> tagToTagIndex; private final PartitionPersistentMap<TwoTags, Empty, Empty> tagToTagIndex;
private final PartitionPersistentMap<Tag, Empty, Empty> fieldToValueIndex; private final PartitionPersistentMap<Tag, Empty, Empty> fieldToValueIndex;
private final PartitionPersistentMap<String, Empty, Empty> fieldIndex; private final PartitionPersistentMap<String, Empty, Empty> fieldIndex;
private final StringCompressor stringCompressor;
public QueryCompletionIndex(final Path basePath) throws IOException { public QueryCompletionIndex(final Path basePath, final StringCompressor stringCompressor) throws IOException {
this.stringCompressor = stringCompressor;
tagToTagIndex = new PartitionPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(), tagToTagIndex = new PartitionPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(),
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER)); PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
fieldToValueIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldToValueIndex.bs", fieldToValueIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldToValueIndex.bs",
new EncoderTag(), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER)); new EncoderTag(), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs", new EncoderField(), fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs",
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER)); new EncoderField(stringCompressor), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));
} }
public void addTags(final ParititionId partitionId, final Tags tags) throws IOException { public void addTags(final ParititionId partitionId, final Tags tags) throws IOException {
@@ -328,7 +331,7 @@ public class QueryCompletionIndex implements AutoCloseable {
// create indices of all tags and all fields // create indices of all tags and all fields
for (final Tag tag : listOfTagsA) { for (final Tag tag : listOfTagsA) {
fieldToValueIndex.putValue(partitionId, tag, Empty.INSTANCE); fieldToValueIndex.putValue(partitionId, tag, Empty.INSTANCE);
fieldIndex.putValue(partitionId, Tags.STRING_COMPRESSOR.getKeyAsString(tag), Empty.INSTANCE); fieldIndex.putValue(partitionId, stringCompressor.getKeyAsString(tag), Empty.INSTANCE);
} }
final double d = (System.nanoTime() - start) / 1_000_000.0; final double d = (System.nanoTime() - start) / 1_000_000.0;
if (d > 1) { if (d > 1) {
@@ -356,15 +359,16 @@ public class QueryCompletionIndex implements AutoCloseable {
final SortedSet<String> result = new TreeSet<>(); final SortedSet<String> result = new TreeSet<>();
final TwoTags keyPrefix = new TwoTags(fieldB, fieldA, null, null); final TwoTags keyPrefix = new TwoTags(stringCompressor.createTag(fieldA, null),
stringCompressor.createTag(fieldB, null));
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> { tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
final String vA = Tags.STRING_COMPRESSOR.getValueAsString(k.getTagA()); final String vA = stringCompressor.getValueAsString(k.getTagA());
if (valueA.matches(vA)) { if (valueA.matches(vA)) {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB())); result.add(stringCompressor.getValueAsString(k.getTagB()));
} }
}); });
@@ -383,14 +387,14 @@ public class QueryCompletionIndex implements AutoCloseable {
public SortedSet<String> find(final DateTimeRange dateRange, final Tag tag, final String field) { public SortedSet<String> find(final DateTimeRange dateRange, final Tag tag, final String field) {
final SortedSet<String> result = new TreeSet<>(); final SortedSet<String> result = new TreeSet<>();
final int tagBKey = Tags.STRING_COMPRESSOR.put(field); final int tagBKey = stringCompressor.put(field);
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
// EncoderTwoTags // EncoderTwoTags
final TwoTags keyPrefix = new TwoTags(tag, tagB); final TwoTags keyPrefix = new TwoTags(tag, tagB);
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> { tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB())); result.add(stringCompressor.getValueAsString(k.getTagB()));
}); });
return result; return result;
@@ -406,12 +410,12 @@ public class QueryCompletionIndex implements AutoCloseable {
public SortedSet<String> findAllValuesForField(final DateTimeRange dateRange, final String field) { public SortedSet<String> findAllValuesForField(final DateTimeRange dateRange, final String field) {
final SortedSet<String> result = new TreeSet<>(); final SortedSet<String> result = new TreeSet<>();
final int tagKey = Tags.STRING_COMPRESSOR.put(field); final int tagKey = stringCompressor.put(field);
final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
fieldToValueIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> { fieldToValueIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k)); result.add(stringCompressor.getValueAsString(k));
}); });
return result; return result;
@@ -431,7 +435,7 @@ public class QueryCompletionIndex implements AutoCloseable {
final String field) { final String field) {
final SortedSet<String> result = new TreeSet<>(); final SortedSet<String> result = new TreeSet<>();
final TwoTags keyPrefix = new TwoTags(field, Tags.STRING_COMPRESSOR.getKeyAsString(tag), null, null); final TwoTags keyPrefix = new TwoTags(tag.unsetValue(), stringCompressor.createTag(field, null));
final int negatedValueA = tag.getValue(); final int negatedValueA = tag.getValue();
final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange);
@@ -439,7 +443,7 @@ public class QueryCompletionIndex implements AutoCloseable {
final int valueA = k.getTagA().getValue(); final int valueA = k.getTagA().getValue();
if (valueA != negatedValueA) { if (valueA != negatedValueA) {
result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB())); result.add(stringCompressor.getValueAsString(k.getTagB()));
} }
}); });

View File

@@ -35,21 +35,22 @@ public class QueryCompletionIndexTest {
@Test @Test
public void test() throws Exception { public void test() throws Exception {
Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs()); final StringCompressor stringCompressor = new StringCompressor(new UniqueStringIntegerPairs());
Tags.STRING_COMPRESSOR = stringCompressor;
final List<Tags> tags = Arrays.asList(// final List<Tags> tags = Arrays.asList(//
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country", stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country",
"Atlantis"), // A "Atlantis"), // A
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country", stringCompressor.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country",
"ElDorado"), // B "ElDorado"), // B
Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country", stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country",
"Atlantis")// C "Atlantis")// C
); );
final DateTimeRange dateRange = DateTimeRange.relativeMillis(1); final DateTimeRange dateRange = DateTimeRange.relativeMillis(1);
final ParititionId partitionId = DateIndexExtension.toPartitionIds(dateRange).get(0); final ParititionId partitionId = DateIndexExtension.toPartitionIds(dateRange).get(0);
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) { try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory, stringCompressor)) {
for (final Tags t : tags) { for (final Tags t : tags) {
index.addTags(partitionId, t); index.addTags(partitionId, t);
} }
@@ -57,13 +58,13 @@ public class QueryCompletionIndexTest {
// all firstnames where lastname=Doe are returned sorted alphabetically. // all firstnames where lastname=Doe are returned sorted alphabetically.
// tags A and B match // tags A and B match
final SortedSet<String> firstnamesWithLastnameDoe = index.find(dateRange, final SortedSet<String> firstnamesWithLastnameDoe = index.find(dateRange,
Tags.STRING_COMPRESSOR.createTag("lastname", "Doe"), "firstname"); stringCompressor.createTag("lastname", "Doe"), "firstname");
Assertions.assertEquals(new TreeSet<>(Set.of("Jane", "John")), firstnamesWithLastnameDoe); Assertions.assertEquals(new TreeSet<>(Set.of("Jane", "John")), firstnamesWithLastnameDoe);
// no duplicates are returned: // no duplicates are returned:
// tags A and C match firstname=John, but both have country=Atlantis // tags A and C match firstname=John, but both have country=Atlantis
final SortedSet<String> countryWithFirstnameJohn = index.find(dateRange, final SortedSet<String> countryWithFirstnameJohn = index.find(dateRange,
Tags.STRING_COMPRESSOR.createTag("firstname", "John"), "country"); stringCompressor.createTag("firstname", "John"), "country");
Assertions.assertEquals(new TreeSet<>(Arrays.asList("Atlantis")), countryWithFirstnameJohn); Assertions.assertEquals(new TreeSet<>(Arrays.asList("Atlantis")), countryWithFirstnameJohn);
// findAllValuesForField sorts alphabetically // findAllValuesForField sorts alphabetically

View File

@@ -6,6 +6,7 @@ package org.lucares.pdb.api;
* 'Sam' is the value. * 'Sam' is the value.
*/ */
public class Tag implements Comparable<Tag> { public class Tag implements Comparable<Tag> {
private final int field; private final int field;
private final int value; private final int value;
@@ -42,6 +43,10 @@ public class Tag implements Comparable<Tag> {
return value; return value;
} }
/**
 * Returns a new tag that has the same field as this tag and the value -1.
 * This tag itself is not modified.
 *
 * <p>NOTE(review): -1 looks like the "no value" marker that
 * QueryCompletionIndex relies on for prefix searches (it requires a
 * negative value there) - confirm against EncoderTwoTags.
 *
 * @return a tag with this tag's field and the value -1
 */
public Tag unsetValue() {
return new Tag(field, -1);
}
@Override @Override
public String toString() { public String toString() {
return field + "=" + value; return field + "=" + value;