diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index 030a60b..4745a6f 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -137,7 +137,7 @@ public class DataStore implements AutoCloseable { docIdToDoc = new PartitionPersistentMap<>(storageBasePath, "docIdToDocIndex.bs", PersistentMap.LONG_CODER, new DocEncoderDecoder()); - queryCompletionIndex = new QueryCompletionIndex(storageBasePath); + queryCompletionIndex = new QueryCompletionIndex(storageBasePath, stringCompressor); writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000); writerCache.addListener((key, value) -> value.close()); diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java index 3403f79..2cf5815 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java @@ -9,6 +9,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.lucares.collections.LongList; import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.StringCompressor; import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tags; import org.lucares.pdb.datastore.lang.QueryCompletionExpressionOptimizer; @@ -152,12 +153,6 @@ public class QueryCompletionIndex implements AutoCloseable { this.tagB = tagB; } - public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) { - - tagA = Tags.STRING_COMPRESSOR.createTag(fieldA, valueA); - tagB = Tags.STRING_COMPRESSOR.createTag(fieldB, valueB); - } - public Tag getTagA() { return tagA; } @@ -275,6 +270,12 @@ public class QueryCompletionIndex implements AutoCloseable { private static final class EncoderField implements EncoderDecoder { + private final StringCompressor stringCompressor; + + public EncoderField(final StringCompressor stringCompressor) { + this.stringCompressor = stringCompressor; + } + @Override public byte[] encode(final String field) { @@ -282,13 +283,13 @@ public class QueryCompletionIndex implements AutoCloseable { return new byte[0]; } - return VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field)); + return VariableByteEncoder.encode(stringCompressor.put(field)); } @Override public String decode(final byte[] bytes) { final long compressedString = VariableByteEncoder.decodeFirstValue(bytes); - return Tags.STRING_COMPRESSOR.get((int) compressedString); + return stringCompressor.get((int) compressedString); } @Override @@ -300,16 +301,18 @@ public class QueryCompletionIndex implements AutoCloseable { private final PartitionPersistentMap tagToTagIndex; private final PartitionPersistentMap fieldToValueIndex; private final PartitionPersistentMap fieldIndex; + private final StringCompressor stringCompressor; - public QueryCompletionIndex(final Path basePath) throws IOException { + public QueryCompletionIndex(final Path basePath, final StringCompressor stringCompressor) throws IOException { + this.stringCompressor = stringCompressor; tagToTagIndex = new PartitionPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER)); fieldToValueIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldToValueIndex.bs", new EncoderTag(), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER)); - fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs", new EncoderField(), - PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER)); + fieldIndex = new PartitionPersistentMap<>(basePath, "queryCompletionFieldIndex.bs", + new EncoderField(stringCompressor), PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER)); } public void addTags(final ParititionId partitionId, final Tags tags) throws IOException { @@ -328,7 +331,7 @@ public class QueryCompletionIndex implements AutoCloseable { // create indices of all tags and all fields for (final Tag tag : listOfTagsA) { fieldToValueIndex.putValue(partitionId, tag, Empty.INSTANCE); - fieldIndex.putValue(partitionId, Tags.STRING_COMPRESSOR.getKeyAsString(tag), Empty.INSTANCE); + fieldIndex.putValue(partitionId, stringCompressor.getKeyAsString(tag), Empty.INSTANCE); } final double d = (System.nanoTime() - start) / 1_000_000.0; if (d > 1) { @@ -356,15 +359,16 @@ public class QueryCompletionIndex implements AutoCloseable { final SortedSet result = new TreeSet<>(); - final TwoTags keyPrefix = new TwoTags(fieldB, fieldA, null, null); + final TwoTags keyPrefix = new TwoTags(stringCompressor.createTag(fieldA, null), + stringCompressor.createTag(fieldB, null)); final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> { - final String vA = Tags.STRING_COMPRESSOR.getValueAsString(k.getTagA()); + final String vA = stringCompressor.getValueAsString(k.getTagA()); if (valueA.matches(vA)) { - result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB())); + result.add(stringCompressor.getValueAsString(k.getTagB())); } }); @@ -383,14 +387,14 @@ public class QueryCompletionIndex implements AutoCloseable { public SortedSet find(final DateTimeRange dateRange, final Tag tag, final String field) { final SortedSet result = new TreeSet<>(); - final int tagBKey = Tags.STRING_COMPRESSOR.put(field); + final int tagBKey = stringCompressor.put(field); final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See // EncoderTwoTags final TwoTags keyPrefix = new TwoTags(tag, tagB); final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); tagToTagIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> { - result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB())); + result.add(stringCompressor.getValueAsString(k.getTagB())); }); return result; @@ -406,12 +410,12 @@ public class QueryCompletionIndex implements AutoCloseable { public SortedSet findAllValuesForField(final DateTimeRange dateRange, final String field) { final SortedSet result = new TreeSet<>(); - final int tagKey = Tags.STRING_COMPRESSOR.put(field); + final int tagKey = stringCompressor.put(field); final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); fieldToValueIndex.visitValues(partitionIdSource, keyPrefix, (k, v) -> { - result.add(Tags.STRING_COMPRESSOR.getValueAsString(k)); + result.add(stringCompressor.getValueAsString(k)); }); return result; @@ -431,7 +435,7 @@ public class QueryCompletionIndex implements AutoCloseable { final String field) { final SortedSet result = new TreeSet<>(); - final TwoTags keyPrefix = new TwoTags(field, Tags.STRING_COMPRESSOR.getKeyAsString(tag), null, null); + final TwoTags keyPrefix = new TwoTags(tag.unsetValue(), stringCompressor.createTag(field, null)); final int negatedValueA = tag.getValue(); final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); @@ -439,7 +443,7 @@ public class QueryCompletionIndex implements AutoCloseable { final int valueA = k.getTagA().getValue(); if (valueA != negatedValueA) { - result.add(Tags.STRING_COMPRESSOR.getValueAsString(k.getTagB())); + result.add(stringCompressor.getValueAsString(k.getTagB())); } }); diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java index 367a604..0a3d55e 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java @@ -35,21 +35,22 @@ public class QueryCompletionIndexTest { @Test public void test() throws Exception { - Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs()); + final StringCompressor stringCompressor = new StringCompressor(new UniqueStringIntegerPairs()); + Tags.STRING_COMPRESSOR = stringCompressor; final List tags = Arrays.asList(// - Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country", + stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Doe", "country", "Atlantis"), // A - Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country", + stringCompressor.createAndAddToDictionary("firstname", "Jane", "lastname", "Doe", "country", "ElDorado"), // B - Tags.STRING_COMPRESSOR.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country", + stringCompressor.createAndAddToDictionary("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C ); final DateTimeRange dateRange = DateTimeRange.relativeMillis(1); final ParititionId partitionId = DateIndexExtension.toPartitionIds(dateRange).get(0); - try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) { + try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory, stringCompressor)) { for (final Tags t : tags) { index.addTags(partitionId, t); } @@ -57,13 +58,13 @@ public class QueryCompletionIndexTest { // all firstnames where lastname=Doe are returned sorted alphabetically. // tags A and B match final SortedSet firstnamesWithLastnameDoe = index.find(dateRange, - Tags.STRING_COMPRESSOR.createTag("lastname", "Doe"), "firstname"); + stringCompressor.createTag("lastname", "Doe"), "firstname"); Assertions.assertEquals(new TreeSet<>(Set.of("Jane", "John")), firstnamesWithLastnameDoe); // no duplicates are returned: // tags A and C match firstname=John, but both have country=Atlantis final SortedSet countryWithFirstnameJohn = index.find(dateRange, - Tags.STRING_COMPRESSOR.createTag("firstname", "John"), "country"); + stringCompressor.createTag("firstname", "John"), "country"); Assertions.assertEquals(new TreeSet<>(Arrays.asList("Atlantis")), countryWithFirstnameJohn); // findAllValuesForField sorts alphabetically diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java index 4af168d..bc8a0b5 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java @@ -6,6 +6,7 @@ package org.lucares.pdb.api; * 'Sam' is the value. */ public class Tag implements Comparable { + private final int field; private final int value; @@ -42,6 +43,10 @@ public class Tag implements Comparable { return value; } + public Tag unsetValue() { + return new Tag(field, -1); + } + @Override public String toString() { return field + "=" + value;