diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index 5dee4da..9e21d06 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -127,8 +127,9 @@ public class DataStore implements AutoCloseable { Tags.STRING_COMPRESSOR = StringCompressor.create(keyCompressionFile(storageBasePath)); Tags.STRING_COMPRESSOR.put(ALL_DOCS_KEY); Tags.STRING_COMPRESSOR.put(""); - TAG_ALL_DOCS = new Tag(ALL_DOCS_KEY, ""); // Tag(String, String) uses the StringCompressor internally, so it - // must be initialized after the string compressor has been created + TAG_ALL_DOCS = Tags.STRING_COMPRESSOR.createTag(ALL_DOCS_KEY, ""); // createTag(String, String) looks the strings up in the + // StringCompressor, so TAG_ALL_DOCS + // must be initialized after the string compressor has been created diskStorage = new PartitionDiskStore(storageBasePath, "data.bs"); @@ -263,7 +264,7 @@ public class DataStore implements AutoCloseable { final Set keys = new HashSet<>(); - final Tag keyPrefix = new Tag("", ""); // will find everything + final Tag keyPrefix = Tags.STRING_COMPRESSOR.createTag("", ""); // will find everything final PartitionIdSource partitionIdSource = new DatePartitioner(dateRange); tagToDocsId.visitValues(partitionIdSource, keyPrefix, (tags, __) -> keys.add(tags.getKeyAsString())); diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java index a5a1081..56893a4 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/QueryCompletionIndex.java @@ -154,8 +154,8 @@ public class QueryCompletionIndex implements AutoCloseable { public TwoTags(final String 
fieldB, final String fieldA, final String valueA, final String valueB) { - tagA = new Tag(fieldA, valueA); - tagB = new Tag(fieldB, valueB); + tagA = Tags.STRING_COMPRESSOR.createTag(fieldA, valueA); + tagB = Tags.STRING_COMPRESSOR.createTag(fieldB, valueB); } public Tag getTagA() { diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagEncoderDecoder.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagEncoderDecoder.java index c1ba7ec..0b2c48e 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagEncoderDecoder.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagEncoderDecoder.java @@ -38,17 +38,17 @@ class TagEncoderDecoder implements EncoderDecoder { switch (compressedStrings.size()) { case 0: - result = new Tag("", ""); + result = Tags.STRING_COMPRESSOR.createTag("", ""); break; case 1: final String k = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(0)); - result = new Tag(k, ""); + result = Tags.STRING_COMPRESSOR.createTag(k, ""); break; case 2: final String key = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(0)); final String value = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(1)); - result = new Tag(key, value); + result = Tags.STRING_COMPRESSOR.createTag(key, value); break; default: throw new IllegalStateException("too many values: " + compressedStrings); diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java index 88cc192..540d180 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java @@ -10,6 +10,7 @@ import java.util.stream.Collectors; import org.lucares.collections.LongList; import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.Tag; +import org.lucares.pdb.api.Tags; 
import org.lucares.pdb.blockstorage.LongStreamFile; import org.lucares.pdb.datastore.internal.DataStore; import org.lucares.pdb.datastore.internal.DatePartitioner; @@ -148,27 +149,29 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor availablePartitionIds = keyToValueToDocId.getAvailablePartitionIds(datePartitioner); for (final ParititionId partitionId : availablePartitionIds) { final List docIdsForPartition = new ArrayList<>(); - keyToValueToDocId.visitValues(partitionId, new Tag(propertyName, ""), (tags, blockOffsetToDocIds) -> { - if (valuePattern.matcher(tags.getValueAsString()).matches()) { - try (final LongStreamFile bsFile = diskStorage.streamExistingFile(blockOffsetToDocIds, - partitionId)) { + keyToValueToDocId.visitValues(partitionId, Tags.STRING_COMPRESSOR.createTag(propertyName, ""), + (tags, blockOffsetToDocIds) -> { + if (valuePattern.matcher(tags.getValueAsString()).matches()) { + try (final LongStreamFile bsFile = diskStorage.streamExistingFile(blockOffsetToDocIds, + partitionId)) { - // We know that all LongLists coming from a BSFile are sorted, non-overlapping - // and increasing, that means we can just concatenate them and get a sorted - // list. - final List longLists = bsFile.streamOfLongLists().collect(Collectors.toList()); - final LongList concatenatedLists = concatenateLists(longLists); + // We know that all LongLists coming from a BSFile are sorted, non-overlapping + // and increasing, that means we can just concatenate them and get a sorted + // list. + final List longLists = bsFile.streamOfLongLists() + .collect(Collectors.toList()); + final LongList concatenatedLists = concatenateLists(longLists); - Preconditions.checkTrue(concatenatedLists.isSorted(), - "The LongLists containing document ids must be sorted, " - + "non-overlapping and increasing, so that the concatenation " - + "is sorted. 
This is guaranteed by the fact that document ids " - + "are generated in monotonically increasing order."); + Preconditions.checkTrue(concatenatedLists.isSorted(), + "The LongLists containing document ids must be sorted, " + + "non-overlapping and increasing, so that the concatenation " + + "is sorted. This is guaranteed by the fact that document ids " + + "are generated in monotonically increasing order."); - docIdsForPartition.add(concatenatedLists); - } - } - }); + docIdsForPartition.add(concatenatedLists); + } + } + }); final LongList mergedDocsIdsForPartition = LongList.union(docIdsForPartition); result.put(partitionId, mergedDocsIdsForPartition); diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java index 91d24d4..1a40350 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/FindValuesForQueryCompletion.java @@ -7,6 +7,7 @@ import java.util.TreeSet; import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.Tag; +import org.lucares.pdb.api.Tags; import org.lucares.pdb.datastore.internal.GlobMatcher; import org.lucares.pdb.datastore.internal.QueryCompletionIndex; import org.lucares.pdb.datastore.lang.Expression.And; @@ -62,7 +63,7 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor(); for (final String v : valuesA) { - final Tag tagA = new Tag(fieldA, v); + final Tag tagA = Tags.STRING_COMPRESSOR.createTag(fieldA, v); final SortedSet tmp = index.find(dateTimeRange, tagA, field); result.addAll(tmp); } @@ -150,7 +151,7 @@ public class FindValuesForQueryCompletion extends ExpressionVisitor valuesNotForField = index.findAllValuesNotForField(dateTimeRange, tag, field); final SortedSet valuesForField = index.find(dateTimeRange, tag, field); diff --git 
a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java index 3077479..3f59edc 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/QueryCompletionIndexTest.java @@ -10,14 +10,13 @@ import java.util.SortedSet; import java.util.TreeSet; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.StringCompressor; -import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.UniqueStringIntegerPairs; -import org.junit.jupiter.api.Assertions; import org.lucares.utils.file.FileUtils; public class QueryCompletionIndexTest { @@ -54,14 +53,14 @@ public class QueryCompletionIndexTest { // all firstnames where lastname=Doe are returned sorted alphabetically. 
// tags A and B match - final SortedSet firstnamesWithLastnameDoe = index.find(dateRange, new Tag("lastname", "Doe"), - "firstname"); + final SortedSet firstnamesWithLastnameDoe = index.find(dateRange, + Tags.STRING_COMPRESSOR.createTag("lastname", "Doe"), "firstname"); Assertions.assertEquals(new TreeSet<>(Set.of("Jane", "John")), firstnamesWithLastnameDoe); // no duplicates are returned: // tags A and C match firstname=John, but both have country=Atlantis - final SortedSet countryWithFirstnameJohn = index.find(dateRange, new Tag("firstname", "John"), - "country"); + final SortedSet countryWithFirstnameJohn = index.find(dateRange, + Tags.STRING_COMPRESSOR.createTag("firstname", "John"), "country"); Assertions.assertEquals(new TreeSet<>(Arrays.asList("Atlantis")), countryWithFirstnameJohn); // findAllValuesForField sorts alphabetically diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java b/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java index fd918b9..4317d36 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java @@ -39,4 +39,17 @@ public class StringCompressor { return integer != null ? integer : -1; } + /** + * Create a new {@link Tag} for the given field and value, looked up in this compressor via {@code getIfPresent}. + * + * @param field the field; {@code null} is encoded as -1 + * @param value the value; {@code null} is encoded as -1 + * @return a new {@link Tag} built from the two looked-up integers + */ + public Tag createTag(final String field, final String value) { + final int f = field != null ? getIfPresent(field) : -1; + final int v = value != null ? 
getIfPresent(value) : -1; + return new Tag(f, v); + } + } diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java index d17adab..c75fc8d 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tag.java @@ -22,17 +22,6 @@ public class Tag implements Comparable { this.value = value; } - /** - * Create a new {@link Tag} for the given field and value. - * - * @param field the field - * @param value the value - */ - public Tag(final String field, final String value) { - this.field = field != null ? Tags.STRING_COMPRESSOR.getIfPresent(field) : -1; - this.value = value != null ? Tags.STRING_COMPRESSOR.getIfPresent(value) : -1; - } - @Override public int compareTo(final Tag o) { diff --git a/pdb-api/src/test/java/org/lucares/memory/MemoryScale.java b/pdb-api/src/test/java/org/lucares/memory/MemoryScale.java index 91b5d2b..6c4e41f 100644 --- a/pdb-api/src/test/java/org/lucares/memory/MemoryScale.java +++ b/pdb-api/src/test/java/org/lucares/memory/MemoryScale.java @@ -7,7 +7,6 @@ import java.util.LinkedHashMap; import java.util.Map; import org.lucares.pdb.api.StringCompressor; -import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.TagsBuilder; import org.lucares.pdb.api.UniqueStringIntegerPairs; @@ -74,7 +73,7 @@ public class MemoryScale { } private static Object createTag() { - return new Tag("", ""); + return Tags.STRING_COMPRESSOR.createTag("", ""); } private static Object createTags0() {