diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index 3b8a5a5..3ef3084 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -4,7 +4,6 @@ import java.io.IOException; import java.nio.file.Path; import java.time.Duration; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -34,9 +33,7 @@ import org.lucares.pdb.datastore.lang.NewProposerParser; import org.lucares.pdb.datastore.lang.QueryLanguageParser; import org.lucares.pdb.diskstorage.DiskStorage; import org.lucares.pdb.map.PersistentMap; -import org.lucares.pdb.map.PersistentMap.EncoderDecoder; import org.lucares.utils.Preconditions; -import org.lucares.utils.byteencoder.VariableByteEncoder; import org.lucares.utils.cache.HotEntryCache; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,97 +59,6 @@ public class DataStore implements AutoCloseable { // ids when getting them from the BSFiles) private static final AtomicLong NEXT_DOC_ID = new AtomicLong(System.currentTimeMillis()); - private static final EncoderDecoder ENCODER_TAGS = new EncoderDecoder<>() { - - @Override - public byte[] encode(final Tags tags) { - return tags.toBytes(); - } - - @Override - public Tags decode(final byte[] bytes) { - return Tags.fromBytes(bytes); - } - }; - - private static final EncoderDecoder ENCODER_DOC = new EncoderDecoder<>() { - - @Override - public byte[] encode(final Doc doc) { - - final byte[] rootBlockNumber = VariableByteEncoder.encode(doc.getRootBlockNumber()); - final byte[] tags = doc.getTags().toBytes(); - - final byte[] result = new byte[rootBlockNumber.length + tags.length]; - - System.arraycopy(rootBlockNumber, 0, result, 0, rootBlockNumber.length); - System.arraycopy(tags, 0, result, rootBlockNumber.length, tags.length); - - return result; - } - - @Override - public Doc decode(final byte[] bytes) { - - final long rootBlockNumber = VariableByteEncoder.decodeFirstValue(bytes); - final int bytesRootBlockNumber = VariableByteEncoder.neededBytes(rootBlockNumber); - final Tags tags = Tags.fromBytes(Arrays.copyOfRange(bytes, bytesRootBlockNumber, bytes.length)); - return new Doc(tags, rootBlockNumber); - } - }; - - private static final EncoderDecoder ENCODER_TAG = new EncoderDecoder<>() { - - @Override - public byte[] encode(final Tag tag) { - - final LongList keyAndValueCompressed = new LongList(2); - - final String key = tag.getKeyAsString(); - final byte[] result; - if (!key.isEmpty()) { - final Integer keyAsLong = Tags.STRING_COMPRESSOR.put(key); - keyAndValueCompressed.add(keyAsLong); - - final String value = tag.getValueAsString(); - if (!value.isEmpty()) { - final Integer valueAsLong = Tags.STRING_COMPRESSOR.put(value); - keyAndValueCompressed.add(valueAsLong); - } - result = VariableByteEncoder.encode(keyAndValueCompressed); - } else { - result = new byte[0]; - } - - return result; - } - - @Override - public Tag decode(final byte[] bytes) { - final LongList compressedStrings = VariableByteEncoder.decode(bytes); - final Tag result; - switch (compressedStrings.size()) { - case 0: - - result = new Tag("", ""); - break; - case 1: - final String k = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(0)); - result = new Tag(k, ""); - - break; - case 2: - final String key = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(0)); - final String value = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(1)); - result = new Tag(key, value); - break; - default: - throw new IllegalStateException("too many values: " + compressedStrings); - } - - return result; - } - }; public static Tag TAG_ALL_DOCS = null; private final PersistentMap docIdToDoc; @@ -186,13 +92,14 @@ public class DataStore implements AutoCloseable { diskStorage.ensureAlignmentForNewBlocks(BSFile.BLOCK_SIZE); final Path keyToValueToDocIdsIndexPath = storageBasePath.resolve("keyToValueToDocIdsIndex.bs"); - tagToDocsId = new PersistentMap<>(keyToValueToDocIdsIndexPath, ENCODER_TAG, PersistentMap.LONG_CODER); + tagToDocsId = new PersistentMap<>(keyToValueToDocIdsIndexPath, new TagEncoderDecoder(), + PersistentMap.LONG_CODER); final Path tagsToDocIdIndexPath = storageBasePath.resolve("tagsToDocIdIndex.bs"); - tagsToDocId = new PersistentMap<>(tagsToDocIdIndexPath, ENCODER_TAGS, PersistentMap.LONG_CODER); + tagsToDocId = new PersistentMap<>(tagsToDocIdIndexPath, new TagsEncoderDecoder(), PersistentMap.LONG_CODER); final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs"); - docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC); + docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, new DocEncoderDecoder()); queryCompletionIndex = new QueryCompletionIndex(storageBasePath); } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DocEncoderDecoder.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DocEncoderDecoder.java new file mode 100644 index 0000000..13e2215 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DocEncoderDecoder.java @@ -0,0 +1,35 @@ +package org.lucares.pdb.datastore.internal; + +import java.util.Arrays; + +import org.lucares.pdb.api.Tags; +import org.lucares.pdb.datastore.Doc; +import org.lucares.pdb.map.PersistentMap.EncoderDecoder; +import org.lucares.utils.byteencoder.VariableByteEncoder; + +class DocEncoderDecoder implements EncoderDecoder { + + @Override + public byte[] encode(final Doc doc) { + + final byte[] rootBlockNumber = VariableByteEncoder.encode(doc.getRootBlockNumber()); + final byte[] tags = doc.getTags().toBytes(); + + final byte[] result = new byte[rootBlockNumber.length + tags.length]; + + System.arraycopy(rootBlockNumber, 0, result, 0, rootBlockNumber.length); + System.arraycopy(tags, 0, result, rootBlockNumber.length, tags.length); + + return result; + } + + @Override + public Doc decode(final byte[] bytes) { + + final long rootBlockNumber = VariableByteEncoder.decodeFirstValue(bytes); + final int bytesRootBlockNumber = VariableByteEncoder.neededBytes(rootBlockNumber); + final Tags tags = Tags.fromBytes(Arrays.copyOfRange(bytes, bytesRootBlockNumber, bytes.length)); + return new Doc(tags, rootBlockNumber); + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagEncoderDecoder.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagEncoderDecoder.java new file mode 100644 index 0000000..4cd3540 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagEncoderDecoder.java @@ -0,0 +1,59 @@ +package org.lucares.pdb.datastore.internal; + +import org.lucares.collections.LongList; +import org.lucares.pdb.api.Tag; +import org.lucares.pdb.api.Tags; +import org.lucares.pdb.map.PersistentMap.EncoderDecoder; +import org.lucares.utils.byteencoder.VariableByteEncoder; + +class TagEncoderDecoder implements EncoderDecoder { + @Override + public byte[] encode(final Tag tag) { + + final LongList keyAndValueCompressed = new LongList(2); + + final String key = tag.getKeyAsString(); + final byte[] result; + if (!key.isEmpty()) { + final Integer keyAsLong = Tags.STRING_COMPRESSOR.put(key); + keyAndValueCompressed.add(keyAsLong); + + final String value = tag.getValueAsString(); + if (!value.isEmpty()) { + final Integer valueAsLong = Tags.STRING_COMPRESSOR.put(value); + keyAndValueCompressed.add(valueAsLong); + } + result = VariableByteEncoder.encode(keyAndValueCompressed); + } else { + result = new byte[0]; + } + + return result; + } + + @Override + public Tag decode(final byte[] bytes) { + final LongList compressedStrings = VariableByteEncoder.decode(bytes); + final Tag result; + switch (compressedStrings.size()) { + case 0: + + result = new Tag("", ""); + break; + case 1: + final String k = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(0)); + result = new Tag(k, ""); + + break; + case 2: + final String key = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(0)); + final String value = Tags.STRING_COMPRESSOR.get((int) compressedStrings.get(1)); + result = new Tag(key, value); + break; + default: + throw new IllegalStateException("too many values: " + compressedStrings); + } + + return result; + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagsEncoderDecoder.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagsEncoderDecoder.java new file mode 100644 index 0000000..e1a051c --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/TagsEncoderDecoder.java @@ -0,0 +1,16 @@ +package org.lucares.pdb.datastore.internal; + +import org.lucares.pdb.api.Tags; +import org.lucares.pdb.map.PersistentMap.EncoderDecoder; + +class TagsEncoderDecoder implements EncoderDecoder { + @Override + public byte[] encode(final Tags tags) { + return tags.toBytes(); + } + + @Override + public Tags decode(final byte[] bytes) { + return Tags.fromBytes(bytes); + } +}