Split BSFile into a TimeSeriesFile and a LongStreamFile

BSFile was used to store two different types of data, which made
its API complex. I split it into two files — TimeSeriesFile and
LongStreamFile — each with a simpler and clearer API.
Interestingly, the API of BSFile itself remains rather complex,
because it still has to support both use cases.
This commit is contained in:
2019-02-10 09:59:16 +01:00
parent fd55ea0866
commit cbcb7714bb
16 changed files with 546 additions and 315 deletions

View File

@@ -9,6 +9,7 @@ import org.lucares.collections.LongList;
import org.lucares.pdb.api.RuntimeIOException;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.blockstorage.TimeSeriesFile;
import org.lucares.pdb.diskstorage.DiskStorage;
public class PdbFile {
@@ -24,8 +25,8 @@ public class PdbFile {
@Override
public Stream<LongList> apply(final PdbFile pdbFile) {
try {
final BSFile bsFile = BSFile.existingFile(pdbFile.getRootBlockNumber(), diskStorage);
return bsFile.streamOfTimeValueLongLists();
final TimeSeriesFile bsFile = TimeSeriesFile.existingFile(pdbFile.getRootBlockNumber(), diskStorage);
return bsFile.streamOfLongLists();
} catch (final IOException e) {
throw new RuntimeIOException(e);
}

View File

@@ -5,7 +5,7 @@ import java.io.IOException;
import java.util.Optional;
import org.lucares.pdb.api.Entry;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.blockstorage.TimeSeriesFile;
import org.lucares.pdb.diskstorage.DiskStorage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -20,13 +20,13 @@ public class PdbWriter implements AutoCloseable, Flushable {
private final PdbFile pdbFile;
private long lastEpochMilli;
private final BSFile bsFile;
private final TimeSeriesFile bsFile;
public PdbWriter(final PdbFile pdbFile, final DiskStorage diskStorage) throws IOException {
this.pdbFile = pdbFile;
bsFile = BSFile.existingFile(pdbFile.getRootBlockNumber(), diskStorage);
final Optional<Long> optionalLastValue = bsFile.getLastValue();
bsFile = TimeSeriesFile.existingFile(pdbFile.getRootBlockNumber(), diskStorage);
final Optional<Long> optionalLastValue = bsFile.getLastValue(); // TODO is this last value correct?
lastEpochMilli = optionalLastValue.orElse(0L);
}

View File

@@ -21,6 +21,7 @@ import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.blockstorage.LongStreamFile;
import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.PdbFile;
import org.lucares.pdb.datastore.PdbWriter;
@@ -232,7 +233,8 @@ public class DataStore implements AutoCloseable {
tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag);
}
try (final BSFile docIdsOfTag = BSFile.existingFile(diskStoreOffsetForDocIdsOfTag, diskStorage)) {
try (final LongStreamFile docIdsOfTag = LongStreamFile.existingFile(diskStoreOffsetForDocIdsOfTag,
diskStorage)) {
docIdsOfTag.append(docId);
}
}

View File

@@ -10,7 +10,7 @@ import java.util.stream.Collectors;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.RuntimeIOException;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.blockstorage.LongStreamFile;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.pdb.datastore.lang.Expression.And;
import org.lucares.pdb.datastore.lang.Expression.Not;
@@ -122,7 +122,7 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<LongList> {
private LongList getAllDocIds() {
try {
final Long blockOffset = keyToValueToDocId.getValue(DataStore.TAG_ALL_DOCS);
final BSFile bsFile = BSFile.existingFile(blockOffset, diskStorage);
final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffset, diskStorage);
return bsFile.asLongList();
} catch (final IOException e) {
throw new RuntimeIOException(e);
@@ -136,7 +136,8 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<LongList> {
keyToValueToDocId.visitValues(new Tag(propertyName, ""), (tags, blockOffsetToDocIds) -> {
try {
if (valuePattern.matcher(tags.getValueAsString()).matches()) {
try (final BSFile bsFile = BSFile.existingFile(blockOffsetToDocIds, diskStorage)) {
try (final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffsetToDocIds,
diskStorage)) {
// We know that all LongLists coming from a BSFile are sorted, non-overlapping
// and increasing, that means we can just concatenate them and get a sorted