diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusterIdSource.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusterIdSource.java new file mode 100644 index 0000000..7916e71 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusterIdSource.java @@ -0,0 +1,7 @@ +package org.lucares.pdb.datastore.internal; + +import java.util.List; + +public interface ClusterIdSource { + List toClusterIds(); +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java index 519a879..a58018b 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java @@ -1,14 +1,17 @@ package org.lucares.pdb.datastore.internal; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; -import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.datastore.ReadRuntimeException; import org.lucares.pdb.map.PersistentMap; +import org.lucares.pdb.map.PersistentMap.EncoderDecoder; import org.lucares.pdb.map.Visitor; public class ClusteredPersistentMap implements AutoCloseable { @@ -16,27 +19,46 @@ public class ClusteredPersistentMap implements AutoCloseable { private final ConcurrentHashMap> maps = new ConcurrentHashMap<>(); private final Function> creator; + private final Function> supplier; - // TODO we need two creators, one that actually creates a new map and one that - // only creates a new map if the file on disk already exists - public ClusteredPersistentMap(final Function> creator) { - this.creator = (dateIndexPrefix) -> creator.apply(dateIndexPrefix); + public ClusteredPersistentMap(final Path storageBasePath, final String filename, final EncoderDecoder keyEncoder, + final EncoderDecoder valueEncoder) { + + creator = clusterId -> { + try { + final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve(filename); + return new PersistentMap<>(file, keyEncoder, valueEncoder); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }; + supplier = clusterId -> { + try { + final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve(filename); + if (Files.exists(file)) { + return new PersistentMap<>(file, keyEncoder, valueEncoder); + } + return null; + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }; } public V getValue(final ClusterId clusterId, final K key) { try { - final PersistentMap map = maps.computeIfAbsent(clusterId, creator); + final PersistentMap map = maps.computeIfAbsent(clusterId, supplier); return map != null ? map.getValue(key) : null; } catch (final IOException e) { throw new ReadRuntimeException(e); } } - public List getValues(final DateTimeRange dateRange, final K key) { + public List getValues(final ClusterIdSource clusterIdSource, final K key) { try { final List result = new ArrayList<>(); - final List clusterIds = DateIndexExtension.toClusterIds(dateRange); + final List clusterIds = clusterIdSource.toClusterIds(); for (final ClusterId clusterId : clusterIds) { final PersistentMap map = maps.computeIfAbsent(clusterId, creator); @@ -75,9 +97,9 @@ public class ClusteredPersistentMap implements AutoCloseable { } } - public void visitValues(final DateTimeRange dateRange, final K keyPrefix, final Visitor visitor) { + public void visitValues(final ClusterIdSource clusterIdSource, final K keyPrefix, final Visitor visitor) { try { - final List clusterIds = DateIndexExtension.toClusterIds(dateRange); + final List clusterIds = clusterIdSource.toClusterIds(); for (final ClusterId clusterId : clusterIds) { final PersistentMap map = maps.get(clusterId); diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index c7447b8..fd7aa22 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -94,33 +94,14 @@ public class DataStore implements AutoCloseable { diskStorage = new DiskStorage(diskStorageFilePath); diskStorage.ensureAlignmentForNewBlocks(BSFile.BLOCK_SIZE); - tagToDocsId = new ClusteredPersistentMap<>(clusterId -> { - try { - final Path file = storageBasePath.resolve(clusterId.getClusterId()) - .resolve("keyToValueToDocIdsIndex.bs"); - return new PersistentMap<>(file, new TagEncoderDecoder(), PersistentMap.LONG_CODER); - } catch (final IOException e) { - throw new RuntimeIOException(e); - } - }); + tagToDocsId = new ClusteredPersistentMap<>(storageBasePath, "keyToValueToDocIdsIndex.bs", + new TagEncoderDecoder(), PersistentMap.LONG_CODER); - tagsToDocId = new ClusteredPersistentMap<>(clusterId -> { - try { - final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve("tagsToDocIdIndex.bs"); - return new PersistentMap<>(file, new TagsEncoderDecoder(), PersistentMap.LONG_CODER); - } catch (final IOException e) { - throw new RuntimeIOException(e); - } - }); + tagsToDocId = new ClusteredPersistentMap<>(storageBasePath, "tagsToDocIdIndex.bs", new TagsEncoderDecoder(), + PersistentMap.LONG_CODER); - docIdToDoc = new ClusteredPersistentMap<>(clusterId -> { - try { - final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve("docIdToDocIndex.bs"); - return new PersistentMap<>(file, PersistentMap.LONG_CODER, new DocEncoderDecoder()); - } catch (final IOException e) { - throw new RuntimeIOException(e); - } - }); + docIdToDoc = new ClusteredPersistentMap<>(storageBasePath, "docIdToDocIndex.bs", PersistentMap.LONG_CODER, + new DocEncoderDecoder()); queryCompletionIndex = new QueryCompletionIndex(storageBasePath); @@ -251,7 +232,8 @@ public class DataStore implements AutoCloseable { final Tag keyPrefix = new Tag("", ""); // will find everything - tagToDocsId.visitValues(dateRange, keyPrefix, (tags, __) -> keys.add(tags.getKeyAsString())); + final ClusterIdSource clusterIdSource = new DateCluster(dateRange); + tagToDocsId.visitValues(clusterIdSource, keyPrefix, (tags, __) -> keys.add(tags.getKeyAsString())); keys.remove(ALL_DOCS_KEY); final List result = new ArrayList<>(keys); @@ -264,8 +246,8 @@ public class DataStore implements AutoCloseable { final SortedSet result = new TreeSet<>(); if (query.getQuery().isEmpty()) { - tagToDocsId.visitValues(query.getDateRange(), new Tag(key, ""), - (tag, __) -> result.add(tag.getValueAsString())); + final ClusterIdSource clusterIdSource = new DateCluster(query.getDateRange()); + tagToDocsId.visitValues(clusterIdSource, new Tag(key, ""), (tag, __) -> result.add(tag.getValueAsString())); } else { final List docs = search(query); for (final Doc doc : docs) { @@ -327,7 +309,8 @@ public class DataStore implements AutoCloseable { public List getByTags(final DateTimeRange dateRange, final Tags tags) { final List result = new ArrayList<>(); - final List docIds = tagsToDocId.getValues(dateRange, tags); + final DateCluster dateCluster = new DateCluster(dateRange); + final List docIds = tagsToDocId.getValues(dateCluster, tags); for (final Long docId : docIds) { if (docId != null) { @@ -348,7 +331,8 @@ public class DataStore implements AutoCloseable { private Doc getDocByDocId(final DateTimeRange dateRange, final Long docId) { return docIdToDocCache.putIfAbsent(docId, () -> { - final List docIds = docIdToDoc.getValues(dateRange, docId); + final DateCluster dateCluster = new DateCluster(dateRange); + final List docIds = docIdToDoc.getValues(dateCluster, docId); if (docIds.size() == 1) { return docIds.get(0); } else if (docIds.size() > 1) { diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DateCluster.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DateCluster.java new file mode 100644 index 0000000..90df6e7 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DateCluster.java @@ -0,0 +1,19 @@ +package org.lucares.pdb.datastore.internal; + +import java.util.List; + +import org.lucares.pdb.api.DateTimeRange; + +public class DateCluster implements ClusterIdSource { + + private final DateTimeRange dateRange; + + public DateCluster(final DateTimeRange dateRange) { + this.dateRange = dateRange; + } + + @Override + public List toClusterIds() { + return DateIndexExtension.toClusterIds(dateRange); + } +}