remove TagsToFile

Remove one layer of abstraction by moving the code into the DataStore.
This commit is contained in:
2019-02-16 16:06:46 +01:00
parent 117ef4ea34
commit 92a47d9b56
9 changed files with 181 additions and 285 deletions

View File

@@ -12,8 +12,8 @@ import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.RuntimeIOException;
@@ -74,6 +74,8 @@ public class DataStore implements AutoCloseable {
// easily.
private final HotEntryCache<Long, Doc> docIdToDocCache = new HotEntryCache<>(Duration.ofMillis(30), 100_000);
private final HotEntryCache<Tags, PdbWriter> writerCache;
private final DiskStorage diskStorage;
private final Path diskStorageFilePath;
private final Path storageBasePath;
@@ -102,6 +104,9 @@ public class DataStore implements AutoCloseable {
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, new DocEncoderDecoder());
queryCompletionIndex = new QueryCompletionIndex(storageBasePath);
writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000);
writerCache.addListener((k, v) -> v.close());
}
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
@@ -160,6 +165,30 @@ public class DataStore implements AutoCloseable {
return NEXT_DOC_ID.getAndIncrement();
}
/**
 * Runs {@code query} through {@code search} and converts every returned
 * document into a {@link PdbFile}.
 *
 * @param query the query string handed to {@code search}
 * @return one {@code PdbFile} per document, in the order of the search result
 * @throws IllegalStateException if the search returns more than 500,000 documents
 */
public List<PdbFile> getFilesForQuery(final String query) {
    final List<Doc> matches = search(query);
    final boolean tooManyMatches = matches.size() > 500_000;
    if (tooManyMatches) {
        throw new IllegalStateException("Too many results.");
    }
    return toPdbFiles(matches);
}
/**
 * Builds a {@link PdbFile} from the root block number and the tags of each
 * document.
 *
 * @param searchResult the documents to convert
 * @return a new list holding one {@code PdbFile} per document, in input order
 */
private List<PdbFile> toPdbFiles(final List<Doc> searchResult) {
    final List<PdbFile> pdbFiles = new ArrayList<>(searchResult.size());
    for (final Doc doc : searchResult) {
        final long rootBlock = doc.getRootBlockNumber();
        pdbFiles.add(new PdbFile(rootBlock, doc.getTags()));
    }
    return pdbFiles;
}
public List<Doc> search(final String query) {
try {
final LongList docIdsList = executeQuery(query);
@@ -262,26 +291,13 @@ public class DataStore implements AutoCloseable {
}
/**
 * Looks up the {@link Doc} for {@code docId} through {@code docIdToDocCache}.
 * The supplier handed to {@code putIfAbsent} reads the value from
 * {@code docIdToDoc}; presumably the cache only invokes it when it has no
 * entry for the id (verify against HotEntryCache).
 *
 * @param docId key used for the cache and for the {@code docIdToDoc} lookup
 * @return the value returned by {@code docIdToDocCache.putIfAbsent}
 * @throws RuntimeException wrapping an ExecutionException raised by the cache call
 */
private Doc getDocByDocId(final Long docId) {
try {
return docIdToDocCache.putIfAbsent(docId, () -> {
try {
return docIdToDoc.getValue(docId);
} catch (final IOException e) {
// the supplier cannot throw a checked exception, so rethrow unchecked
throw new RuntimeIOException(e);
}
});
} catch (final ExecutionException e) {
// checked exception declared by this version of the cache API; rethrow unchecked
throw new RuntimeException(e);
}
}
@Override
public void close() throws IOException {
try {
diskStorage.close();
} finally {
tagToDocsId.close();
}
return docIdToDocCache.putIfAbsent(docId, () -> {
try {
return docIdToDoc.getValue(docId);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
});
}
public List<Proposal> propose(final String query, final int caretIndex) {
@@ -296,7 +312,12 @@ public class DataStore implements AutoCloseable {
return diskStorage;
}
public PdbWriter getWriter(final Tags tags) {
/**
 * Returns the writer that {@code writerCache} holds for {@code tags}. The
 * supplier passed to {@code putIfAbsent} delegates to the single-argument
 * {@code getWriter(Tags)}; presumably it is only invoked when the cache has no
 * writer for these tags yet.
 *
 * @param dateAsEpochMilli not read by this method
 * @param tags             cache key identifying the writer
 * @return the writer returned by {@code writerCache.putIfAbsent}
 */
public PdbWriter getWriter(final long dateAsEpochMilli, final Tags tags) throws ReadException, WriteException {
    final PdbWriter cachedOrNewWriter = writerCache.putIfAbsent(tags, () -> getWriter(tags));
    return cachedOrNewWriter;
}
private PdbWriter getWriter(final Tags tags) {
final Optional<Doc> docsForTags = getByTags(tags);
PdbWriter writer;
if (docsForTags.isPresent()) {
@@ -336,4 +357,50 @@ public class DataStore implements AutoCloseable {
return result;
}
/**
 * Closes all cached writers, then the disk storage, then {@code tagToDocsId}.
 * <p>
 * Each later step sits in a {@code finally} block, so it is attempted even
 * when an earlier step threw. Runtime exceptions raised while closing a single
 * writer are logged by {@code forEachWriter} and do not abort the shutdown.
 *
 * @throws RuntimeIOException if closing the disk storage or
 *                            {@code tagToDocsId} fails with an IOException
 */
@Override
public void close() throws RuntimeIOException {
    try {
        // The writers are closed one by one rather than by clearing the cache,
        // because the cache implementation (Guava at the time of writing)
        // handles eviction events asynchronously.
        forEachWriter(openWriter -> {
            try {
                openWriter.close();
            } catch (final Exception closeFailure) {
                throw new WriteException(closeFailure);
            }
        });
    } finally {
        try {
            diskStorage.close();
        } catch (final IOException diskStorageFailure) {
            throw new RuntimeIOException(diskStorageFailure);
        } finally {
            // attempted even if closing the disk storage failed
            try {
                tagToDocsId.close();
            } catch (final IOException tagIndexFailure) {
                throw new RuntimeIOException(tagIndexFailure);
            }
        }
    }
}
/**
 * Applies {@code consumer} to every writer visited by
 * {@code writerCache.forEach}. A RuntimeException thrown by the consumer is
 * caught per writer and logged as a warning, so it does not propagate to the
 * caller.
 *
 * @param consumer the action to run for each writer
 */
private void forEachWriter(final Consumer<PdbWriter> consumer) {
    writerCache.forEach(pdbWriter -> {
        try {
            consumer.accept(pdbWriter);
        } catch (final RuntimeException failure) {
            LOGGER.warn("Exception while applying consumer to PdbWriter for " + pdbWriter.getPdbFile(), failure);
        }
    });
}
/**
 * Flushes every writer visited by {@code forEachWriter}. A failing flush is
 * wrapped in a {@code WriteException}, which {@code forEachWriter} logs
 * instead of propagating.
 */
public void flush() {
    forEachWriter(pdbWriter -> {
        try {
            pdbWriter.flush();
        } catch (final Exception flushFailure) {
            throw new WriteException(flushFailure);
        }
    });
}
}

View File

@@ -6,6 +6,8 @@ import java.awt.event.KeyEvent;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -22,11 +24,14 @@ import javax.swing.JFrame;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import org.lucares.pdb.api.Entry;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.PdbWriter;
import org.lucares.pdb.datastore.Proposal;
import org.lucares.utils.CollectionUtils;
import org.lucares.utils.DateUtils;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
@@ -184,6 +189,63 @@ public class DataStoreTest {
assertProposals(queryWithCaret, field, expectedProposedValues);
}
/**
 * Requesting a writer twice for the same tags must return the very same
 * writer instance.
 */
public void test() throws Exception {
    try (final DataStore store = new DataStore(dataDirectory)) {
        final long nowEpochMilli = OffsetDateTime.now(ZoneOffset.UTC).toInstant().toEpochMilli();
        final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");

        final PdbWriter firstWriter = store.getWriter(nowEpochMilli, tags);
        final PdbWriter secondWriter = store.getWriter(nowEpochMilli, tags);

        Assert.assertSame(firstWriter, secondWriter);
    }
}
/**
 * Entries for different days, including a day that lies before the ones
 * already written, must all be served by the same writer instance when the
 * tags are identical.
 */
public void testAppendingToSameFile() throws Exception {
    try (final DataStore store = new DataStore(dataDirectory)) {
        // the third timestamp is the earliest of the three
        final long secondOfJanuary = DateUtils.getDate(2016, 1, 2, 1, 1, 1).toInstant().toEpochMilli();
        final long thirdOfJanuary = DateUtils.getDate(2016, 1, 3, 1, 1, 1).toInstant().toEpochMilli();
        final long firstOfJanuary = DateUtils.getDate(2016, 1, 1, 1, 1, 1).toInstant().toEpochMilli();
        final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");

        final PdbWriter writerA = store.getWriter(secondOfJanuary, tags);
        writerA.write(new Entry(secondOfJanuary, 1, tags));

        final PdbWriter writerB = store.getWriter(thirdOfJanuary, tags);
        writerB.write(new Entry(thirdOfJanuary, 2, tags));

        final PdbWriter writerC = store.getWriter(firstOfJanuary, tags);
        writerC.write(new Entry(firstOfJanuary, 3, tags));

        Assert.assertSame(writerA, writerB);
        Assert.assertSame(writerA, writerC);
    }
}
/**
 * Two entries with the same timestamp and the same tags must be handled by
 * equal writers.
 */
public void testIdenticalDatesGoIntoSameFile() throws Exception {
    try (final DataStore dataStore = new DataStore(dataDirectory)) {
        final long timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1).toInstant().toEpochMilli();
        final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");

        final PdbWriter fileA = dataStore.getWriter(timestamp, tags);
        fileA.write(new Entry(timestamp, 1, tags));

        final PdbWriter fileB = dataStore.getWriter(timestamp, tags);
        // Write through the writer obtained by the second lookup; the original
        // wrote through fileA again, so fileB was never exercised.
        fileB.write(new Entry(timestamp, 2, tags));

        Assert.assertEquals(fileA, fileB);
    }
}
public static void main(final String[] args) throws IOException, InterruptedException {
final Path dir = Files.createTempDirectory("pdb");
try (DataStore dataStore = new DataStore(dir)) {