Remove TagsToFile

Remove one layer of abstraction by moving the code into the DataStore.
This commit is contained in:
2019-02-16 16:06:46 +01:00
parent 117ef4ea34
commit 92a47d9b56
9 changed files with 181 additions and 285 deletions

View File

@@ -12,8 +12,8 @@ import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;
import org.lucares.collections.LongList; import org.lucares.collections.LongList;
import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.api.RuntimeIOException;
@@ -74,6 +74,8 @@ public class DataStore implements AutoCloseable {
// easily. // easily.
private final HotEntryCache<Long, Doc> docIdToDocCache = new HotEntryCache<>(Duration.ofMillis(30), 100_000); private final HotEntryCache<Long, Doc> docIdToDocCache = new HotEntryCache<>(Duration.ofMillis(30), 100_000);
private final HotEntryCache<Tags, PdbWriter> writerCache;
private final DiskStorage diskStorage; private final DiskStorage diskStorage;
private final Path diskStorageFilePath; private final Path diskStorageFilePath;
private final Path storageBasePath; private final Path storageBasePath;
@@ -102,6 +104,9 @@ public class DataStore implements AutoCloseable {
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, new DocEncoderDecoder()); docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, new DocEncoderDecoder());
queryCompletionIndex = new QueryCompletionIndex(storageBasePath); queryCompletionIndex = new QueryCompletionIndex(storageBasePath);
writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000);
writerCache.addListener((k, v) -> v.close());
} }
private Path keyCompressionFile(final Path dataDirectory) throws IOException { private Path keyCompressionFile(final Path dataDirectory) throws IOException {
@@ -160,6 +165,30 @@ public class DataStore implements AutoCloseable {
return NEXT_DOC_ID.getAndIncrement(); return NEXT_DOC_ID.getAndIncrement();
} }
/**
 * Runs the given query and converts every matching document into a {@link PdbFile}.
 *
 * @param query the search query
 * @return the files backing all matching documents
 * @throws IllegalStateException if the query matches more than 500,000 documents
 */
public List<PdbFile> getFilesForQuery(final String query) {
    final List<Doc> matches = search(query);
    if (matches.size() > 500_000) {
        throw new IllegalStateException("Too many results.");
    }
    return toPdbFiles(matches);
}
/**
 * Converts each {@link Doc} (root block number + tags) into a {@link PdbFile}.
 */
private List<PdbFile> toPdbFiles(final List<Doc> searchResult) {
    final List<PdbFile> pdbFiles = new ArrayList<>(searchResult.size());
    for (final Doc doc : searchResult) {
        pdbFiles.add(new PdbFile(doc.getRootBlockNumber(), doc.getTags()));
    }
    return pdbFiles;
}
public List<Doc> search(final String query) { public List<Doc> search(final String query) {
try { try {
final LongList docIdsList = executeQuery(query); final LongList docIdsList = executeQuery(query);
@@ -262,26 +291,13 @@ public class DataStore implements AutoCloseable {
} }
private Doc getDocByDocId(final Long docId) { private Doc getDocByDocId(final Long docId) {
try { return docIdToDocCache.putIfAbsent(docId, () -> {
return docIdToDocCache.putIfAbsent(docId, () -> { try {
try { return docIdToDoc.getValue(docId);
return docIdToDoc.getValue(docId); } catch (final IOException e) {
} catch (final IOException e) { throw new RuntimeIOException(e);
throw new RuntimeIOException(e); }
} });
});
} catch (final ExecutionException e) {
throw new RuntimeException(e);
}
}
@Override
public void close() throws IOException {
try {
diskStorage.close();
} finally {
tagToDocsId.close();
}
} }
public List<Proposal> propose(final String query, final int caretIndex) { public List<Proposal> propose(final String query, final int caretIndex) {
@@ -296,7 +312,12 @@ public class DataStore implements AutoCloseable {
return diskStorage; return diskStorage;
} }
public PdbWriter getWriter(final Tags tags) { public PdbWriter getWriter(final long dateAsEpochMilli, final Tags tags) throws ReadException, WriteException {
return writerCache.putIfAbsent(tags, () -> getWriter(tags));
}
private PdbWriter getWriter(final Tags tags) {
final Optional<Doc> docsForTags = getByTags(tags); final Optional<Doc> docsForTags = getByTags(tags);
PdbWriter writer; PdbWriter writer;
if (docsForTags.isPresent()) { if (docsForTags.isPresent()) {
@@ -336,4 +357,50 @@ public class DataStore implements AutoCloseable {
return result; return result;
} }
/**
 * Closes the data store: first closes every cached {@link PdbWriter}, then the
 * disk storage, then the tag index.
 *
 * @throws RuntimeIOException if closing the disk storage or the tag index fails
 */
@Override
public void close() throws RuntimeIOException {
try {
// we cannot simply clear the cache, because the cache implementation (Guava at
// the time of writing) handles eviction events asynchronously.
forEachWriter(cachedWriter -> {
try {
cachedWriter.close();
} catch (final Exception e) {
throw new WriteException(e);
}
});
} finally {
// Close the remaining resources even if closing a cached writer failed.
try {
diskStorage.close();
} catch (final IOException e) {
throw new RuntimeIOException(e);
} finally {
// NOTE(review): an exception thrown here masks one thrown while closing
// diskStorage above — consider collecting via addSuppressed. TODO confirm.
try {
tagToDocsId.close();
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
}
}
}
/**
 * Applies {@code consumer} to every writer currently held in the cache.
 * A {@link RuntimeException} thrown by the consumer is logged and does not
 * stop the iteration over the remaining writers.
 */
private void forEachWriter(final Consumer<PdbWriter> consumer) {
    writerCache.forEach(cachedWriter -> {
        try {
            consumer.accept(cachedWriter);
        } catch (final RuntimeException e) {
            LOGGER.warn("Exception while applying consumer to PdbWriter for " + cachedWriter.getPdbFile(), e);
        }
    });
}
/**
 * Flushes every cached writer. A flush failure is wrapped in a
 * {@code WriteException}, which {@code forEachWriter} logs per writer.
 */
public void flush() {
    forEachWriter(cachedWriter -> {
        try {
            cachedWriter.flush();
        } catch (final Exception e) {
            throw new WriteException(e);
        }
    });
}
} }

View File

@@ -6,6 +6,8 @@ import java.awt.event.KeyEvent;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@@ -22,11 +24,14 @@ import javax.swing.JFrame;
import javax.swing.JTextArea; import javax.swing.JTextArea;
import javax.swing.JTextField; import javax.swing.JTextField;
import org.lucares.pdb.api.Entry;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
import org.lucares.pdb.blockstorage.BSFile; import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.datastore.Doc; import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.PdbWriter;
import org.lucares.pdb.datastore.Proposal; import org.lucares.pdb.datastore.Proposal;
import org.lucares.utils.CollectionUtils; import org.lucares.utils.CollectionUtils;
import org.lucares.utils.DateUtils;
import org.lucares.utils.file.FileUtils; import org.lucares.utils.file.FileUtils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.AfterMethod; import org.testng.annotations.AfterMethod;
@@ -184,6 +189,63 @@ public class DataStoreTest {
assertProposals(queryWithCaret, field, expectedProposedValues); assertProposals(queryWithCaret, field, expectedProposedValues);
} }
/** Requesting a writer twice for the same tags must yield the same cached instance. */
public void test() throws Exception {
    try (final DataStore dataStore = new DataStore(dataDirectory)) {
        final long epochMilli = OffsetDateTime.now(ZoneOffset.UTC).toInstant().toEpochMilli();
        final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
        final PdbWriter first = dataStore.getWriter(epochMilli, tags);
        final PdbWriter second = dataStore.getWriter(epochMilli, tags);
        Assert.assertSame(first, second);
    }
}
/** Entries for different days but identical tags must share one writer. */
public void testAppendingToSameFile() throws Exception {
    try (final DataStore dataStore = new DataStore(dataDirectory)) {
        // dayC is before dayA and dayB
        final long dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1).toInstant().toEpochMilli();
        final long dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1).toInstant().toEpochMilli();
        final long dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1).toInstant().toEpochMilli();
        final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");

        final PdbWriter dayAWriter = dataStore.getWriter(dayA, tags);
        dayAWriter.write(new Entry(dayA, 1, tags));
        final PdbWriter dayBWriter = dataStore.getWriter(dayB, tags);
        dayBWriter.write(new Entry(dayB, 2, tags));
        final PdbWriter dayCWriter = dataStore.getWriter(dayC, tags);
        dayCWriter.write(new Entry(dayC, 3, tags));

        // all three dates resolve to the identical cached writer
        Assert.assertSame(dayAWriter, dayBWriter);
        Assert.assertSame(dayAWriter, dayCWriter);
    }
}
/**
 * Entries with identical timestamps and tags must go into the same file,
 * i.e. the cache must hand out the identical writer instance.
 */
public void testIdenticalDatesGoIntoSameFile() throws Exception {
    try (final DataStore dataStore = new DataStore(dataDirectory)) {
        final long timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1).toInstant().toEpochMilli();
        final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
        final PdbWriter fileA = dataStore.getWriter(timestamp, tags);
        fileA.write(new Entry(timestamp, 1, tags));
        final PdbWriter fileB = dataStore.getWriter(timestamp, tags);
        // BUGFIX: write the second entry through the second writer handle
        // (was fileA.write(...), which never exercised fileB at all).
        fileB.write(new Entry(timestamp, 2, tags));
        // assertSame for consistency with the sibling tests: the cache must
        // return the identical writer instance, not merely an equal one.
        Assert.assertSame(fileA, fileB);
    }
}
public static void main(final String[] args) throws IOException, InterruptedException { public static void main(final String[] args) throws IOException, InterruptedException {
final Path dir = Files.createTempDirectory("pdb"); final Path dir = Files.createTempDirectory("pdb");
try (DataStore dataStore = new DataStore(dir)) { try (DataStore dataStore = new DataStore(dir)) {

View File

@@ -1,4 +1,4 @@
package org.lucares.performance.db; package org.lucares.utils;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.time.ZoneOffset; import java.time.ZoneOffset;

View File

@@ -77,11 +77,16 @@ public class HotEntryCache<K, V> {
* inserted * inserted
* @return the newly inserted or existing value, or null if * @return the newly inserted or existing value, or null if
* {@code mappingFunction} returned {@code null} * {@code mappingFunction} returned {@code null}
* @throws ExecutionException * @throws RuntimeExcecutionException re-throws any exception thrown during the
* execution of {@code supplier} wrapped in a
* {@link RuntimeExcecutionException}
*/ */
public V putIfAbsent(final K key, final Callable<V> mappingFunction) throws ExecutionException { public V putIfAbsent(final K key, final Callable<V> supplier) {
try {
return cache.get(key, mappingFunction); return cache.get(key, supplier);
} catch (final ExecutionException e) {
throw new RuntimeExcecutionException(e);
}
} }
public void remove(final K key) { public void remove(final K key) {

View File

@@ -0,0 +1,13 @@
package org.lucares.utils.cache;
import java.util.concurrent.ExecutionException;
public class RuntimeExcecutionException extends RuntimeException {
private static final long serialVersionUID = -3626851728980513527L;
public RuntimeExcecutionException(final ExecutionException e) {
super(e);
}
}

View File

@@ -32,15 +32,12 @@ public class PerformanceDb implements AutoCloseable {
private final static Logger LOGGER = LoggerFactory.getLogger(PerformanceDb.class); private final static Logger LOGGER = LoggerFactory.getLogger(PerformanceDb.class);
private final static Logger METRICS_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.block"); private final static Logger METRICS_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.block");
private final TagsToFile tagsToFile;
private final DataStore dataStore; private final DataStore dataStore;
public PerformanceDb(final Path dataDirectory) throws IOException { public PerformanceDb(final Path dataDirectory) throws IOException {
dataStore = new DataStore(dataDirectory); dataStore = new DataStore(dataDirectory);
tagsToFile = new TagsToFile(dataStore);
} }
void putEntry(final Entry entry) throws WriteException { void putEntry(final Entry entry) throws WriteException {
@@ -81,7 +78,7 @@ public class PerformanceDb implements AutoCloseable {
final Tags tags = entry.getTags(); final Tags tags = entry.getTags();
final long dateAsEpochMilli = entry.getEpochMilli(); final long dateAsEpochMilli = entry.getEpochMilli();
final PdbWriter writer = tagsToFile.getWriter(dateAsEpochMilli, tags); final PdbWriter writer = dataStore.getWriter(dateAsEpochMilli, tags);
writer.write(entry); writer.write(entry);
count++; count++;
@@ -114,7 +111,7 @@ public class PerformanceDb implements AutoCloseable {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
LOGGER.info("Thread was interrupted. Aborting exectution."); LOGGER.info("Thread was interrupted. Aborting exectution.");
} finally { } finally {
tagsToFile.flush(); dataStore.flush();
} }
} }
@@ -128,7 +125,7 @@ public class PerformanceDb implements AutoCloseable {
} }
public List<PdbFile> getFilesForQuery(final String query) { public List<PdbFile> getFilesForQuery(final String query) {
return tagsToFile.getFilesForQuery(query); return dataStore.getFilesForQuery(query);
} }
/** /**
@@ -140,7 +137,7 @@ public class PerformanceDb implements AutoCloseable {
*/ */
public Result get(final String query, final List<String> groupBy) { public Result get(final String query, final List<String> groupBy) {
final long start = System.nanoTime(); final long start = System.nanoTime();
final List<PdbFile> pdbFiles = tagsToFile.getFilesForQuery(query); final List<PdbFile> pdbFiles = dataStore.getFilesForQuery(query);
final Grouping grouping = Grouping.groupBy(pdbFiles, groupBy); final Grouping grouping = Grouping.groupBy(pdbFiles, groupBy);
@@ -163,11 +160,10 @@ public class PerformanceDb implements AutoCloseable {
@Override @Override
public void close() { public void close() {
tagsToFile.close();
try { try {
dataStore.close(); dataStore.close();
} catch (final IOException e) { } catch (final Exception e) {
LOGGER.error("failed to close PdbDB", e); LOGGER.error("failed to close PerformanceDB", e);
} }
} }

View File

@@ -1,155 +0,0 @@
package org.lucares.performance.db;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.PdbFile;
import org.lucares.pdb.datastore.PdbWriter;
import org.lucares.pdb.datastore.ReadException;
import org.lucares.pdb.datastore.WriteException;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.utils.cache.HotEntryCache;
import org.lucares.utils.cache.HotEntryCache.EventListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Maps {@link Tags} to the {@link PdbWriter} that appends to the file for those
 * tags. Writers live in a {@link HotEntryCache} (10 s hot time, max 1000
 * entries) and are closed automatically by the eviction listener when evicted.
 */
public class TagsToFile implements AutoCloseable {
private static final Logger LOGGER = LoggerFactory.getLogger(TagsToFile.class);
// Wrapper making Tags the cache key.
// NOTE(review): Tags already provides compareTo/equals/hashCode (all delegated
// to below), so this wrapper may be removable — TODO confirm.
private static final class CacheKey implements Comparable<CacheKey> {
private final Tags tags;
public CacheKey(final Tags tags) {
super();
this.tags = tags;
}
@Override
public int compareTo(final CacheKey o) {
return tags.compareTo(o.tags);
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
return result;
}
@Override
public boolean equals(final Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
final CacheKey other = (CacheKey) obj;
if (tags == null) {
if (other.tags != null)
return false;
} else if (!tags.equals(other.tags))
return false;
return true;
}
}
// Closes a writer when the cache evicts it, so file handles are not leaked.
private final static class RemovalListener implements EventListener<CacheKey, PdbWriter> {
@Override
public void evicted(final CacheKey key, final PdbWriter value) {
value.close();
}
}
private final HotEntryCache<CacheKey, PdbWriter> writerCache;
private final DataStore dataStore;
public TagsToFile(final DataStore dataStore) {
this.dataStore = dataStore;
writerCache = new HotEntryCache<>(Duration.ofSeconds(10), 1000);
writerCache.addListener(new RemovalListener());
}
/**
 * Runs the query and converts every matching document into a {@link PdbFile}.
 *
 * @throws IllegalStateException if the query matches more than 500,000 documents
 */
public List<PdbFile> getFilesForQuery(final String query) {
final List<Doc> searchResult = dataStore.search(query);
if (searchResult.size() > 500_000) {
throw new IllegalStateException("Too many results.");
}
final List<PdbFile> result = toPdbFiles(searchResult);
return result;
}
// Converts each Doc (root block number + tags) into a PdbFile.
private List<PdbFile> toPdbFiles(final List<Doc> searchResult) {
final List<PdbFile> result = new ArrayList<>(searchResult.size());
for (final Doc document : searchResult) {
final long rootBlockNumber = document.getRootBlockNumber();
final Tags tags = document.getTags();
final PdbFile pdbFile = new PdbFile(rootBlockNumber, tags);
result.add(pdbFile);
}
return result;
}
/**
 * Returns the cached writer for {@code tags}, creating and caching one via the
 * data store on a miss.
 *
 * NOTE(review): {@code dateAsEpochMilli} is not used in the lookup — the cache
 * key is the tags only. TODO confirm this is intentional.
 */
public PdbWriter getWriter(final long dateAsEpochMilli, final Tags tags) throws ReadException, WriteException {
final CacheKey cacheKey = new CacheKey(tags);
PdbWriter writer = writerCache.get(cacheKey);
if (writer == null) {
// double-checked: re-read inside the lock so only one writer is created per key
synchronized (this) {
writer = writerCache.get(cacheKey);
if (writer == null) {
LOGGER.trace("getByTags({})", tags);
writer = dataStore.getWriter(tags);
writerCache.put(cacheKey, writer);
}
}
}
return writer;
}
// Applies consumer to every cached writer; a RuntimeException from the
// consumer is logged and iteration continues with the next writer.
private void forEachWriter(final Consumer<PdbWriter> consumer) {
writerCache.forEach(writer -> {
try {
consumer.accept(writer);
} catch (final RuntimeException e) {
LOGGER.warn("Exception while applying consumer to PdbWriter for " + writer.getPdbFile(), e);
}
});
}
// Closes every cached writer. A failure is wrapped in WriteException, which
// forEachWriter logs, so one bad writer does not abort closing the rest.
@Override
public void close() {
forEachWriter(t -> {
try {
t.close();
} catch (final Exception e) {
throw new WriteException(e);
}
});
}
// Flushes every cached writer; failures are wrapped and logged as in close().
public void flush() {
forEachWriter(t -> {
try {
t.flush();
} catch (final Exception e) {
throw new WriteException(e);
}
});
}
}

View File

@@ -17,6 +17,7 @@ import org.lucares.pdb.api.Entry;
import org.lucares.pdb.api.GroupResult; import org.lucares.pdb.api.GroupResult;
import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Result;
import org.lucares.pdb.api.Tags; import org.lucares.pdb.api.Tags;
import org.lucares.utils.DateUtils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.AfterMethod; import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeMethod;

View File

@@ -1,93 +0,0 @@
package org.lucares.performance.db;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import org.lucares.pdb.api.Entry;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.PdbWriter;
import org.lucares.pdb.datastore.internal.DataStore;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@Test
public class TagsToFilesTest {

    // Fresh temp directory per test method; deleted afterwards.
    private Path dataDirectory;

    @BeforeMethod
    public void beforeMethod() throws IOException {
        dataDirectory = Files.createTempDirectory("pdb");
    }

    @AfterMethod
    public void afterMethod() throws IOException {
        org.lucares.utils.file.FileUtils.delete(dataDirectory);
    }

    /** Requesting a writer twice for the same tags must yield the same cached instance. */
    public void test() throws Exception {
        try (final DataStore dataStore = new DataStore(dataDirectory); //
                final TagsToFile tagsToFile = new TagsToFile(dataStore)) {
            final OffsetDateTime date = OffsetDateTime.now(ZoneOffset.UTC);
            final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
            final PdbWriter newFileForTags = tagsToFile.getWriter(date.toInstant().toEpochMilli(), tags);
            final PdbWriter existingFileForTags = tagsToFile.getWriter(date.toInstant().toEpochMilli(), tags);
            Assert.assertSame(newFileForTags, existingFileForTags);
        }
    }

    /** Entries for different days but identical tags must share one writer. */
    public void testAppendingToSameFile() throws Exception {
        try (final DataStore dataStore = new DataStore(dataDirectory); //
                final TagsToFile tagsToFile = new TagsToFile(dataStore)) {
            // dayC is before dayA and dayB
            final long dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1).toInstant().toEpochMilli();
            final long dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1).toInstant().toEpochMilli();
            final long dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1).toInstant().toEpochMilli();
            final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
            final PdbWriter writerForDayA = tagsToFile.getWriter(dayA, tags);
            writerForDayA.write(new Entry(dayA, 1, tags));
            final PdbWriter writerForDayB = tagsToFile.getWriter(dayB, tags);
            writerForDayB.write(new Entry(dayB, 2, tags));
            final PdbWriter writerForDayC = tagsToFile.getWriter(dayC, tags);
            writerForDayC.write(new Entry(dayC, 3, tags));
            Assert.assertSame(writerForDayA, writerForDayB);
            Assert.assertSame(writerForDayA, writerForDayC);
        }
    }

    /** Identical timestamps and tags must resolve to the identical writer. */
    public void testIdenticalDatesGoIntoSameFile() throws Exception {
        try (final DataStore dataStore = new DataStore(dataDirectory); //
                final TagsToFile tagsToFile = new TagsToFile(dataStore)) {
            final long timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1).toInstant().toEpochMilli();
            final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
            final PdbWriter fileA = tagsToFile.getWriter(timestamp, tags);
            fileA.write(new Entry(timestamp, 1, tags));
            final PdbWriter fileB = tagsToFile.getWriter(timestamp, tags);
            // BUGFIX: write the second entry through the second writer handle
            // (was fileA.write(...), which never exercised fileB at all).
            fileB.write(new Entry(timestamp, 2, tags));
            // assertSame for consistency with the sibling tests above.
            Assert.assertSame(fileA, fileB);
        }
    }
}