add index for tags-to-documents

Finding a writer is now much faster, because we no longer have to execute
a query for the documents that match the tags. We can simply look up the
documents in the map.
Speedup: 2-4ms -> 0.002-0.01ms
This commit is contained in:
ahr
2018-01-14 09:51:37 +01:00
parent 64613ce43c
commit d98c45e8bd
5 changed files with 60 additions and 33 deletions

View File

@@ -12,9 +12,7 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.Doc;
@@ -26,7 +24,8 @@ import org.slf4j.LoggerFactory;
public class TagsToFile implements AutoCloseable {
private static final Logger LOGGER = LoggerFactory.getLogger(TagsToFile.class);
private final static Logger METRICS_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.tagsToFile");
private final static Logger METRICS_LOGGER_FIND_WRITER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.tagsToFile.findWriter");
private final static Logger METRICS_LOGGER_NEW_WRITER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.tagsToFile.newPdbWriter");
private static class WriterCache {
final List<PdbWriter> writers = new ArrayList<>();
@@ -53,25 +52,23 @@ public class TagsToFile implements AutoCloseable {
}
/**
 * Returns only those files whose tags are equal to the given tags.
 *
 * Candidates come from {@link #getFilesMatchingTags(Tags)}; each candidate
 * is kept when its own tags {@code equals} the requested tags.
 *
 * @param tags the tags a file must carry exactly
 * @return the candidates that passed the equality check
 */
private List<PdbFile> getFilesMatchingTagsExactly(final Tags tags) {
    final List<PdbFile> candidates = getFilesMatchingTags(tags);
    return CollectionUtils.filter(candidates, candidate -> {
        final Tags candidateTags = candidate.getTags();
        return candidateTags.equals(tags);
    });
}
/**
 * Returns the PDB files associated with the given tags.
 *
 * NOTE(review): this span shows two complete bodies back to back: a
 * query-based lookup (Query.createQuery + getFilesForQuery) followed by a
 * direct lookup (db.getByTags + toPdbFiles). The statements after the first
 * return are unreachable as written; presumably only one of the two
 * variants is meant to be live -- confirm against the actual file.
 *
 * @param tags the tags to look up files for
 * @return the files found for the tags
 */
private List<PdbFile> getFilesMatchingTags(final Tags tags) {
// variant 1: build a query string from the tags and run it
final String query = Query.createQuery(tags);
return getFilesForQuery(query);
// variant 2: ask the datastore for the documents by tags and convert them
final List<Doc> docs = db.getByTags(tags);
return toPdbFiles(docs);
}
/**
 * Runs the query against the datastore and converts the matching documents
 * to PDB files.
 *
 * NOTE(review): {@code result} is declared twice in this span (once as an
 * empty ArrayList, once as the return value of toPdbFiles). Presumably only
 * one declaration belongs to the live code -- confirm against the actual
 * file.
 *
 * @param query the query passed to {@code db.search}
 * @return the PDB files for the documents found by the query
 * @throws IllegalStateException if the search yields more than 500_000 documents
 */
public List<PdbFile> getFilesForQuery(final String query) {
final List<PdbFile> result = new ArrayList<>();
final List<Doc> searchResult = db.search(query);
// refuse to convert result sets above the hard limit
if (searchResult.size() > 500_000){
throw new IllegalStateException("Too many results.");
}
final List<PdbFile> result = toPdbFiles(searchResult);
return result;
}
private List<PdbFile> toPdbFiles(final List<Doc> searchResult) {
final List<PdbFile> result = new ArrayList<>();
for (final Doc document : searchResult) {
final Path path = document.getPath();
@@ -80,7 +77,6 @@ public class TagsToFile implements AutoCloseable {
result.add(pdbFile);
}
return result;
}
@@ -97,15 +93,13 @@ public class TagsToFile implements AutoCloseable {
final long start = System.nanoTime();
final List<PdbFile> pdbFiles = getFilesMatchingTagsExactly(tags);
assertAllFilesHaveSameFolder(pdbFiles);
pdbFiles.removeIf(f -> !f.exists());
final List<Optional<PdbWriter>> optionalWriters = CollectionUtils.map(pdbFiles, writersForTags::writer);
final List<Optional<PdbWriter>> existingWriters = CollectionUtils.filter(optionalWriters,
Optional::isPresent);
final List<PdbWriter> writers = CollectionUtils.map(existingWriters, Optional::get);
METRICS_LOGGER.debug("find writers took {}ms for tags {}", (System.nanoTime() - start)
METRICS_LOGGER_FIND_WRITER.debug("find writers took {}ms for tags {}", (System.nanoTime() - start)
/ 1_000_000.0, tags);
final Optional<PdbWriter> optionalFirst = chooseBestMatchingWriter(writers, date);
@@ -163,27 +157,20 @@ public class TagsToFile implements AutoCloseable {
}
/**
 * Creates a new PDB file for the given tags, opens a writer on it and
 * registers that writer in the writer cache entry for the tags.
 *
 * The elapsed time is reported at debug level on the
 * {@code METRICS_LOGGER_NEW_WRITER} logger.
 *
 * @param tags the tags the new file and writer belong to
 * @return the newly created writer
 * @throws WriteException if creating the file or the writer fails with an
 *                        {@link IOException}
 */
private PdbWriter newPdbWriter(final Tags tags) {
    final long start = System.nanoTime();
    try {
        final PdbFile pdbFile = createNewPdbFile(tags);
        final PdbWriter result = new PdbWriter(pdbFile);
        getOrInit(tags).addWriter(result);
        // Two placeholders for two arguments: without the second "{}" the
        // tags argument is silently dropped from the log message.
        METRICS_LOGGER_NEW_WRITER.debug("newPdbWriter took {}ms tags: {}",
                (System.nanoTime() - start) / 1_000_000.0, tags);
        return result;
    } catch (final IOException e) {
        throw new WriteException(e);
    }
}
/**
 * Verifies that all given files share the same storage folder.
 *
 * For each file the great-grandparent of its path (three {@code getParent()}
 * steps up) is taken; the check passes when at most one distinct folder
 * results.
 *
 * @param pdbFiles the files to check
 * @throws IllegalStateException if more than one distinct folder is found
 */
private void assertAllFilesHaveSameFolder(final List<PdbFile> pdbFiles) {
    final Set<Path> storageFolders = pdbFiles.stream()
            .map(file -> file.getPath().getParent().getParent().getParent())
            .collect(Collectors.toSet());

    final boolean singleFolderOrNone = storageFolders.size() <= 1;
    if (singleFolderOrNone) {
        return;
    }
    throw new IllegalStateException(
            "All storage folders for the same tag must be the same, but are not: " + storageFolders);
}
private PdbFile createNewPdbFile(final Tags tags) throws IOException {