open PdbReaders only when reading
We used to open all PdbReaders in a search result and then iterate over them. This used a lot of heap space (> 8 GB) for 400k files. Now each PdbReader is only opened while it is being used. Heap usage was less than 550 MB while reading more than 400k files.
This commit is contained in:
@@ -7,23 +7,23 @@ import org.lucares.pdb.api.Tags;
|
|||||||
class Group {
|
class Group {
|
||||||
private final Tags tags;
|
private final Tags tags;
|
||||||
|
|
||||||
private final List<PdbReader> readers;
|
private final List<PdbFile> files;
|
||||||
|
|
||||||
public Group(final Tags tags, final List<PdbReader> files) {
|
public Group(final Tags tags, final List<PdbFile> files) {
|
||||||
super();
|
super();
|
||||||
this.tags = tags;
|
this.tags = tags;
|
||||||
this.readers = files;
|
this.files = files;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Tags getTags() {
|
public Tags getTags() {
|
||||||
return tags;
|
return tags;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<PdbReader> getReaders() {
|
public List<PdbFile> getFiles() {
|
||||||
return readers;
|
return files;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addReader(final PdbReader pdbReader) {
|
public void addFile(final PdbFile file) {
|
||||||
readers.add(pdbReader);
|
files.add(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,22 +23,22 @@ public class Grouping {
|
|||||||
this.groups.addAll(groups);
|
this.groups.addAll(groups);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Grouping groupBy(final List<PdbReader> pdbReaders, final List<String> groupByField) {
|
public static Grouping groupBy(final List<PdbFile> pdbFiles, final List<String> groupByField) {
|
||||||
|
|
||||||
final Grouping result;
|
final Grouping result;
|
||||||
if (noGrouping(groupByField)) {
|
if (noGrouping(groupByField)) {
|
||||||
final Group group = new Group(Tags.EMPTY, pdbReaders);
|
final Group group = new Group(Tags.EMPTY, pdbFiles);
|
||||||
|
|
||||||
result = new Grouping(group);
|
result = new Grouping(group);
|
||||||
} else {
|
} else {
|
||||||
final Map<Tags, Group> grouping = new HashMap<>();
|
final Map<Tags, Group> grouping = new HashMap<>();
|
||||||
|
|
||||||
for (final PdbReader pdbReader : pdbReaders) {
|
for (final PdbFile pdbFile : pdbFiles) {
|
||||||
final Tags tags = pdbReader.getPdbFile().getTags();
|
final Tags tags = pdbFile.getTags();
|
||||||
final Tags groupTags = tags.subset(groupByField);
|
final Tags groupTags = tags.subset(groupByField);
|
||||||
|
|
||||||
addIfNotExists(grouping, groupTags);
|
addIfNotExists(grouping, groupTags);
|
||||||
grouping.get(groupTags).addReader(pdbReader);
|
grouping.get(groupTags).addFile(pdbFile);
|
||||||
}
|
}
|
||||||
result = new Grouping(grouping.values());
|
result = new Grouping(grouping.values());
|
||||||
}
|
}
|
||||||
@@ -51,9 +51,9 @@ public class Grouping {
|
|||||||
|
|
||||||
private static void addIfNotExists(final Map<Tags, Group> grouping, final Tags groupTags) {
|
private static void addIfNotExists(final Map<Tags, Group> grouping, final Tags groupTags) {
|
||||||
if (!grouping.containsKey(groupTags)) {
|
if (!grouping.containsKey(groupTags)) {
|
||||||
final List<PdbReader> readers = new ArrayList<>();
|
final List<PdbFile> files = new ArrayList<>();
|
||||||
|
|
||||||
grouping.put(groupTags, new Group(groupTags, readers));
|
grouping.put(groupTags, new Group(groupTags, files));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
package org.lucares.performance.db;
|
package org.lucares.performance.db;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
import java.util.ArrayDeque;
|
import java.util.ArrayDeque;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
@@ -13,14 +16,17 @@ import org.slf4j.LoggerFactory;
|
|||||||
|
|
||||||
public class PdbFileIterator implements Iterator<Entry>, AutoCloseable {
|
public class PdbFileIterator implements Iterator<Entry>, AutoCloseable {
|
||||||
|
|
||||||
private final static Logger LOGGER = LoggerFactory.getLogger(PdbFileIterator.class);
|
private final static Logger LOGGER = LoggerFactory
|
||||||
|
.getLogger(PdbFileIterator.class);
|
||||||
|
|
||||||
private static final class EntrySupplier implements Supplier<Entry>, AutoCloseable {
|
private static final class EntrySupplier implements Supplier<Entry>,
|
||||||
|
AutoCloseable {
|
||||||
|
|
||||||
private final Queue<PdbReader> pdbFiles;
|
private final Queue<PdbFile> pdbFiles;
|
||||||
private PdbReader reader;
|
private PdbReader reader;
|
||||||
|
private PdbFile currentPdbFile;
|
||||||
|
|
||||||
public EntrySupplier(final Collection<PdbReader> pdbFiles) {
|
public EntrySupplier(final Collection<PdbFile> pdbFiles) {
|
||||||
super();
|
super();
|
||||||
this.pdbFiles = new ArrayDeque<>(pdbFiles);
|
this.pdbFiles = new ArrayDeque<>(pdbFiles);
|
||||||
}
|
}
|
||||||
@@ -45,7 +51,8 @@ public class PdbFileIterator implements Iterator<Entry>, AutoCloseable {
|
|||||||
// A reader might return null, for a newly opened reader,
|
// A reader might return null, for a newly opened reader,
|
||||||
// if the file was created, but nothing has been written to
|
// if the file was created, but nothing has been written to
|
||||||
// disk yet.
|
// disk yet.
|
||||||
// This might happen, because of buffering, or when an ingestion
|
// This might happen, because of buffering, or when an
|
||||||
|
// ingestion
|
||||||
// was cancelled.
|
// was cancelled.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -61,7 +68,23 @@ public class PdbFileIterator implements Iterator<Entry>, AutoCloseable {
|
|||||||
reader = null;
|
reader = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
reader = pdbFiles.poll();
|
while (!pdbFiles.isEmpty()) {
|
||||||
|
currentPdbFile = pdbFiles.poll();
|
||||||
|
try {
|
||||||
|
|
||||||
|
if (Files.size(currentPdbFile.getPath()) > 0) {
|
||||||
|
reader = new PdbReader(currentPdbFile);
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
LOGGER.info("ignoring empty file " + currentPdbFile);
|
||||||
|
}
|
||||||
|
} catch (final FileNotFoundException e) {
|
||||||
|
LOGGER.warn("the pdbFile " + currentPdbFile.getPath()
|
||||||
|
+ " is missing", e);
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new ReadException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -69,14 +92,6 @@ public class PdbFileIterator implements Iterator<Entry>, AutoCloseable {
|
|||||||
if (reader != null) {
|
if (reader != null) {
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!pdbFiles.isEmpty()) {
|
|
||||||
try {
|
|
||||||
pdbFiles.poll().close();
|
|
||||||
} catch (final Exception e) {
|
|
||||||
LOGGER.warn("Closing pdb file failed.", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -85,7 +100,7 @@ public class PdbFileIterator implements Iterator<Entry>, AutoCloseable {
|
|||||||
|
|
||||||
private Optional<Entry> next = Optional.empty();
|
private Optional<Entry> next = Optional.empty();
|
||||||
|
|
||||||
public PdbFileIterator(final Collection<PdbReader> pdbFiles) {
|
public PdbFileIterator(final Collection<PdbFile> pdbFiles) {
|
||||||
supplier = new EntrySupplier(pdbFiles);
|
supplier = new EntrySupplier(pdbFiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -151,9 +151,9 @@ public class PerformanceDb implements AutoCloseable {
|
|||||||
*/
|
*/
|
||||||
public Result get(final String query, final List<String> groupBy) {
|
public Result get(final String query, final List<String> groupBy) {
|
||||||
|
|
||||||
final List<PdbReader> pdbReaders = tagsToFile.getReaders(query);
|
final List<PdbFile> pdbFiles = tagsToFile.getFilesForQuery(query);
|
||||||
|
|
||||||
final Grouping grouping = Grouping.groupBy(pdbReaders, groupBy);
|
final Grouping grouping = Grouping.groupBy(pdbFiles, groupBy);
|
||||||
|
|
||||||
final Result result = toResult(grouping);
|
final Result result = toResult(grouping);
|
||||||
|
|
||||||
@@ -163,7 +163,7 @@ public class PerformanceDb implements AutoCloseable {
|
|||||||
private Result toResult(final Grouping grouping) {
|
private Result toResult(final Grouping grouping) {
|
||||||
final List<GroupResult> groupResults = new ArrayList<>();
|
final List<GroupResult> groupResults = new ArrayList<>();
|
||||||
for (final Group group : grouping.getGroups()) {
|
for (final Group group : grouping.getGroups()) {
|
||||||
final Stream<Entry> stream = toStream(group.getReaders());
|
final Stream<Entry> stream = toStream(group.getFiles());
|
||||||
final GroupResult groupResult = new GroupResult(stream, group.getTags());
|
final GroupResult groupResult = new GroupResult(stream, group.getTags());
|
||||||
groupResults.add(groupResult);
|
groupResults.add(groupResult);
|
||||||
}
|
}
|
||||||
@@ -171,7 +171,7 @@ public class PerformanceDb implements AutoCloseable {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Stream<Entry> toStream(final List<PdbReader> pdbFiles) {
|
private Stream<Entry> toStream(final List<PdbFile> pdbFiles) {
|
||||||
final PdbFileIterator iterator = new PdbFileIterator(pdbFiles);
|
final PdbFileIterator iterator = new PdbFileIterator(pdbFiles);
|
||||||
|
|
||||||
final Spliterator<Entry> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
final Spliterator<Entry> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
||||||
|
|||||||
Reference in New Issue
Block a user