reduce memory usage for computation of cumulative distribution

Before: To compute the cumulative distribution we appended every
duration to a LongList. This requires O(n) memory, where n is the
number of values.

Now: We store each distinct duration together with its number of
occurrences in a LongLongHashMap. This can reduce the memory
requirements considerably when durations repeat, and many do: there
are lots of durations of 0, 1 and 2 milliseconds. In the worst case
every duration is different; then the memory usage doubles with this
solution, because a count is stored next to every value.
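
A minimal sketch of the difference, using java.util collections
instead of the org.lucares ones (illustration only; the sample
numbers are made up):

    import java.util.Map;
    import java.util.TreeMap;

    public class FrequencyMapSketch {
        public static void main(String[] args) {
            // hypothetical sample: repeated small durations plus one outlier
            final long[] durations = {0, 1, 1, 2, 2, 2, 17};

            // before: the list keeps one slot per value          -> 7 entries
            // now:    the map keeps one entry per distinct value -> 4 entries
            final Map<Long, Long> counts = new TreeMap<>();
            for (final long d : durations) {
                counts.merge(d, 1L, Long::sum); // same effect as map.compute(value, 0, l -> l + 1)
            }
            System.out.println(counts); // {0=1, 1=2, 2=3, 17=1}
        }
    }

A TreeMap also mimics the ascending-key iteration that the percentile
pass below gets from LongLongHashMap.forEachOrdered.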

Future: We currently store durations with millisecond precision. We
don't have to: on a y-axis that is only 1000px tall we cannot draw
100 million different values anyway, so durations could be quantized
into far fewer buckets (see the sketch below).
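
If that is ever done, one possible shape (quantize and its parameters
are hypothetical, not part of this commit):

    public class QuantizeSketch {
        // round a duration down to the start of its bucket, so at most
        // roughly 'pixels' distinct keys can end up in the map
        static long quantize(final long durationMillis, final long maxExpectedMillis, final int pixels) {
            final long bucketWidth = Math.max(1, maxExpectedMillis / pixels);
            return (durationMillis / bucketWidth) * bucketWidth;
        }

        public static void main(String[] args) {
            // 100_000 ms of expected range / 1000 px -> buckets 100 ms wide
            System.out.println(quantize(12_345, 100_000, 1000)); // 12300
        }
    }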
commit 162ef1626c
parent 0e9e2cd53a
2019-09-07 18:31:18 +02:00
3 changed files with 72 additions and 18 deletions

CumulativeDistributionCustomAggregator.java

@@ -8,14 +8,68 @@ import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
+import java.util.LinkedHashMap;
-import org.lucares.collections.LongList;
+import org.lucares.collections.LongLongConsumer;
+import org.lucares.collections.LongLongHashMap;
 public class CumulativeDistributionCustomAggregator implements CustomAggregator {
     private final static int POINTS = 500;
-    private final LongList values = new LongList();
+    private static final class ToPercentiles implements LongLongConsumer {
+        private long cumulativeCount = 0;
+        private long maxValue = 0;
+        private final LinkedHashMap<Double, Long> percentiles = new LinkedHashMap<>(POINTS);
+        private final double stepSize;
+        private double lastPercentile;
+        private double nextPercentile;
+        private final long totalValues;
+
+        public ToPercentiles(final long totalValues) {
+            this.totalValues = totalValues;
+            stepSize = 100.0 / POINTS;
+            nextPercentile = stepSize;
+        }
+
+        @Override
+        public void accept(final long duration, final long count) {
+            maxValue = duration;
+            cumulativeCount += count;
+            final double newPercentile = cumulativeCount * 100.0 / totalValues;
+            if (newPercentile >= nextPercentile) {
+                double currentPercentile = lastPercentile + stepSize;
+                while (currentPercentile <= newPercentile) {
+                    percentiles.put(currentPercentile, duration);
+                    currentPercentile += stepSize;
+                }
+                nextPercentile = currentPercentile;
+                lastPercentile = currentPercentile - stepSize;
+            }
+        }
+
+        public long getMaxValue() {
+            return maxValue;
+        }
+
+        public LinkedHashMap<Double, Long> getPercentiles() {
+            return percentiles;
+        }
+    }
+
+    // the rather large initial capacity should prevent too many grow&re-hash phases
+    private final LongLongHashMap map = new LongLongHashMap(5_000, 0.75);
+    private long totalValues = 0;
     private final Path tmpDir;
@@ -25,7 +79,8 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
     @Override
     public void addValue(final long epochMilli, final long value) {
-        values.add((int) value);
+        map.compute(value, 0, l -> l + 1);
+        totalValues++;
     }

     @Override
@@ -33,26 +88,25 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
         final char separator = ',';
         final char newline = '\n';
-        values.parallelSort();
-        final LongList percentiles = new LongList(POINTS);
+        final ToPercentiles toPercentiles = new ToPercentiles(totalValues);
+        map.forEachOrdered(toPercentiles);
         final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
         try (final Writer output = new BufferedWriter(
                 new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));) {
             final StringBuilder data = new StringBuilder();
-            if (values.size() > 0) {
+            if (map.size() > 0) {
                 // compute the percentiles
-                for (int i = 0; i < POINTS; i++) {
-                    data.append(i * (100 / (double) POINTS));
-                    data.append(separator);
-                    final long percentile = values.get((int) Math.floor(values.size() / ((double) POINTS) * i));
-                    data.append(percentile);
-                    data.append(newline);
-                    percentiles.add(percentile);
-                }
-                final long maxValue = values.get(values.size() - 1);
+                toPercentiles.getPercentiles().forEach((percentile, value) -> {
+                    data.append(percentile);
+                    data.append(separator);
+                    data.append(value);
+                    data.append(newline);
+                });
+                final long maxValue = toPercentiles.getMaxValue();
                 data.append(100);
                 data.append(separator);
                 data.append(maxValue);
@@ -62,7 +116,7 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
         }
-        final String title = String.format("percentiles");
+        final String title = String.format("cumulative distribution");
         return new AggregatedData(title, dataFile);
     }
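
To see what the new code computes, here is a self-contained
re-creation of the ToPercentiles pass with slightly simplified
bookkeeping, using a TreeMap where the real code relies on
LongLongHashMap.forEachOrdered (illustration only, with POINTS
shrunk from 500 to 4):

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.TreeMap;

    public class PercentileSketch {
        public static void main(String[] args) {
            // duration (ms) -> number of occurrences, iterated in ascending key order
            final TreeMap<Long, Long> counts = new TreeMap<>(Map.of(0L, 50L, 1L, 30L, 2L, 15L, 100L, 5L));
            final long total = counts.values().stream().mapToLong(Long::longValue).sum(); // 100

            final int points = 4; // 25%-steps instead of the 500 steps used above
            final double stepSize = 100.0 / points;
            double nextPercentile = stepSize;
            long cumulative = 0;
            final LinkedHashMap<Double, Long> percentiles = new LinkedHashMap<>();

            for (final Map.Entry<Long, Long> e : counts.entrySet()) {
                cumulative += e.getValue();
                final double reached = cumulative * 100.0 / total;
                // one entry may push the cumulative share past several steps at once
                while (nextPercentile <= reached) {
                    percentiles.put(nextPercentile, e.getKey());
                    nextPercentile += stepSize;
                }
            }
            System.out.println(percentiles); // {25.0=0, 50.0=0, 75.0=1, 100.0=100}
        }
    }

The memory win shows up here too: 100 raw values collapse to 4 map
entries, and the percentile pass only ever touches distinct durations.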

PlotSettings.java

@@ -119,7 +119,7 @@ public class PlotSettings {
     public DateTimeRange dateRange() {
         final String[] startEnd = dateRangeAsString.split(Pattern.quote(" - "));
-        Preconditions.checkEqual(startEnd, 2, "invalid date range: ''{0}''", dateRangeAsString);
+        Preconditions.checkEqual(startEnd.length, 2, "invalid date range: ''{0}''", dateRangeAsString);
         final OffsetDateTime startDate = LocalDateTime.parse(startEnd[0], DATE_FORMAT).atOffset(ZoneOffset.UTC);
         final OffsetDateTime endDate = LocalDateTime.parse(startEnd[1], DATE_FORMAT).atOffset(ZoneOffset.UTC);
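
The one-line fix above is easy to miss: the old call passed the
String[] itself where its length was meant, so the precondition
compared the wrong thing. A stand-alone version of the corrected
check in plain Java (Preconditions.checkEqual is the project's own
helper, so an IllegalArgumentException stands in for it here):

    import java.util.regex.Pattern;

    public class DateRangeCheckSketch {
        public static void main(String[] args) {
            final String dateRangeAsString = "2019-09-01 00:00 - 2019-09-07 18:31"; // hypothetical input
            final String[] startEnd = dateRangeAsString.split(Pattern.quote(" - "));
            // the fix: compare the array's length, not the array reference, against 2
            if (startEnd.length != 2) {
                throw new IllegalArgumentException("invalid date range: '" + dateRangeAsString + "'");
            }
            System.out.println(startEnd[0] + " .. " + startEnd[1]);
        }
    }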