reduce memory usage when computing the cumulative distribution

Before: To compute the cumulative distribution we appended every
duration to a LongList. This requires O(n) memory, where n is the
number of values.

Now: We store each duration together with its number of occurrences in
a LongLongHashMap. This can reduce the memory requirements whenever
durations repeat, which they often do: a lot of durations are 0, 1, or
2 milliseconds. In the worst case every duration is distinct, and this
solution doubles the memory usage.
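A minimal sketch of the counting idea, using java.util.HashMap as a
boxed stand-in for the primitive LongLongHashMap (class and method
names below are illustrative, not part of this commit):

import java.util.HashMap;
import java.util.Map;

public class DurationCounter {
    // duration in milliseconds -> number of occurrences
    private final Map<Long, Long> counts = new HashMap<>();
    private long totalValues = 0;

    public void add(final long durationMillis) {
        // one map entry per distinct duration instead of one list slot per value
        counts.merge(durationMillis, 1L, Long::sum);
        totalValues++;
    }

    public long total() {
        return totalValues;
    }
}

The savings come from paying per distinct key instead of per value;
the boxed map shown here would not actually save memory, which is why
the primitive LongLongHashMap is used in the real code.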

Future: We currently store durations with millisecond precision. We
don't have to: we cannot draw 100 million distinct values on the
y-axis of an image that is only 1000px tall.
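One possible shape for that future change, sketched under the
assumption of fixed-width buckets (the 10 ms width is illustrative;
the commit does not decide on one):

public class DurationQuantizer {

    // with ~1000 vertical pixels only ~1000 distinct y-values are
    // drawable, so quantizing durations caps the number of map keys
    private static final long BUCKET_MILLIS = 10;

    public static long toBucket(final long durationMillis) {
        return (durationMillis / BUCKET_MILLIS) * BUCKET_MILLIS;
    }

    public static void main(final String[] args) {
        System.out.println(toBucket(42)); // 40
        System.out.println(toBucket(49)); // 40
    }
}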
2019-09-07 18:31:18 +02:00
parent 0e9e2cd53a
commit 162ef1626c
3 changed files with 72 additions and 18 deletions

@@ -32,7 +32,7 @@ ext {
     lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}"
     lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}"
-    lib_primitive_collections='org.lucares:primitiveCollections:0.1.20190819195450'
+    lib_primitive_collections='org.lucares:primitiveCollections:0.1.20190907180014'
     lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}"
     lib_spring_boot_mustache="org.springframework.boot:spring-boot-starter-mustache:${version_spring}"

@@ -8,14 +8,68 @@ import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
+import java.util.LinkedHashMap;
 
-import org.lucares.collections.LongList;
+import org.lucares.collections.LongLongConsumer;
+import org.lucares.collections.LongLongHashMap;
 
 public class CumulativeDistributionCustomAggregator implements CustomAggregator {
 
     private final static int POINTS = 500;
 
-    private final LongList values = new LongList();
+    private static final class ToPercentiles implements LongLongConsumer {
+
+        private long cumulativeCount = 0;
+        private long maxValue = 0;
+        private final LinkedHashMap<Double, Long> percentiles = new LinkedHashMap<>(POINTS);
+        private final double stepSize;
+        private double lastPercentile;
+        private double nextPercentile;
+        private final long totalValues;
+
+        public ToPercentiles(final long totalValues) {
+            this.totalValues = totalValues;
+            stepSize = 100.0 / POINTS;
+            nextPercentile = stepSize;
+        }
+
+        @Override
+        public void accept(final long duration, final long count) {
+            maxValue = duration;
+            cumulativeCount += count;
+
+            final double newPercentile = cumulativeCount * 100.0 / totalValues;
+            if (newPercentile >= nextPercentile) {
+                double currentPercentile = lastPercentile + stepSize;
+                while (currentPercentile <= newPercentile) {
+                    percentiles.put(currentPercentile, duration);
+                    currentPercentile += stepSize;
+                }
+                nextPercentile = currentPercentile;
+                lastPercentile = currentPercentile - stepSize;
+            }
+        }
+
+        public long getMaxValue() {
+            return maxValue;
+        }
+
+        public LinkedHashMap<Double, Long> getPercentiles() {
+            return percentiles;
+        }
+    }
+
+    // the rather large initial capacity should prevent too many grow&re-hash phases
+    private final LongLongHashMap map = new LongLongHashMap(5_000, 0.75);
+    private long totalValues = 0;
 
     private final Path tmpDir;
@@ -25,7 +79,8 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
 
     @Override
     public void addValue(final long epochMilli, final long value) {
-        values.add((int) value);
+        map.compute(value, 0, l -> l + 1);
+        totalValues++;
     }
 
     @Override
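The compute call above belongs to the custom primitiveCollections API.
Judging from this usage alone, its second argument appears to act as
the value for absent keys; a boxed java.util.Map equivalent under that
assumption:

import java.util.HashMap;
import java.util.Map;

public class ComputeSemantics {
    public static void main(final String[] args) {
        final Map<Long, Long> counts = new HashMap<>();
        // presumed equivalent of map.compute(value, 0, l -> l + 1):
        // treat an absent key as 0, then apply the increment
        final long value = 7; // hypothetical duration
        counts.compute(value, (k, v) -> (v == null ? 0L : v) + 1);
        System.out.println(counts); // {7=1}
    }
}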
@@ -33,26 +88,25 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
         final char separator = ',';
         final char newline = '\n';
 
-        values.parallelSort();
+        final ToPercentiles toPercentiles = new ToPercentiles(totalValues);
+        map.forEachOrdered(toPercentiles);
 
-        final LongList percentiles = new LongList(POINTS);
         final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
         try (final Writer output = new BufferedWriter(
                 new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));) {
             final StringBuilder data = new StringBuilder();
-            if (values.size() > 0) {
+            if (map.size() > 0) {
                 // compute the percentiles
-                for (int i = 0; i < POINTS; i++) {
-                    data.append(i * (100 / (double) POINTS));
-                    data.append(separator);
-                    final long percentile = values.get((int) Math.floor(values.size() / ((double) POINTS) * i));
-                    data.append(percentile);
-                    data.append(newline);
-                    percentiles.add(percentile);
-                }
+                toPercentiles.getPercentiles().forEach((percentile, value) -> {
+                    data.append(percentile);
+                    data.append(separator);
+                    data.append(value);
+                    data.append(newline);
+                });
 
-                final long maxValue = values.get(values.size() - 1);
+                final long maxValue = toPercentiles.getMaxValue();
                 data.append(100);
                 data.append(separator);
                 data.append(maxValue);
@@ -62,7 +116,7 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
         }
 
-        final String title = String.format("percentiles");
+        final String title = String.format("cumulative distribution");
         return new AggregatedData(title, dataFile);
     }

@@ -119,7 +119,7 @@ public class PlotSettings {
     public DateTimeRange dateRange() {
         final String[] startEnd = dateRangeAsString.split(Pattern.quote(" - "));
-        Preconditions.checkEqual(startEnd, 2, "invalid date range: ''{0}''", dateRangeAsString);
+        Preconditions.checkEqual(startEnd.length, 2, "invalid date range: ''{0}''", dateRangeAsString);
         final OffsetDateTime startDate = LocalDateTime.parse(startEnd[0], DATE_FORMAT).atOffset(ZoneOffset.UTC);
         final OffsetDateTime endDate = LocalDateTime.parse(startEnd[1], DATE_FORMAT).atOffset(ZoneOffset.UTC);
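This last hunk is an unrelated fix: checkEqual previously received the
array itself where its length was meant. A minimal reproduction of the
parsing path, assuming an ISO-like DATE_FORMAT (the real pattern lives
in PlotSettings and may differ):

import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.regex.Pattern;

public class DateRangeParse {
    // assumption: the real DATE_FORMAT may use a different pattern
    private static final DateTimeFormatter DATE_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    public static void main(final String[] args) {
        final String range = "2019-09-01 00:00:00 - 2019-09-07 18:31:18";
        final String[] startEnd = range.split(Pattern.quote(" - "));
        if (startEnd.length != 2) { // the fixed check compares the length, not the array
            throw new IllegalArgumentException("invalid date range: '" + range + "'");
        }
        final OffsetDateTime start = LocalDateTime.parse(startEnd[0], DATE_FORMAT).atOffset(ZoneOffset.UTC);
        final OffsetDateTime end = LocalDateTime.parse(startEnd[1], DATE_FORMAT).atOffset(ZoneOffset.UTC);
        System.out.println(start + " .. " + end);
    }
}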