reduce memory usage for computation of cumulative distribution
Before: To compute the cumulative distribution we added every duration into a LongList. This requires O(n) memory, where n is the number of values. Now: We store the durations plus the number of occurrences in a LongLongHashMap. This has the potential to reduce the memory requirements when durations occur multiple times; there are a lot of durations of 0, 1, or 2 milliseconds. In the worst case every duration is different, and then the memory usage doubles with this solution. Future: We are currently storing durations with millisecond precision. We don't have to do that: we cannot draw 100 million different values on the y-axis in an image that is only 1000px tall.
This commit is contained in:
@@ -32,7 +32,7 @@ ext {
|
|||||||
lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}"
|
lib_log4j2_core = "org.apache.logging.log4j:log4j-core:${version_log4j2}"
|
||||||
lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}"
|
lib_log4j2_slf4j_impl = "org.apache.logging.log4j:log4j-slf4j-impl:${version_log4j2}"
|
||||||
|
|
||||||
lib_primitive_collections='org.lucares:primitiveCollections:0.1.20190819195450'
|
lib_primitive_collections='org.lucares:primitiveCollections:0.1.20190907180014'
|
||||||
|
|
||||||
lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}"
|
lib_spring_boot_log4j2="org.springframework.boot:spring-boot-starter-log4j2:${version_spring}"
|
||||||
lib_spring_boot_mustache="org.springframework.boot:spring-boot-starter-mustache:${version_spring}"
|
lib_spring_boot_mustache="org.springframework.boot:spring-boot-starter-mustache:${version_spring}"
|
||||||
|
|||||||
@@ -8,14 +8,68 @@ import java.io.OutputStreamWriter;
|
|||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
|
||||||
import org.lucares.collections.LongList;
|
import org.lucares.collections.LongLongConsumer;
|
||||||
|
import org.lucares.collections.LongLongHashMap;
|
||||||
|
|
||||||
public class CumulativeDistributionCustomAggregator implements CustomAggregator {
|
public class CumulativeDistributionCustomAggregator implements CustomAggregator {
|
||||||
|
|
||||||
private final static int POINTS = 500;
|
private final static int POINTS = 500;
|
||||||
|
|
||||||
private final LongList values = new LongList();
|
private static final class ToPercentiles implements LongLongConsumer {
|
||||||
|
|
||||||
|
private long cumulativeCount = 0;
|
||||||
|
|
||||||
|
private long maxValue = 0;
|
||||||
|
|
||||||
|
private final LinkedHashMap<Double, Long> percentiles = new LinkedHashMap<>(POINTS);
|
||||||
|
|
||||||
|
private final double stepSize;
|
||||||
|
|
||||||
|
private double lastPercentile;
|
||||||
|
private double nextPercentile;
|
||||||
|
|
||||||
|
private final long totalValues;
|
||||||
|
|
||||||
|
public ToPercentiles(final long totalValues) {
|
||||||
|
this.totalValues = totalValues;
|
||||||
|
stepSize = 100.0 / POINTS;
|
||||||
|
nextPercentile = stepSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void accept(final long duration, final long count) {
|
||||||
|
maxValue = duration;
|
||||||
|
|
||||||
|
cumulativeCount += count;
|
||||||
|
final double newPercentile = cumulativeCount * 100.0 / totalValues;
|
||||||
|
|
||||||
|
if (newPercentile >= nextPercentile) {
|
||||||
|
double currentPercentile = lastPercentile + stepSize;
|
||||||
|
while (currentPercentile <= newPercentile) {
|
||||||
|
percentiles.put(currentPercentile, duration);
|
||||||
|
currentPercentile += stepSize;
|
||||||
|
}
|
||||||
|
nextPercentile = currentPercentile;
|
||||||
|
lastPercentile = currentPercentile - stepSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getMaxValue() {
|
||||||
|
return maxValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LinkedHashMap<Double, Long> getPercentiles() {
|
||||||
|
return percentiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// the rather large initial capacity should prevent too many grow&re-hash phases
|
||||||
|
private final LongLongHashMap map = new LongLongHashMap(5_000, 0.75);
|
||||||
|
|
||||||
|
private long totalValues = 0;
|
||||||
|
|
||||||
private final Path tmpDir;
|
private final Path tmpDir;
|
||||||
|
|
||||||
@@ -25,7 +79,8 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void addValue(final long epochMilli, final long value) {
|
public void addValue(final long epochMilli, final long value) {
|
||||||
values.add((int) value);
|
map.compute(value, 0, l -> l + 1);
|
||||||
|
totalValues++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -33,26 +88,25 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
|
|||||||
final char separator = ',';
|
final char separator = ',';
|
||||||
final char newline = '\n';
|
final char newline = '\n';
|
||||||
|
|
||||||
values.parallelSort();
|
final ToPercentiles toPercentiles = new ToPercentiles(totalValues);
|
||||||
|
map.forEachOrdered(toPercentiles);
|
||||||
|
|
||||||
final LongList percentiles = new LongList(POINTS);
|
|
||||||
final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
|
final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
|
||||||
try (final Writer output = new BufferedWriter(
|
try (final Writer output = new BufferedWriter(
|
||||||
new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));) {
|
new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));) {
|
||||||
|
|
||||||
final StringBuilder data = new StringBuilder();
|
final StringBuilder data = new StringBuilder();
|
||||||
if (values.size() > 0) {
|
if (map.size() > 0) {
|
||||||
// compute the percentiles
|
// compute the percentiles
|
||||||
for (int i = 0; i < POINTS; i++) {
|
toPercentiles.getPercentiles().forEach((percentile, value) -> {
|
||||||
data.append(i * (100 / (double) POINTS));
|
|
||||||
data.append(separator);
|
|
||||||
final long percentile = values.get((int) Math.floor(values.size() / ((double) POINTS) * i));
|
|
||||||
data.append(percentile);
|
|
||||||
data.append(newline);
|
|
||||||
|
|
||||||
percentiles.add(percentile);
|
data.append(percentile);
|
||||||
}
|
data.append(separator);
|
||||||
final long maxValue = values.get(values.size() - 1);
|
data.append(value);
|
||||||
|
data.append(newline);
|
||||||
|
});
|
||||||
|
|
||||||
|
final long maxValue = toPercentiles.getMaxValue();
|
||||||
data.append(100);
|
data.append(100);
|
||||||
data.append(separator);
|
data.append(separator);
|
||||||
data.append(maxValue);
|
data.append(maxValue);
|
||||||
@@ -62,7 +116,7 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final String title = String.format("percentiles");
|
final String title = String.format("cumulative distribution");
|
||||||
return new AggregatedData(title, dataFile);
|
return new AggregatedData(title, dataFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -119,7 +119,7 @@ public class PlotSettings {
|
|||||||
public DateTimeRange dateRange() {
|
public DateTimeRange dateRange() {
|
||||||
|
|
||||||
final String[] startEnd = dateRangeAsString.split(Pattern.quote(" - "));
|
final String[] startEnd = dateRangeAsString.split(Pattern.quote(" - "));
|
||||||
Preconditions.checkEqual(startEnd, 2, "invalid date range: ''{0}''", dateRangeAsString);
|
Preconditions.checkEqual(startEnd.length, 2, "invalid date range: ''{0}''", dateRangeAsString);
|
||||||
|
|
||||||
final OffsetDateTime startDate = LocalDateTime.parse(startEnd[0], DATE_FORMAT).atOffset(ZoneOffset.UTC);
|
final OffsetDateTime startDate = LocalDateTime.parse(startEnd[0], DATE_FORMAT).atOffset(ZoneOffset.UTC);
|
||||||
final OffsetDateTime endDate = LocalDateTime.parse(startEnd[1], DATE_FORMAT).atOffset(ZoneOffset.UTC);
|
final OffsetDateTime endDate = LocalDateTime.parse(startEnd[1], DATE_FORMAT).atOffset(ZoneOffset.UTC);
|
||||||
|
|||||||
Reference in New Issue
Block a user