move scatter plot creation into an AggregateHandler

This commit is contained in:
2019-10-20 08:11:09 +02:00
parent 7c61686808
commit b7c4fe4c1f
19 changed files with 186 additions and 109 deletions

View File

@@ -23,6 +23,6 @@ public interface AggregateHandler {
void addPlotsAfterScatter(StringBuilder result, Collection<DataSeries> dataSeries);
CustomAggregator createCustomAggregator(Path tmpDir, long fromEpochMilli, long toEpochMilli);
CustomAggregator createCustomAggregator(Path tmpDir, PlotSettings plotSettings, long fromEpochMilli, long toEpochMilli);
}

View File

@@ -78,7 +78,7 @@ public class CumulativeDistributionCustomAggregator implements CustomAggregator
}
@Override
public void addValue(final long epochMilli, final long value) {
public void addValue(boolean valueIsInYRange, final long epochMilli, final long value) {
map.compute(value, 0, l -> l + 1);
totalValues++;
}

View File

@@ -9,7 +9,7 @@ import org.lucares.recommind.logs.LineStyle;
public class CumulativeDistributionHandler implements AggregateHandler {
@Override
public CustomAggregator createCustomAggregator(final Path tmpDir, final long fromEpochMilli,
public CustomAggregator createCustomAggregator(final Path tmpDir, PlotSettings plotSettings, final long fromEpochMilli,
final long toEpochMilli) {
return new CumulativeDistributionCustomAggregator(tmpDir);
}

View File

@@ -4,8 +4,7 @@ import java.io.IOException;
public interface CustomAggregator {
void addValue(long epochMilli, long value);
AggregatedData getAggregatedData() throws IOException;
void addValue(boolean valueIsInYRange, long epochMilli, long value);
AggregatedData getAggregatedData() throws IOException;
}

View File

@@ -24,7 +24,7 @@ public class NullAggregate implements AggregateHandler {
}
@Override
public CustomAggregator createCustomAggregator(final Path tmpDir, final long fromEpochMilli,
public CustomAggregator createCustomAggregator(final Path tmpDir, PlotSettings plotSettings, final long fromEpochMilli,
final long toEpochMilli) {
return new NullCustomAggregator();
}

View File

@@ -3,7 +3,7 @@ package org.lucares.pdb.plot.api;
public class NullCustomAggregator implements CustomAggregator {
@Override
public void addValue(final long epochMilli, final long value) {
public void addValue(boolean valueIsInYRange,final long epochMilli, final long value) {
// nothing to do; this is a null-object
}

View File

@@ -35,7 +35,7 @@ public class ParallelRequestsAggregate implements AggregateHandler {
}
@Override
public CustomAggregator createCustomAggregator(final Path tmpDir, final long fromEpochMilli,
public CustomAggregator createCustomAggregator(final Path tmpDir, PlotSettings plotSettings, final long fromEpochMilli,
final long toEpochMilli) {
if ((toEpochMilli - fromEpochMilli) <= TimeUnit.HOURS.toMillis(5)) {
return new ParallelRequestsAggregator(tmpDir, fromEpochMilli, toEpochMilli);

View File

@@ -39,7 +39,7 @@ public class ParallelRequestsAggregator implements CustomAggregator {
}
@Override
public void addValue(final long epochMilli, final long value) {
public void addValue(boolean valueIsInYRange,final long epochMilli, final long value) {
final int endPos = (int) (epochMilli - fromEpochMilli);
increments[endPos]--;

View File

@@ -0,0 +1,47 @@
package org.lucares.pdb.plot.api;
import java.nio.file.Path;
import java.util.Collection;
import org.lucares.recommind.logs.DataSeries;
import org.lucares.recommind.logs.GnuplotLineType;
/**
 * {@link AggregateHandler} that draws the data series as a scatter plot.
 *
 * <p>
 * The points themselves are produced by the {@link ScatterAggregator} returned
 * from {@link #createCustomAggregator(Path, PlotSettings, long, long)}; this
 * handler only contributes the matching entries of the gnuplot {@code plot}
 * command.
 */
public class ScatterAggregateHandler implements AggregateHandler {

    /**
     * Emits nothing: this handler adds no gnuplot definitions.
     */
    @Override
    public void addGnuplotDefinitions(final StringBuilder result, final String separator,
            final Collection<DataSeries> dataSeries) {
        // no definitions to add (was an auto-generated stub)
    }

    /**
     * Appends one plot entry per data series that carries aggregated data. Each
     * entry plots columns 1 and 2 of the series' data file as points, using the
     * series' title and style.
     *
     * @param result     the gnuplot script being built
     * @param dataSeries the series to plot; series without aggregated data are
     *                   skipped
     */
    @Override
    public void addPlotsBeforeScatter(final StringBuilder result, final Collection<DataSeries> dataSeries) {
        // every entry ends with ", \" so that the plot command continues on the next line
        final String plotEntryFormat = "'%s' using 1:2 title '%s' with %s %s, \\";

        for (final DataSeries series : dataSeries) {
            final AggregatedData data = series.getAggregatedData();
            if (data == null) {
                continue;
            }
            appendfln(result, plotEntryFormat, data.getDataFile(), series.getTitle(), GnuplotLineType.Points,
                    series.getStyle());
        }
    }

    /**
     * Emits nothing: this handler adds no plots after the scatter plot.
     */
    @Override
    public void addPlotsAfterScatter(final StringBuilder result, final Collection<DataSeries> dataSeries) {
        // no plots to add (was an auto-generated stub)
    }

    /**
     * Creates the aggregator that collects the scatter plot points.
     *
     * @return a new {@link ScatterAggregator} for the given directory, settings
     *         and time range
     */
    @Override
    public CustomAggregator createCustomAggregator(final Path tmpDir, final PlotSettings plotSettings,
            final long fromEpochMilli, final long toEpochMilli) {
        return new ScatterAggregator(tmpDir, plotSettings, fromEpochMilli, toEpochMilli);
    }
}

View File

@@ -0,0 +1,94 @@
package org.lucares.pdb.plot.api;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Formatter;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import org.lucares.collections.Sparse2DLongArray;
import org.lucares.recommind.logs.GnuplotSettings;
import org.lucares.recommind.logs.LambdaFriendlyWriter;
import org.lucares.recommind.logs.LongUtils;
/**
 * {@link CustomAggregator} that collects the points of a scatter plot and
 * writes them to a CSV data file.
 *
 * <p>
 * Timestamps and values are rounded down to a per-pixel grid before they are
 * stored, so that points which would end up on the same pixel share one entry
 * in the backing {@link Sparse2DLongArray}.
 */
public class ScatterAggregator implements CustomAggregator {

    private final Sparse2DLongArray matrix2d = new Sparse2DLongArray();

    /** Whether timestamps are written with millisecond precision. */
    private final boolean useMillis;

    private final long plotAreaWidthInPx;

    private final long plotAreaHeightInPx;

    /** Width of one x-axis bucket in milliseconds; at least 1. */
    private final long epochMillisPerPixel;

    private final long minValue;

    private final long maxValue;

    /** Height of one y-axis bucket in milliseconds; at least 1. */
    private final long durationMillisPerPixel;

    private final Path tmpDir;

    /**
     * @param tmpDir         directory in which the data file is created
     * @param plotSettings   supplies the image size, the y-range and the y-axis
     *                       scale
     * @param fromEpochMilli start of the plotted time range in epoch milliseconds
     * @param toEpochMilli   end of the plotted time range in epoch milliseconds
     */
    public ScatterAggregator(final Path tmpDir, final PlotSettings plotSettings, final long fromEpochMilli,
            final long toEpochMilli) {
        this.tmpDir = tmpDir;

        // time ranges shorter than five minutes are plotted with millisecond precision
        useMillis = (toEpochMilli - fromEpochMilli) < TimeUnit.MINUTES.toMillis(5);

        plotAreaWidthInPx = plotSettings.getWidth() - GnuplotSettings.GNUPLOT_LEFT_RIGHT_MARGIN;
        plotAreaHeightInPx = plotSettings.getHeight() - GnuplotSettings.GNUPLOT_TOP_BOTTOM_MARGIN;
        epochMillisPerPixel = Math.max(1, (toEpochMilli - fromEpochMilli) / plotAreaWidthInPx);

        // with an automatic y-range every non-negative value is considered to be in range
        final boolean automaticYRange = plotSettings.getYRangeUnit() == TimeRangeUnitInternal.AUTOMATIC;
        minValue = automaticYRange ? 0 : plotSettings.getYRangeUnit().toMilliSeconds(plotSettings.getYRangeMin());
        maxValue = automaticYRange ? Long.MAX_VALUE
                : plotSettings.getYRangeUnit().toMilliSeconds(plotSettings.getYRangeMax());

        // values are only bucketed on a linear y-axis; other scales keep the exact value
        durationMillisPerPixel = plotSettings.getYAxisScale() == AxisScale.LINEAR
                ? Math.max(1, (maxValue - minValue) / plotAreaHeightInPx)
                : 1;
    }

    /**
     * Records a single point after rounding it down to the pixel grid.
     *
     * @param valueIsInYRange currently ignored
     * @param epochMilli      timestamp of the point in epoch milliseconds
     * @param value           value of the point
     */
    @Override
    public void addValue(final boolean valueIsInYRange, final long epochMilli, final long value) {
        // NOTE(review): valueIsInYRange is not evaluated, so every value handed in is
        // recorded, whatever the flag says. Confirm against the caller whether values
        // outside the y-range are supposed to be dropped here.
        final long roundedEpochMilli = epochMilli - epochMilli % epochMillisPerPixel;
        final long roundedValue = value - value % durationMillisPerPixel;
        matrix2d.put(roundedEpochMilli, roundedValue, 1);
    }

    /**
     * Writes all recorded points to a new temporary file in {@code tmpDir}, one
     * {@code <timestamp>,<value>} pair per line. The timestamp is in seconds,
     * with three decimals when millisecond precision is used and as a whole
     * number otherwise.
     *
     * @return the aggregated data labelled {@code "scatter"} that refers to the
     *         written file
     * @throws IOException if the file cannot be created or written
     */
    @Override
    public AggregatedData getAggregatedData() throws IOException {
        final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
        final int separator = ',';
        final int newline = '\n';

        final StringBuilder formattedDateBuilder = new StringBuilder();
        try (
                final LambdaFriendlyWriter output = new LambdaFriendlyWriter(new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.ISO_8859_1)));
                // Locale.ROOT keeps '.' as the decimal separator. With the default locale a
                // comma-decimal locale would emit e.g. "1571551869,123", which collides
                // with the ',' field separator.
                final Formatter formatter = new Formatter(formattedDateBuilder, Locale.ROOT)) {

            matrix2d.forEach((epochMilli, value, __) -> {
                final String stringValue = LongUtils.longToString(value);
                final String formattedDate;
                if (useMillis) {
                    // the formatter appends to the builder, so clear it before reuse
                    formattedDateBuilder.setLength(0);
                    formatter.format("%.3f", epochMilli / 1000.0);
                    formattedDate = formattedDateBuilder.toString();
                } else {
                    formattedDate = String.valueOf(epochMilli / 1000);
                }

                output.write(formattedDate);
                output.write(separator);
                output.write(stringValue);
                output.write(newline);
            });
        }

        return new AggregatedData("scatter", dataFile);
    }
}

View File

@@ -1,21 +1,17 @@
package org.lucares.recommind.logs;
import java.io.File;
import org.lucares.pdb.plot.api.AggregatedData;
class CsvSummary {
private final int values;
private final long maxValue;
private final File dataFile;
private final AggregatedData aggregatedData;
private final double statsAverage;
private final int plottedValues;
public CsvSummary(final File dataFile, final int values, final int plottedValues, final long maxValue,
public CsvSummary(final int values, final int plottedValues, final long maxValue,
final double statsAverage, final AggregatedData aggregatedData) {
super();
this.dataFile = dataFile;
this.values = values;
this.plottedValues = plottedValues;
this.maxValue = maxValue;
@@ -23,9 +19,6 @@ class CsvSummary {
this.aggregatedData = aggregatedData;
}
public File getDataFile() {
return dataFile;
}
/**
* Total number of values in the selected date range.

View File

@@ -43,8 +43,6 @@ public interface DataSeries {
public AggregatedData getAggregatedData();
public String getGnuplotPlotDefinition();
public static Map<String, Integer> toMap(final List<DataSeries> dataSeries) {
final Map<String, Integer> result = new LinkedHashMap<>();

View File

@@ -1,7 +1,5 @@
package org.lucares.recommind.logs;
import java.io.File;
import org.lucares.pdb.plot.api.AggregatedData;
public class FileBackedDataSeries implements DataSeries {
@@ -12,16 +10,13 @@ public class FileBackedDataSeries implements DataSeries {
private final int id;
private final GnuplotLineType linetype;
private LineStyle style;
public FileBackedDataSeries(final int id, final String title, final CsvSummary csvSummary,
final GnuplotLineType linetype) {
public FileBackedDataSeries(final int id, final String title, final CsvSummary csvSummary
) {
this.id = id;
this.title = title;
this.csvSummary = csvSummary;
this.linetype = linetype;
}
@Override
@@ -44,9 +39,6 @@ public class FileBackedDataSeries implements DataSeries {
return style;
}
public File getDataFile() {
return csvSummary.getDataFile();
}
@Override
public String getTitle() {
@@ -77,15 +69,4 @@ public class FileBackedDataSeries implements DataSeries {
public AggregatedData getAggregatedData() {
return csvSummary.getAggregatedData();
}
@Override
public String getGnuplotPlotDefinition() {
return String.format("'%s' using 1:2 title '%s' with %s %s, \\", //
getDataFile(), //
getTitle(), //
linetype, // line or points
style//
);
}
}

View File

@@ -60,6 +60,7 @@ public class GnuplotFileGenerator {
appendfln(result, "set grid");
appendfln(result, "set output \"%s\"", settings.getOutput().toAbsolutePath().toString().replace("\\", "/"));
// TODO remove marker lines?
// marker lines that show which area will be zoomed
appendfln(result, "set arrow from graph 0.25,0 rto graph 0,1 lc rgb \"#EEEEEE\" nohead");
appendfln(result, "set arrow from graph 0.75,0 rto graph 0,1 lc rgb \"#EEEEEE\" nohead");
@@ -103,9 +104,7 @@ public class GnuplotFileGenerator {
appendf(result, "plot ");
settings.getAggregate().addPlotsBeforeScatter(result, dataSeries);
for (final DataSeries dataSerie : dataSeries) {
appendfln(result, dataSerie.getGnuplotPlotDefinition());
}
settings.getAggregate().addPlotsAfterScatter(result, dataSeries);
return result.toString();

View File

@@ -1,11 +1,6 @@
package org.lucares.recommind.logs;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
@@ -14,7 +9,6 @@ import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
@@ -23,13 +17,11 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import org.lucares.collections.LongList;
import org.lucares.collections.Sparse2DLongArray;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.GroupResult;
import org.lucares.pdb.api.Query;
import org.lucares.pdb.api.Result;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.plot.api.AxisScale;
import org.lucares.pdb.plot.api.CustomAggregator;
import org.lucares.pdb.plot.api.Limit;
import org.lucares.pdb.plot.api.PlotSettings;
@@ -95,7 +87,7 @@ public class Plotter {
final int id = idCounter.incrementAndGet();
final String title = title(groupResult.getGroupedBy(), csvSummary);
final DataSeries dataSerie = new FileBackedDataSeries(id, title, csvSummary, GnuplotLineType.Points);
final DataSeries dataSerie = new FileBackedDataSeries(id, title, csvSummary);
if (dataSerie.getValues() > 0) {
dataSeries.add(dataSerie);
}
@@ -202,35 +194,25 @@ public class Plotter {
private static CsvSummary toCsvDeduplicated(final GroupResult groupResult, final Path tmpDir,
final OffsetDateTime dateFrom, final OffsetDateTime dateTo, final PlotSettings plotSettings) throws IOException {
final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
final long start = System.nanoTime();
final Stream<LongList> timeValueStream = groupResult.asStream();
final long fromEpochMilli = dateFrom.toInstant().toEpochMilli();
final long toEpochMilli = dateTo.toInstant().toEpochMilli();
final boolean useMillis = (toEpochMilli - fromEpochMilli) < TimeUnit.MINUTES.toMillis(5);
final long plotAreaWidthInPx = plotSettings.getWidth() - GnuplotSettings.GNUPLOT_LEFT_RIGHT_MARGIN;
final long plotAreaHeightInPx = plotSettings.getHeight() - GnuplotSettings.GNUPLOT_TOP_BOTTOM_MARGIN;
final long epochMillisPerPixel = Math.max(1, (toEpochMilli - fromEpochMilli) / plotAreaWidthInPx);
final long minValue = plotSettings.getYRangeUnit() == TimeRangeUnitInternal.AUTOMATIC ? 0
: plotSettings.getYRangeUnit().toMilliSeconds(plotSettings.getYRangeMin());
final long maxValue = plotSettings.getYRangeUnit() == TimeRangeUnitInternal.AUTOMATIC ? Long.MAX_VALUE
: plotSettings.getYRangeUnit().toMilliSeconds(plotSettings.getYRangeMax());
final long durationMillisPerPixel = plotSettings.getYAxisScale() == AxisScale.LINEAR
? Math.max(1, (maxValue - minValue) / plotAreaHeightInPx)
: 1;
final CustomAggregator aggregator = plotSettings.getAggregate().createCustomAggregator(tmpDir, fromEpochMilli,
final CustomAggregator aggregator = plotSettings.getAggregate().createCustomAggregator(tmpDir, plotSettings, fromEpochMilli,
toEpochMilli);
final Sparse2DLongArray matrix2d = new Sparse2DLongArray();
int count = 0; // number of values in the x-axis range (used to compute stats)
int plottedValues = 0;
long statsMaxValue = 0;
double statsCurrentAverage = 0.0;
long ignoredValues = 0;
final int separator = ',';
final int newline = '\n';
final Iterator<LongList> it = timeValueStream.iterator();
while (it.hasNext()) {
@@ -246,7 +228,7 @@ public class Plotter {
final long value = entry.get(i + 1);
aggregator.addValue(epochMilli, value);
// compute stats
count++;
@@ -256,53 +238,25 @@ public class Plotter {
statsCurrentAverage = statsCurrentAverage + (value - statsCurrentAverage) / count;
// check if value is in the selected y-range
if (value < minValue || value > maxValue) {
boolean valueIsInYRange = value < minValue || value > maxValue;
if (valueIsInYRange) {
ignoredValues++;
continue;
}else {
plottedValues++;
}
final long roundedEpochMilli = epochMilli - epochMilli % epochMillisPerPixel;
final long roundedValue = value - value % durationMillisPerPixel;
matrix2d.put(roundedEpochMilli, roundedValue, 1);
plottedValues++;
aggregator.addValue(valueIsInYRange, epochMilli, value);
}
}
long[] actualValuesWritten = new long[1];
final StringBuilder formattedDateBuilder = new StringBuilder();
try (
final LambdaFriendlyWriter output = new LambdaFriendlyWriter(
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.ISO_8859_1)));
final Formatter formatter = new Formatter(formattedDateBuilder);) {
matrix2d.forEach((epochMilli, value, __) -> {
final String stringValue = LongUtils.longToString(value);
final String formattedDate;
if (useMillis) {
formattedDateBuilder.delete(0, formattedDateBuilder.length());
formatter.format("%.3f", epochMilli / 1000.0);
formattedDate = formattedDateBuilder.toString();
} else {
formattedDate = String.valueOf(epochMilli / 1000);
}
output.write(formattedDate);
output.write(separator);
output.write(stringValue);
output.write(newline);
actualValuesWritten[0]++;
});
}
METRICS_LOGGER.debug(
"wrote {} (actual: {} factor: {}%) values to csv in: {}ms (ignored {} values) use millis: {}, grouping={}, file={}",
actualValuesWritten[0], count, (double) count / (actualValuesWritten[0]),
"wrote {} values to csv in: {}ms (ignored {} values) use millis: {}, grouping={}",
plottedValues,
(System.nanoTime() - start) / 1_000_000.0, ignoredValues, Boolean.toString(useMillis),
groupResult.getGroupedBy().asString(), dataFile);
return new CsvSummary(dataFile, count, plottedValues, statsMaxValue, statsCurrentAverage,
groupResult.getGroupedBy().asString());
return new CsvSummary( count, plottedValues, statsMaxValue, statsCurrentAverage,
aggregator.getAggregatedData());
}

View File

@@ -5,6 +5,7 @@ import org.lucares.pdb.plot.api.NullAggregate;
import org.lucares.pdb.plot.api.ParallelRequestsAggregate;
import org.lucares.pdb.plot.api.CumulativeDistributionHandler;
import org.lucares.pdb.plot.api.PlotSettings;
import org.lucares.pdb.plot.api.ScatterAggregateHandler;
import org.lucares.pdb.plot.api.TimeRangeUnitInternal;
import org.lucares.pdbui.domain.Aggregate;
import org.lucares.pdbui.domain.PlotRequest;
@@ -61,6 +62,8 @@ class PlotSettingsTransformer {
return new CumulativeDistributionHandler();
case PARALLEL:
return new ParallelRequestsAggregate();
case SCATTER:
return new ScatterAggregateHandler();
}
throw new IllegalStateException("unhandled enum: " + aggregate);
}

View File

@@ -1,14 +1,19 @@
package org.lucares.pdbui.domain;
/**
* Note: The order in this enum defines the order in which the aggregates are drawn.
*/
public enum Aggregate {
NONE,
PARALLEL,
SCATTER,
/**
* Empirical cumulative distribution functions
*
* @see <a href="https://serialmentor.com/dataviz/ecdf-qq.html">Empirical cumulative distribution functions and q-q plots</a>
*/
CUM_DISTRIBUTION,
PARALLEL
}

View File

@@ -784,6 +784,7 @@ Vue.component('search-bar', {
<option value="NONE">-</option>
<option value="CUM_DISTRIBUTION" title="cumulative distribution, see https://serialmentor.com/dataviz/ecdf-qq.html">cum. distribution</option>
<option value="PARALLEL">parallel requests</option>
<option value="SCATTER">scatter plot</option>
</select>
</div>

View File

@@ -16,7 +16,8 @@ public class CsvToEntryTransformerPerformanceTest {
private static final byte NEWLINE = '\n';
public static void main(final String[] args) throws Exception {
@SuppressWarnings("unused")
public static void main(final String[] args) throws Exception {
// final Path csvFile =
// Paths.get("/home/andi/ws/performanceDb/data/production/1k.csv");
final Path csvFile = Paths.get("/home/andi/ws/performanceDb/data/production/logs_2018-09-05_2018-09-05.csv");
@@ -40,7 +41,8 @@ public class CsvToEntryTransformerPerformanceTest {
// System.out.println("summary: " + summaryStatisticsPut);
}
private static void runtest(final Path csvFile) throws IOException, FileNotFoundException {
@SuppressWarnings("unused")
private static void runtest(final Path csvFile) throws IOException, FileNotFoundException {
final byte newline = NEWLINE;
byte[] line = new byte[4096]; // max line length
@@ -92,7 +94,8 @@ public class CsvToEntryTransformerPerformanceTest {
private static void handleLine(final byte[] line, final int bytesInLine) {
final String x = new String(line, 0, bytesInLine, StandardCharsets.UTF_8);
@SuppressWarnings("unused")
final String x = new String(line, 0, bytesInLine, StandardCharsets.UTF_8);
// System.out.println(">" + x + "<");
}
}