From 5617547d630fcd722d8e84693b22eafc178797a4 Mon Sep 17 00:00:00 2001
From: ahr
Date: Sat, 30 Dec 2017 09:15:26 +0100
Subject: [PATCH] add percentile plots

Add a second plot type that renders the value distribution of each group
as a percentile curve (x axis 0-100, one line per group).

* ConcretePlotter: new interface implemented by ScatterPlot and the new
  PercentilePlot. uniqueDirectoryName() and title() move here from
  ScatterPlot as static helpers; Plotter now picks the implementation by
  PlotType and calls plot() on it.
* PercentilePlot: collects the values inside the requested date range,
  sorts them and writes 101 "percentile,value" rows (0..99 plus the
  maximum as 100) per non-empty group to a temporary data file.
* FileBackedDataSeries: the gnuplot style ("points", "line") is now a
  constructor argument.
* InlineDataSeries: new DataSeries implementation whose gnuplot plot
  definition embeds the data inline.
* PercentileAggregate: skip data series without aggregated data.

---
 Review notes (ignored by git am, not part of the commit message):

 * This text was rebuilt from a whitespace-collapsed copy of the patch.
   Line breaks were checked against every hunk header and the diffstat.
   Indentation (tabs assumed) and the position of blank context lines in
   hunks against already existing files are a best guess - apply with
   "git apply --ignore-whitespace" and verify, or regenerate the patch
   from the repository.
 * Generic type arguments had been stripped from the copy and were
   restored (Collection<DataSeries>, List<DataSeries>, Stream<Entry>,
   Iterator<Entry>). List<String> for groupBy is an assumption - confirm
   against PlotSettings.getGroupBy().
 * The author e-mail in the From: header and possibly the content of the
   DEFAULT_GROUP literal were lost the same way; they are left as found.
 * PercentilePlot: LOGGER is now created for PercentilePlot.class (was
   ScatterPlot.class) and the InterruptedException is kept as the cause
   of the IllegalStateException. The blob id in the "index" line of
   PercentilePlot.java therefore no longer matches the content.

 .../pdb/plot/api/PercentileAggregate.java     |   4 +-
 .../recommind/logs/ConcretePlotter.java       |  45 ++++
 .../recommind/logs/FileBackedDataSeries.java  |  10 +-
 .../recommind/logs/InlineDataSeries.java      |  56 +++++
 .../recommind/logs/PercentilePlot.java        | 209 ++++++++++++++++++
 .../org/lucares/recommind/logs/Plotter.java   |  11 +-
 .../lucares/recommind/logs/ScatterPlot.java   |  39 +---
 7 files changed, 333 insertions(+), 41 deletions(-)
 create mode 100644 pdb-plotting/src/main/java/org/lucares/recommind/logs/ConcretePlotter.java
 create mode 100644 pdb-plotting/src/main/java/org/lucares/recommind/logs/InlineDataSeries.java
 create mode 100644 pdb-plotting/src/main/java/org/lucares/recommind/logs/PercentilePlot.java

diff --git a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PercentileAggregate.java b/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PercentileAggregate.java
index 55198b7..e9c3f81 100644
--- a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PercentileAggregate.java
+++ b/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PercentileAggregate.java
@@ -38,7 +38,9 @@ public class PercentileAggregate implements AggregateHandler{
 	@Override
 	public void addPlots(StringBuilder result, Collection<DataSeries> dataSeries) {
 		for (DataSeries dataSerie : dataSeries) {
-			appendfln(result, "'$%s' using 1:2 title '%s' with linespoints lw 2, \\", dataSerie.getId(), dataSerie.getTitle()+" " +dataSerie.getAggregatedData().getLabel());
+			if (dataSerie.getAggregatedData() != null){
+				appendfln(result, "'$%s' using 1:2 title '%s' with linespoints lw 2, \\", dataSerie.getId(), dataSerie.getTitle()+" " +dataSerie.getAggregatedData().getLabel());
+			}
 		}
 	}
 
diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/ConcretePlotter.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ConcretePlotter.java
new file mode 100644
index 0000000..e8a682f
--- /dev/null
+++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ConcretePlotter.java
@@ -0,0 +1,45 @@
+package org.lucares.recommind.logs;
+
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.UUID;
+
+import org.lucares.pdb.api.Tags;
+import org.lucares.pdb.plot.api.PlotSettings;
+
+public interface ConcretePlotter {
+
+	static final String DEFAULT_GROUP = "";
+
+	PlotResult plot(PlotSettings plotSettings) throws InternalPlottingException;
+
+
+
+	static String uniqueDirectoryName() {
+		return OffsetDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH_mm_ss")) + "_"
+				+ UUID.randomUUID().toString();
+	}
+
+	static String title(final Tags tags, final int values) {
+
+		final StringBuilder result = new StringBuilder();
+
+		if (tags.isEmpty()) {
+			result.append(DEFAULT_GROUP);
+		} else {
+			tags.forEach((k, v) -> {
+				if (result.length() > 0) {
+					result.append(" / ");
+				}
+				result.append(v);
+			});
+		}
+
+		result.append(" (");
+		result.append(String.format("%,d", values));
+		result.append(")");
+
+		return result.toString();
+
+	}
+}
diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/FileBackedDataSeries.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/FileBackedDataSeries.java
index 5e772ea..c0817e6 100644
--- a/pdb-plotting/src/main/java/org/lucares/recommind/logs/FileBackedDataSeries.java
+++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/FileBackedDataSeries.java
@@ -12,10 +12,14 @@ public class FileBackedDataSeries implements DataSeries {
 
 	private String id;
 
-	public FileBackedDataSeries(String id, String title, CsvSummary csvSummary) {
+	private String linetype;
+
+
+	public FileBackedDataSeries(String id, String title, CsvSummary csvSummary, String linetype) {
 		this.id = id;
 		this.title = title;
 		this.csvSummary = csvSummary;
+		this.linetype = linetype;
 	}
 	public String getId() {
 		return id;
@@ -42,7 +46,7 @@ public class FileBackedDataSeries implements DataSeries {
 	}
 	@Override
 	public String getGnuplotPlotDefinition() {
-		return String.format("'%s' using 1:2 title '%s' with points, \\", getDataFile(),
-				getTitle());
+		return String.format("'%s' using 1:2 title '%s' with %s, \\", getDataFile(),
+				getTitle(), linetype);
 	}
 }
diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/InlineDataSeries.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/InlineDataSeries.java
new file mode 100644
index 0000000..d7affe0
--- /dev/null
+++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/InlineDataSeries.java
@@ -0,0 +1,56 @@
+package org.lucares.recommind.logs;
+
+import org.lucares.pdb.plot.api.AggregatedData;
+
+public class InlineDataSeries implements DataSeries{
+
+	private long maxValue;
+	private int numValues;
+	private String title;
+	private String id;
+	private String inlineData;
+
+	public InlineDataSeries(long maxValue, int numValues, String title,
+			String id, String inlineData) {
+		super();
+		this.maxValue = maxValue;
+		this.numValues = numValues;
+		this.title = title;
+		this.id = id;
+		this.inlineData = inlineData;
+	}
+
+	@Override
+	public String getId() {
+
+		return id;
+	}
+
+	@Override
+	public String getTitle() {
+		return title;
+	}
+
+	@Override
+	public int getValues() {
+
+		return numValues;
+	}
+
+	@Override
+	public long getMaxValue() {
+		return maxValue;
+	}
+
+	@Override
+	public AggregatedData getAggregatedData() {
+		return null;
+	}
+
+	@Override
+	public String getGnuplotPlotDefinition() {
+
+		return String.format("'-' u 1:2 title '%s' with line\n%s,", title, inlineData);
+	}
+
+}
diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/PercentilePlot.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/PercentilePlot.java
new file mode 100644
index 0000000..19660f6
--- /dev/null
+++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/PercentilePlot.java
@@ -0,0 +1,209 @@
+package org.lucares.recommind.logs;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.OffsetDateTime;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Stream;
+
+import org.lucares.collections.IntList;
+import org.lucares.pdb.api.Entry;
+import org.lucares.pdb.api.GroupResult;
+import org.lucares.pdb.api.Result;
+import org.lucares.pdb.plot.api.Limit;
+import org.lucares.pdb.plot.api.PlotSettings;
+import org.lucares.performance.db.PerformanceDb;
+import org.lucares.utils.file.FileUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class PercentilePlot implements ConcretePlotter {
+	private static final Logger LOGGER = LoggerFactory
+			.getLogger(PercentilePlot.class);
+	private static final Logger METRICS_LOGGER = LoggerFactory
+			.getLogger("org.lucares.metrics.plotter.percentile");
+	private PerformanceDb db;
+	private Path tmpBaseDir;
+	private Path outputDir;
+
+	public PercentilePlot(PerformanceDb db, final Path tmpBaseDir,
+			Path outputDir) {
+		this.db = db;
+		this.tmpBaseDir = tmpBaseDir;
+		this.outputDir = outputDir;
+	}
+
+	@Override
+	public PlotResult plot(PlotSettings plotSettings)
+			throws InternalPlottingException {
+
+		LOGGER.trace("start plot: {}", plotSettings);
+
+		final String tmpSubDir = ConcretePlotter.uniqueDirectoryName();
+		final Path tmpDir = tmpBaseDir.resolve(tmpSubDir);
+		try {
+			Files.createDirectories(tmpDir);
+			final List<DataSeries> dataSeries = Collections
+					.synchronizedList(new ArrayList<>());
+
+			final String query = plotSettings.getQuery();
+			final List<String> groupBy = plotSettings.getGroupBy();
+			final int height = plotSettings.getHeight();
+			final int width = plotSettings.getWidth();
+			final OffsetDateTime dateFrom = plotSettings.dateFrom();
+			final OffsetDateTime dateTo = plotSettings.dateTo();
+
+			final Result result = db.get(query, groupBy);
+
+			final long start = System.nanoTime();
+			final AtomicInteger idCounter = new AtomicInteger(0);
+			result.getGroups()
+					.stream()
+					.parallel()
+					.forEach(
+							groupResult -> {
+								try {
+									final String id = "id"
+											+ idCounter.getAndIncrement();
+
+									final FileBackedDataSeries dataSerie = toCsv(
+											id, groupResult, tmpDir, dateFrom,
+											dateTo, plotSettings);
+
+									if (dataSerie.getValues() > 0) {
+										dataSeries.add(dataSerie);
+									}
+								} catch (Exception e) {
+									throw new IllegalStateException(e); // TODO
+																		// handle
+								}
+							});
+			METRICS_LOGGER.debug("csv generation took: "
+					+ (System.nanoTime() - start) / 1_000_000.0 + "ms");
+
+			if (dataSeries.isEmpty()) {
+				throw new NoDataPointsException();
+			}
+
+			final Limit limitBy = plotSettings.getLimitBy();
+			int limit = plotSettings.getLimit();
+			DataSeries.sortAndLimit(dataSeries, limitBy, limit);
+
+			final Path outputFile = Files.createTempFile(outputDir, "out",
+					".png");
+			final Gnuplot gnuplot = new Gnuplot(tmpBaseDir);
+			final GnuplotSettings gnuplotSettings = new GnuplotSettings(
+					outputFile);
+			gnuplotSettings.setHeight(height);
+			gnuplotSettings.setWidth(width);
+			defineXAxis(gnuplotSettings);
+
+			gnuplotSettings.setYAxisScale(plotSettings.getYAxisScale());
+			gnuplotSettings.setAggregate(plotSettings.getAggregate());
+			gnuplotSettings.setKeyOutside(plotSettings.isKeyOutside());
+			gnuplot.plot(gnuplotSettings, dataSeries);
+
+			return new PlotResult(outputFile.getFileName(), dataSeries);
+		} catch (final InterruptedException e) {
+			Thread.currentThread().interrupt();
+			throw new IllegalStateException("Plotting was interrupted.", e);
+		} catch (final IOException e) {
+			throw new InternalPlottingException("Plotting failed: "
+					+ e.getMessage(), e);
+		} finally {
+			FileUtils.delete(tmpDir);
+			LOGGER.trace("done plot");
+		}
+	}
+
+	private FileBackedDataSeries toCsv(String id, GroupResult groupResult,
+			Path tmpDir, OffsetDateTime dateFrom, OffsetDateTime dateTo,
+			PlotSettings plotSettings) throws IOException {
+
+		final long start = System.nanoTime();
+		final Stream<Entry> entries = groupResult.asStream();
+		int count = 0;
+		final long fromEpochMilli = dateFrom.toInstant().toEpochMilli();
+		final long toEpochMilli = dateTo.toInstant().toEpochMilli();
+
+		long ignoredValues = 0;
+		final char separator = ',';
+		final char newline = '\n';
+		final IntList values = new IntList(); // TODO should be a LongList
+		long maxValue = 0;
+
+
+
+		final Iterator<Entry> it = entries.iterator();
+		while (it.hasNext()) {
+			final Entry entry = it.next();
+
+			long epochMilli = entry.getEpochMilli();
+			if (fromEpochMilli <= epochMilli && epochMilli <= toEpochMilli) {
+
+				final long value = entry.getValue();
+				values.add((int) value);
+				count++;
+			} else {
+				ignoredValues++;
+			}
+		}
+
+
+		values.parallelSort();
+
+		final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
+		try(final Writer output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));){
+
+			final StringBuilder data = new StringBuilder();
+			if (values.size() > 0) {
+				// compute the percentiles
+				for (int i = 0; i < 100; i++) {
+					data.append(i);
+					data.append(separator);
+					data.append(values.get((int) Math.floor(values.size()
+							/ 100.0 * i)));
+					data.append(newline);
+				}
+				maxValue = values.get(values.size() - 1);
+				data.append(100);
+				data.append(separator);
+				data.append(maxValue);
+				data.append(newline);
+			}
+			output.write(data.toString());
+		}
+		METRICS_LOGGER
+				.debug("wrote {} values to csv in: {}ms (ignored {} values) grouping={}",
+						count, (System.nanoTime() - start) / 1_000_000.0,
+						ignoredValues, groupResult.getGroupedBy());
+
+		final String title = ConcretePlotter.title(groupResult.getGroupedBy(),
+				values.size());
+
+		CsvSummary csvSummary = new CsvSummary(dataFile, values.size(), maxValue, null);
+		return new FileBackedDataSeries(id, title, csvSummary, "line");
+	}
+
+	private void defineXAxis(GnuplotSettings gnuplotSettings) {
+		final XAxisSettings xAxis = gnuplotSettings.getxAxisSettings();
+		xAxis.setxDataTime(false);
+		xAxis.setFrom("0");
+		xAxis.setTo("100");
+		xAxis.setRotateXAxisLabel(0);
+		xAxis.setFormatX("%.0f");
+		xAxis.setXlabel("Percentile");
+	}
+
+}
diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java
index ea21780..ae91760 100644
--- a/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java
+++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java
@@ -34,16 +34,19 @@ public class Plotter {
 
 	public PlotResult plot(final PlotSettings plotSettings) throws InternalPlottingException {
 		PlotType plotType = plotSettings.getPlotType();
-		
+		final ConcretePlotter plotter;
 		switch (plotType) {
 		case SCATTER:
-			final ScatterPlot scatterPlot = new ScatterPlot(db, tmpBaseDir, outputDir);
-			return scatterPlot.plot(plotSettings);
-			
+			plotter = new ScatterPlot(db, tmpBaseDir, outputDir);
+			break;
+		case PERCENTILES:
+			plotter = new PercentilePlot(db, tmpBaseDir, outputDir);
+			break;
 		default:
 			throw new UnsupportedOperationException("plot of type " + plotType + " not supported.");
 		}
 
+		return plotter.plot(plotSettings);
 	}
 
 
diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java
index ed628c2..55e0cee 100644
--- a/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java
+++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java
@@ -11,14 +11,12 @@ import java.nio.file.Files;
 import java.nio.file.LinkOption;
 import java.nio.file.Path;
 import java.time.OffsetDateTime;
-import java.time.format.DateTimeFormatter;
 import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Formatter;
 import java.util.Iterator;
 import java.util.List;
-import java.util.UUID;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Stream;
@@ -26,7 +24,6 @@ import java.util.stream.Stream;
 import org.lucares.pdb.api.Entry;
 import org.lucares.pdb.api.GroupResult;
 import org.lucares.pdb.api.Result;
-import org.lucares.pdb.api.Tags;
 import org.lucares.pdb.plot.api.CustomAggregator;
 import org.lucares.pdb.plot.api.Limit;
 import org.lucares.pdb.plot.api.PlotSettings;
@@ -35,12 +32,11 @@ import org.lucares.utils.file.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class ScatterPlot {
+public class ScatterPlot implements ConcretePlotter {
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(ScatterPlot.class);
 	private static final Logger METRICS_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.plotter.scatter");
 
-	private static final String DEFAULT_GROUP = "";
 
 	private static final int INT_TO_STRING_CACHE_SIZE= 1000;
 	private static final String[] INT_TO_STRING;
@@ -74,12 +70,13 @@ public class ScatterPlot {
 		return outputDir;
 	}
 
+	@Override
 	public PlotResult plot(final PlotSettings plotSettings) throws InternalPlottingException {
 
 		LOGGER.trace("start plot: {}", plotSettings);
 
 
-		final String tmpSubDir = uniqueDirectoryName();
+		final String tmpSubDir = ConcretePlotter.uniqueDirectoryName();
 		final Path tmpDir = tmpBaseDir.resolve(tmpSubDir);
 		try {
 			Files.createDirectories(tmpDir);
@@ -101,8 +98,8 @@ public class ScatterPlot {
 
 					final CsvSummary csvSummary = toCsv(groupResult, tmpDir, dateFrom, dateTo, plotSettings);
 					final int id = idCounter.getAndIncrement();
-					final String title = title(groupResult.getGroupedBy(), csvSummary.getValues());
-					final DataSeries dataSerie = new FileBackedDataSeries("id"+id, title, csvSummary);
+					final String title = ConcretePlotter.title(groupResult.getGroupedBy(), csvSummary.getValues());
+					final DataSeries dataSerie = new FileBackedDataSeries("id"+id, title, csvSummary, "points");
 					if (dataSerie.getValues() > 0) {
 						dataSeries.add(dataSerie);
 					}
@@ -171,10 +168,7 @@ public class ScatterPlot {
 		gnuplotSettings.getxAxisSettings().setTo(formattedMaxDate);
 	}
 
-	private String uniqueDirectoryName() {
-		return OffsetDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH_mm_ss")) + "_"
-				+ UUID.randomUUID().toString();
-	}
+
 
 
 
@@ -245,26 +239,5 @@ public class ScatterPlot {
 		return String.valueOf(value);
 	}
 
-	private String title(final Tags tags, final int values) {
-		final StringBuilder result = new StringBuilder();
-
-		if (tags.isEmpty()) {
-			result.append(DEFAULT_GROUP);
-		} else {
-			tags.forEach((k, v) -> {
-				if (result.length() > 0) {
-					result.append(" / ");
-				}
-				result.append(v);
-			});
-		}
-
-		result.append(" (");
-		result.append(String.format("%,d", values));
-		result.append(")");
-
-		return result.toString();
-
-	}
 
 }