replace the FolderStorage with DiskStorage

- The DiskStorage uses only one file instead of millions.
  Also, the block size is only 512 bytes instead of 4 KB, which
  helps to reduce the memory usage for short sequences.
- Update primitiveCollections to get the new LongList.range
  and LongList.rangeClosed methods.
- BSFile now stores Time&Value sequences and knows how to
  encode the time values with delta encoding.
- Doc had to do some magic tricks to save memory. The path
  was initialized lazily and stored as a byte array. This is no
  longer necessary. The path was replaced by the
  rootBlockNumber of the BSFile.
- Had to temporarily disable the 'in' queries.
- The stored values are now processed as a stream of LongLists
  instead of Entries. The overhead for creating Entries is
  gone, and so is the memory overhead, because Entry was an
  object and had a reference to the tags, which is
  unnecessary.
This commit is contained in:
2018-09-12 09:35:07 +02:00
parent 26dc052b95
commit 1182d76205
36 changed files with 799 additions and 1483 deletions

View File

@@ -9,13 +9,13 @@ import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import org.lucares.collections.IntList;
import org.lucares.collections.LongList;
public class PercentileCustomAggregator implements CustomAggregator {
private final static int POINTS = 500;
private final IntList values = new IntList(); // TODO should be a LongList
private final LongList values = new LongList();
private final Path tmpDir;
@@ -35,7 +35,7 @@ public class PercentileCustomAggregator implements CustomAggregator {
values.parallelSort();
final IntList percentiles = new IntList(POINTS);
final LongList percentiles = new LongList(POINTS);
final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
try (final Writer output = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));) {
@@ -46,13 +46,13 @@ public class PercentileCustomAggregator implements CustomAggregator {
for (int i = 0; i < POINTS; i++) {
data.append(i * (100 / (double) POINTS));
data.append(separator);
final int percentile = values.get((int) Math.floor(values.size() / ((double) POINTS) * i));
final long percentile = values.get((int) Math.floor(values.size() / ((double) POINTS) * i));
data.append(percentile);
data.append(newline);
percentiles.add(percentile);
}
final int maxValue = values.get(values.size() - 1);
final long maxValue = values.get(values.size() - 1);
data.append(100);
data.append(separator);
data.append(maxValue);

View File

@@ -23,7 +23,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import org.lucares.pdb.api.Entry;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.GroupResult;
import org.lucares.pdb.api.Result;
import org.lucares.pdb.api.Tags;
@@ -203,7 +203,7 @@ public class ScatterPlot {
final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
final long start = System.nanoTime();
final Stream<Entry> entries = groupResult.asStream();
final Stream<LongList> timeValueStream = groupResult.asStream();
final long fromEpochMilli = dateFrom.toInstant().toEpochMilli();
final long toEpochMilli = dateTo.toInstant().toEpochMilli();
final boolean useMillis = (toEpochMilli - fromEpochMilli) < TimeUnit.MINUTES.toMillis(5);
@@ -228,49 +228,53 @@ public class ScatterPlot {
new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));
final Formatter formatter = new Formatter(formattedDateBuilder);) {
final Iterator<Entry> it = entries.iterator();
final Iterator<LongList> it = timeValueStream.iterator();
while (it.hasNext()) {
final Entry entry = it.next();
final LongList entry = it.next();
final long epochMilli = entry.getEpochMilli();
if (fromEpochMilli > epochMilli || epochMilli > toEpochMilli) {
ignoredValues++;
continue;
for (int i = 0; i < entry.size(); i += 2) {
final long epochMilli = entry.get(i);
if (fromEpochMilli > epochMilli || epochMilli > toEpochMilli) {
ignoredValues++;
continue;
}
final long value = entry.get(i + 1);
aggregator.addValue(epochMilli, value);
// compute stats
count++;
statsMaxValue = Math.max(statsMaxValue, value);
// compute average (important to do this after 'count' has been incremented)
statsCurrentAverage = statsCurrentAverage + (value - statsCurrentAverage) / count;
// check if value is in the selected y-range
if (value < minValue || value > maxValue) {
ignoredValues++;
continue;
}
final String stringValue = LongUtils.longToString(value);
final String formattedDate;
if (useMillis) {
formattedDateBuilder.delete(0, formattedDateBuilder.length());
formatter.format("%.3f", epochMilli / 1000.0);
formattedDate = formattedDateBuilder.toString();
} else {
formattedDate = String.valueOf(epochMilli / 1000);
}
output.write(formattedDate);
output.write(separator);
output.write(stringValue);
output.write(newline);
plottedValues++;
}
final long value = entry.getValue();
aggregator.addValue(epochMilli, value);
// compute stats
count++;
statsMaxValue = Math.max(statsMaxValue, value);
// compute average (important to do this after 'count' has been incremented)
statsCurrentAverage = statsCurrentAverage + (value - statsCurrentAverage) / count;
// check if value is in the selected y-range
if (value < minValue || value > maxValue) {
ignoredValues++;
continue;
}
final String stringValue = LongUtils.longToString(value);
final String formattedDate;
if (useMillis) {
formattedDateBuilder.delete(0, formattedDateBuilder.length());
formatter.format("%.3f", epochMilli / 1000.0);
formattedDate = formattedDateBuilder.toString();
} else {
formattedDate = String.valueOf(epochMilli / 1000);
}
output.write(formattedDate);
output.write(separator);
output.write(stringValue);
output.write(newline);
plottedValues++;
}
}