replace the FolderStorage with DiskStorage

- The DiskStorage uses only one file instead of millions.
  Also, the block size is only 512 bytes instead of 4 KB, which
  helps to reduce the memory usage for short sequences.
- Update primitiveCollections to get the new LongList.range
  and LongList.rangeClosed methods.
- BSFile now stores Time&Value sequences and knows how to
  encode the time values with delta encoding.
- Doc had to do some magic tricks to save memory. The path
  was initialized lazily and stored as a byte array. This is no
  longer necessary. The path was replaced by the
  rootBlockNumber of the BSFile.
- Had to temporarily disable the 'in' queries.
- The stored values are now processed as a stream of LongLists
  instead of Entries. The overhead for creating Entries is
  gone, and so is the memory overhead, because Entry was an
  object and had a reference to the tags, which is
  unnecessary.
This commit is contained in:
2018-09-12 09:35:07 +02:00
parent 26dc052b95
commit 1182d76205
36 changed files with 799 additions and 1483 deletions

View File

@@ -9,13 +9,13 @@ import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import org.lucares.collections.IntList;
import org.lucares.collections.LongList;
public class PercentileCustomAggregator implements CustomAggregator {
private final static int POINTS = 500;
private final IntList values = new IntList(); // TODO should be a LongList
private final LongList values = new LongList();
private final Path tmpDir;
@@ -35,7 +35,7 @@ public class PercentileCustomAggregator implements CustomAggregator {
values.parallelSort();
final IntList percentiles = new IntList(POINTS);
final LongList percentiles = new LongList(POINTS);
final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
try (final Writer output = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));) {
@@ -46,13 +46,13 @@ public class PercentileCustomAggregator implements CustomAggregator {
for (int i = 0; i < POINTS; i++) {
data.append(i * (100 / (double) POINTS));
data.append(separator);
final int percentile = values.get((int) Math.floor(values.size() / ((double) POINTS) * i));
final long percentile = values.get((int) Math.floor(values.size() / ((double) POINTS) * i));
data.append(percentile);
data.append(newline);
percentiles.add(percentile);
}
final int maxValue = values.get(values.size() - 1);
final long maxValue = values.get(values.size() - 1);
data.append(100);
data.append(separator);
data.append(maxValue);

View File

@@ -23,7 +23,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import org.lucares.pdb.api.Entry;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.GroupResult;
import org.lucares.pdb.api.Result;
import org.lucares.pdb.api.Tags;
@@ -203,7 +203,7 @@ public class ScatterPlot {
final File dataFile = File.createTempFile("data", ".dat", tmpDir.toFile());
final long start = System.nanoTime();
final Stream<Entry> entries = groupResult.asStream();
final Stream<LongList> timeValueStream = groupResult.asStream();
final long fromEpochMilli = dateFrom.toInstant().toEpochMilli();
final long toEpochMilli = dateTo.toInstant().toEpochMilli();
final boolean useMillis = (toEpochMilli - fromEpochMilli) < TimeUnit.MINUTES.toMillis(5);
@@ -228,49 +228,53 @@ public class ScatterPlot {
new OutputStreamWriter(new FileOutputStream(dataFile), StandardCharsets.US_ASCII));
final Formatter formatter = new Formatter(formattedDateBuilder);) {
final Iterator<Entry> it = entries.iterator();
final Iterator<LongList> it = timeValueStream.iterator();
while (it.hasNext()) {
final Entry entry = it.next();
final LongList entry = it.next();
final long epochMilli = entry.getEpochMilli();
if (fromEpochMilli > epochMilli || epochMilli > toEpochMilli) {
ignoredValues++;
continue;
for (int i = 0; i < entry.size(); i += 2) {
final long epochMilli = entry.get(i);
if (fromEpochMilli > epochMilli || epochMilli > toEpochMilli) {
ignoredValues++;
continue;
}
final long value = entry.get(i + 1);
aggregator.addValue(epochMilli, value);
// compute stats
count++;
statsMaxValue = Math.max(statsMaxValue, value);
// compute average (important to do this after 'count' has been incremented)
statsCurrentAverage = statsCurrentAverage + (value - statsCurrentAverage) / count;
// check if value is in the selected y-range
if (value < minValue || value > maxValue) {
ignoredValues++;
continue;
}
final String stringValue = LongUtils.longToString(value);
final String formattedDate;
if (useMillis) {
formattedDateBuilder.delete(0, formattedDateBuilder.length());
formatter.format("%.3f", epochMilli / 1000.0);
formattedDate = formattedDateBuilder.toString();
} else {
formattedDate = String.valueOf(epochMilli / 1000);
}
output.write(formattedDate);
output.write(separator);
output.write(stringValue);
output.write(newline);
plottedValues++;
}
final long value = entry.getValue();
aggregator.addValue(epochMilli, value);
// compute stats
count++;
statsMaxValue = Math.max(statsMaxValue, value);
// compute average (important to do this after 'count' has been incremented)
statsCurrentAverage = statsCurrentAverage + (value - statsCurrentAverage) / count;
// check if value is in the selected y-range
if (value < minValue || value > maxValue) {
ignoredValues++;
continue;
}
final String stringValue = LongUtils.longToString(value);
final String formattedDate;
if (useMillis) {
formattedDateBuilder.delete(0, formattedDateBuilder.length());
formatter.format("%.3f", epochMilli / 1000.0);
formattedDate = formattedDateBuilder.toString();
} else {
formattedDate = String.valueOf(epochMilli / 1000);
}
output.write(formattedDate);
output.write(separator);
output.write(stringValue);
output.write(newline);
plottedValues++;
}
}