replace the FolderStorage with DiskStorage

- The DiskStorage uses only one file instead of millions.
  Also, the block size is only 512 bytes instead of 4 KB, which
  helps to reduce the memory usage for short sequences.
- Update primitiveCollections to get the new LongList.range
  and LongList.rangeClosed methods.
- BSFile now stores Time&Value sequences and knows how to
  encode the time values with delta encoding.
- Doc had to do some magic tricks to save memory. The path
  was initialized lazily and stored as a byte array. This is no
  longer necessary. The path was replaced by the
  rootBlockNumber of the BSFile.
- Had to temporarily disable the 'in' queries.
- The stored values are now processed as a stream of LongLists
  instead of Entry objects. The overhead for creating Entries is
  gone, and so is the memory overhead, because Entry was an
  object and held a reference to the tags, which is
  unnecessary.
This commit is contained in:
2018-09-12 09:35:07 +02:00
parent 26dc052b95
commit 1182d76205
36 changed files with 799 additions and 1483 deletions

View File

@@ -2,4 +2,5 @@
dependencies {
compile project(':pdb-utils')
compile project(':file-utils')
compile 'org.lucares:primitiveCollections:0.1.20180908084945'
}

View File

@@ -1,32 +1,36 @@
package org.lucares.pdb.api;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.lucares.collections.LongList;
public class GroupResult {
private final Tags groupedBy;
private final Stream<Entry> entries;
private final Stream<LongList> timeValueStream;
public GroupResult(final Stream<Entry> entries, final Tags groupedBy) {
this.entries = entries;
public GroupResult(final Stream<LongList> entries, final Tags groupedBy) {
this.timeValueStream = entries;
this.groupedBy = groupedBy;
}
/**
* @return {@link Stream} unbound, unordered and non-parallel
*/
public Stream<Entry> asStream() {
return entries;
}
public List<Entry> asList() {
return entries.collect(Collectors.toList());
}
public Tags getGroupedBy() {
return groupedBy;
}
/**
* @return {@link Stream}
*/
public Stream<LongList> asStream() {
return timeValueStream;
}
public LongList flatMap() {
final LongList result = new LongList();
timeValueStream.forEachOrdered(result::addAll);
return result;
}
}

View File

@@ -37,18 +37,17 @@ public class Tags {
filenameBytes = EMPTY_BYTES;
}
public Tags(final String filename) {
// normalize filename
// filenames look like this: 0-1_2-1M_H-28_4-5$1.pdb
public Tags(final String serializedTags) {
// serialized tags look like this: 0-1_2-1M_H-28_4-5$1.pdb
// there can be several files for the same set of tags, in which case the number
// after the $ is incremented
// We only take the part until the $.
final int end = filename.indexOf(KEY_VALUE_END_SEPARATOR);
final int end = serializedTags.indexOf(KEY_VALUE_END_SEPARATOR);
final String normalizedFilename;
if (end >= 0) {
normalizedFilename = filename.substring(0, end);
normalizedFilename = serializedTags.substring(0, end);
} else {
normalizedFilename = filename;
normalizedFilename = serializedTags;
}
this.filenameBytes = normalizedFilename.getBytes(StandardCharsets.UTF_8);
}
@@ -80,7 +79,7 @@ public class Tags {
return result;
}
public String getFilename() {
public String serialize() {
return new String(this.filenameBytes, StandardCharsets.UTF_8);
}
@@ -165,7 +164,7 @@ public class Tags {
@Override
public String toString() {
return "Tags [filename=" + getFilename() + ", tags=" + toTags() + "]";
return "Tags [filename=" + serialize() + ", tags=" + toTags() + "]";
}
@Override
@@ -213,6 +212,9 @@ public class Tags {
return new Tags(filename);
}
/**
* @return User facing readable representation
*/
public String asString() {
final StringBuilder result = new StringBuilder();