tags are now stored as variable length byte sequences of longs
Replaced Tags.filenameBytes with a SortedSet<Tag>. Tags are now stored as longs (variable length encoded) in the PersistentMap. Tags.filenameBytes was introduced to reduce memory consumption when all tags were held in memory. Tags are now stored in a PersistentMap and only read when needed. Moved the VariableByteEncoder into its own project, because it was needed by pdb-api.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
apply plugin: 'antlr'
|
||||
|
||||
dependencies {
|
||||
compile project(':byte-utils')
|
||||
compile project(':file-utils')
|
||||
compile project(':pdb-utils')
|
||||
|
||||
|
||||
@@ -11,9 +11,9 @@ import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
|
||||
import org.lucares.pdb.diskstorage.DiskBlock;
|
||||
import org.lucares.pdb.diskstorage.DiskStorage;
|
||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ package org.lucares.pdb.blockstorage;
|
||||
import java.nio.MappedByteBuffer;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
|
||||
import org.lucares.pdb.diskstorage.DiskBlock;
|
||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||
|
||||
public class BSFileDiskBlock {
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
|
||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||
|
||||
class NodeEntry {
|
||||
enum ValueType {
|
||||
@@ -87,7 +87,13 @@ class NodeEntry {
|
||||
public String toString(final Function<byte[], String> keyDecoder, final Function<byte[], String> valueDecoder) {
|
||||
final String valueAsString = isInnerNode() ? String.valueOf(VariableByteEncoder.decodeFirstValue(value))
|
||||
: valueDecoder.apply(value);
|
||||
final String keyAsString = keyDecoder.apply(key);
|
||||
|
||||
final String keyAsString;
|
||||
if (Arrays.equals(key, PersistentMap.MAX_KEY)) {
|
||||
keyAsString = "<<<MAX_KEY>>>";
|
||||
} else {
|
||||
keyAsString = keyDecoder.apply(key);
|
||||
}
|
||||
|
||||
return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]";
|
||||
}
|
||||
|
||||
7
byte-utils/.gitignore
vendored
Normal file
7
byte-utils/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
/.settings/
|
||||
/.classpath
|
||||
/.project
|
||||
/bin/
|
||||
/build/
|
||||
/target/
|
||||
/test-output/
|
||||
7
byte-utils/build.gradle
Normal file
7
byte-utils/build.gradle
Normal file
@@ -0,0 +1,7 @@
|
||||
dependencies {
|
||||
|
||||
|
||||
compile 'org.apache.logging.log4j:log4j-core:2.10.0'
|
||||
compile 'org.apache.logging.log4j:log4j-slf4j-impl:2.10.0'
|
||||
compile 'org.lucares:primitiveCollections:0.1.20180908084945'
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package org.lucares.pdb.blockstorage.intsequence;
|
||||
package org.lucares.utils.byteencoder;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package org.lucares.pdb.blockstorage.intsequence;
|
||||
package org.lucares.utils.byteencoder;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
@@ -6,6 +6,7 @@ import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
@@ -19,7 +19,6 @@ import org.lucares.pdb.api.StringCompressor;
|
||||
import org.lucares.pdb.api.Tag;
|
||||
import org.lucares.pdb.api.Tags;
|
||||
import org.lucares.pdb.blockstorage.BSFile;
|
||||
import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
|
||||
import org.lucares.pdb.datastore.Doc;
|
||||
import org.lucares.pdb.datastore.Proposal;
|
||||
import org.lucares.pdb.datastore.lang.Expression;
|
||||
@@ -29,6 +28,7 @@ import org.lucares.pdb.diskstorage.DiskStorage;
|
||||
import org.lucares.pdb.map.PersistentMap;
|
||||
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
|
||||
import org.lucares.utils.Preconditions;
|
||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -52,13 +52,13 @@ public class DataStore implements AutoCloseable {
|
||||
private static final EncoderDecoder<Tags> ENCODER_TAGS = new EncoderDecoder<>() {
|
||||
|
||||
@Override
|
||||
public byte[] encode(final Tags object) {
|
||||
return object.getFilenameBytes();
|
||||
public byte[] encode(final Tags tags) {
|
||||
return tags.toBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tags decode(final byte[] bytes) {
|
||||
return new Tags(bytes);
|
||||
return Tags.fromBytes(bytes);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -68,7 +68,7 @@ public class DataStore implements AutoCloseable {
|
||||
public byte[] encode(final Doc doc) {
|
||||
|
||||
final byte[] rootBlockNumber = VariableByteEncoder.encode(doc.getRootBlockNumber());
|
||||
final byte[] tags = doc.getTags().getFilenameBytes();
|
||||
final byte[] tags = doc.getTags().toBytes();
|
||||
|
||||
final byte[] result = new byte[rootBlockNumber.length + tags.length];
|
||||
|
||||
@@ -83,7 +83,7 @@ public class DataStore implements AutoCloseable {
|
||||
|
||||
final long rootBlockNumber = VariableByteEncoder.decodeFirstValue(bytes);
|
||||
final int bytesRootBlockNumber = VariableByteEncoder.neededBytes(rootBlockNumber);
|
||||
final Tags tags = new Tags(Arrays.copyOfRange(bytes, bytesRootBlockNumber, bytes.length));
|
||||
final Tags tags = Tags.fromBytes(Arrays.copyOfRange(bytes, bytesRootBlockNumber, bytes.length));
|
||||
return new Doc(tags, rootBlockNumber);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
dependencies {
|
||||
compile project(':byte-utils')
|
||||
compile project(':pdb-utils')
|
||||
compile project(':file-utils')
|
||||
compile 'org.lucares:primitiveCollections:0.1.20180908084945'
|
||||
|
||||
@@ -18,7 +18,7 @@ public class StringCompressor {
|
||||
return new StringCompressor(mapsi);
|
||||
}
|
||||
|
||||
public Integer put(final String string) {
|
||||
public int put(final String string) {
|
||||
|
||||
return usip.computeIfAbsent(string, s -> usip.getHighestInteger() + 1);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
package org.lucares.pdb.api;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class TagByKeyAndValueComparator {
|
||||
|
||||
public static final Comparator<Tag> INSTANCE = Comparator.comparing(Tag::getKey).thenComparing(Tag::getValue);
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
package org.lucares.pdb.api;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
|
||||
public class TagByKeyComparator implements Comparator<Tag>, Serializable {
|
||||
|
||||
private static final long serialVersionUID = -6683582291996307323L;
|
||||
public static final TagByKeyComparator INSTANCE = new TagByKeyComparator();
|
||||
|
||||
@Override
|
||||
public int compare(final Tag a, final Tag b) {
|
||||
return a.getKey().compareToIgnoreCase(b.getKey());
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,8 @@
|
||||
package org.lucares.pdb.api;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
@@ -11,57 +10,30 @@ import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||
|
||||
public class Tags {
|
||||
|
||||
public static StringCompressor STRING_COMPRESSOR = null;
|
||||
public static final byte[] EMPTY_BYTES = new byte[0];
|
||||
private static final byte[] EMPTY_BYTES = new byte[0];
|
||||
public static final Tags EMPTY = new Tags();
|
||||
|
||||
public static final String KEY_VALUE_SEPARATOR = "-";
|
||||
public static final String KEY_VALUE_PAIR_SEPARATOR = "_";
|
||||
public static final String KEY_VALUE_END_SEPARATOR = "$";
|
||||
|
||||
private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
|
||||
|
||||
private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
|
||||
+ REGEX_KEY_VALUE + ")*";;
|
||||
|
||||
private static final String REGEX_STORAGE_FILE = String.format("(%1$s)", REGEX_KEY_VALUE_PAIRS);
|
||||
|
||||
private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
|
||||
|
||||
private final byte[] filenameBytes;
|
||||
private final SortedSet<Tag> tags;
|
||||
|
||||
public Tags() {
|
||||
filenameBytes = EMPTY_BYTES;
|
||||
tags = new TreeSet<>(TagByKeyAndValueComparator.INSTANCE);
|
||||
}
|
||||
|
||||
public Tags(final byte[] filenameBytes) {
|
||||
this(new String(filenameBytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
public Tags(final String serializedTags) {
|
||||
// serialized tags look like this: 0-1_2-1M_H-28_4-5$1.pdb
|
||||
// there can be several files for the same set of tags, in which case the number
|
||||
// after the $ is incremented
|
||||
// We only take the part until the $.
|
||||
final int end = serializedTags.indexOf(KEY_VALUE_END_SEPARATOR);
|
||||
final String normalizedFilename;
|
||||
if (end >= 0) {
|
||||
normalizedFilename = serializedTags.substring(0, end);
|
||||
} else {
|
||||
normalizedFilename = serializedTags;
|
||||
}
|
||||
this.filenameBytes = normalizedFilename.getBytes(StandardCharsets.UTF_8);
|
||||
public Tags(final Collection<Tag> tags) {
|
||||
this.tags = new TreeSet<>(TagByKeyAndValueComparator.INSTANCE);
|
||||
this.tags.addAll(tags);
|
||||
}
|
||||
|
||||
public static Tags create(final Collection<Tag> tags) {
|
||||
final String newFilename = toFilename(tags);
|
||||
|
||||
return new Tags(newFilename);
|
||||
return new Tags(tags);
|
||||
}
|
||||
|
||||
public static Tags create() {
|
||||
@@ -85,12 +57,42 @@ public class Tags {
|
||||
return result;
|
||||
}
|
||||
|
||||
public String serialize() {
|
||||
return new String(this.filenameBytes, StandardCharsets.UTF_8);
|
||||
public static Tags fromBytes(final byte[] bytes) {
|
||||
final SortedSet<Tag> result = new TreeSet<>(TagByKeyAndValueComparator.INSTANCE);
|
||||
|
||||
final LongList keyValuesAsLongs = VariableByteEncoder.decode(bytes);
|
||||
|
||||
for (int i = 0; i < keyValuesAsLongs.size(); i += 2) {
|
||||
|
||||
final long keyAsLong = keyValuesAsLongs.get(i);
|
||||
final long valueAsLong = keyValuesAsLongs.get(i + 1);
|
||||
|
||||
final String key = STRING_COMPRESSOR.get((int) keyAsLong);
|
||||
final String value = STRING_COMPRESSOR.get((int) valueAsLong);
|
||||
result.add(new Tag(key, value));
|
||||
}
|
||||
|
||||
return new Tags(result);
|
||||
}
|
||||
|
||||
public byte[] getFilenameBytes() {
|
||||
return filenameBytes;
|
||||
public byte[] toBytes() {
|
||||
final byte[] result;
|
||||
|
||||
if (tags.size() > 0) {
|
||||
final LongList keyValuesAsLongs = new LongList(tags.size() * 2);
|
||||
for (final Tag tag : tags) {
|
||||
final long keyAsLong = STRING_COMPRESSOR.put(tag.getKey());
|
||||
final long valueAsLong = STRING_COMPRESSOR.put(tag.getValue());
|
||||
|
||||
keyValuesAsLongs.add(keyAsLong);
|
||||
keyValuesAsLongs.add(valueAsLong);
|
||||
}
|
||||
|
||||
result = VariableByteEncoder.encode(keyValuesAsLongs);
|
||||
} else {
|
||||
result = EMPTY_BYTES;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public String getValue(final String key) {
|
||||
@@ -105,54 +107,7 @@ public class Tags {
|
||||
}
|
||||
|
||||
public SortedSet<Tag> toTags() {
|
||||
final SortedSet<Tag> result = new TreeSet<>(TagByKeyComparator.INSTANCE);
|
||||
final String filename = new String(this.filenameBytes, StandardCharsets.UTF_8);
|
||||
final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename);
|
||||
|
||||
if (matcher.find()) {
|
||||
final String serializedTags = matcher.group(1);
|
||||
|
||||
final String[] serializedKeyValuePairs = serializedTags.split(Pattern.quote(KEY_VALUE_PAIR_SEPARATOR));
|
||||
|
||||
for (int i = 0; i < serializedKeyValuePairs.length; i++) {
|
||||
final String[] keyValuePair = serializedKeyValuePairs[i].split(Pattern.quote(KEY_VALUE_SEPARATOR));
|
||||
|
||||
if (keyValuePair.length == 2) {
|
||||
|
||||
final String key = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[0]));
|
||||
final String value = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[1]));
|
||||
|
||||
result.add(new Tag(key, value));
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static String toFilename(final Collection<Tag> tags) {
|
||||
final StringBuilder path = new StringBuilder();
|
||||
|
||||
final Tag[] tagsAsArray = tags.toArray(new Tag[tags.size()]);
|
||||
Arrays.sort(tagsAsArray, TagByKeyComparator.INSTANCE);
|
||||
|
||||
for (final Tag tag : tagsAsArray) {
|
||||
final String key = tag.getKey();
|
||||
final String value = tag.getValue();
|
||||
|
||||
final int compressedKey = STRING_COMPRESSOR.put(key);
|
||||
final int compressedValue = STRING_COMPRESSOR.put(value);
|
||||
|
||||
if (path.length() > 0) {
|
||||
path.append(Tags.KEY_VALUE_PAIR_SEPARATOR);
|
||||
}
|
||||
|
||||
path.append(RadixConverter.toString(compressedKey));
|
||||
path.append(Tags.KEY_VALUE_SEPARATOR);
|
||||
path.append(RadixConverter.toString(compressedValue));
|
||||
}
|
||||
path.append(Tags.KEY_VALUE_END_SEPARATOR);
|
||||
|
||||
return path.toString();
|
||||
return Collections.unmodifiableSortedSet(tags);
|
||||
}
|
||||
|
||||
public Set<String> getKeys() {
|
||||
@@ -183,14 +138,14 @@ public class Tags {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Tags [filename=" + serialize() + ", tags=" + toTags() + "]";
|
||||
return "Tags [tags=" + toTags() + "]";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + Arrays.hashCode(filenameBytes);
|
||||
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -203,7 +158,10 @@ public class Tags {
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
final Tags other = (Tags) obj;
|
||||
if (!Arrays.equals(filenameBytes, other.filenameBytes))
|
||||
if (tags == null) {
|
||||
if (other.tags != null)
|
||||
return false;
|
||||
} else if (!tags.equals(other.tags))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@@ -224,11 +182,7 @@ public class Tags {
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return filenameBytes == null || filenameBytes.length == 0;
|
||||
}
|
||||
|
||||
public static Tags create(final String filename) {
|
||||
return new Tags(filename);
|
||||
return tags.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -237,7 +191,6 @@ public class Tags {
|
||||
public String asString() {
|
||||
|
||||
final StringBuilder result = new StringBuilder();
|
||||
final SortedSet<Tag> tags = toTags();
|
||||
|
||||
for (final Tag tag : tags) {
|
||||
if (result.length() > 0) {
|
||||
|
||||
Reference in New Issue
Block a user