reduce memory usage

Reduce memory usage by storing each set of tags as its encoded filename
string instead of as a map of individual Tag objects.
This commit is contained in:
2018-03-19 19:21:57 +01:00
parent 181fce805d
commit 5343c0d427
20 changed files with 315 additions and 454 deletions

View File

@@ -0,0 +1,42 @@
package org.lucares.pdb.api;
/**
 * Converts non-negative integers to and from a compact textual form that uses
 * the characters of {@link #ALPHABET} as digits.
 */
public class RadixConverter {

    /**
     * The digits in ascending order of their numeric value.
     * <p>
     * NOTE(review): the sequence has 61 symbols: it contains no 'Q' and lists
     * 'c' before 'b'. Tags.toFilename() writes strings produced with this
     * alphabet into filenames, so it is kept exactly as it is; changing it
     * would change the meaning of every name that has already been written.
     */
    private static final String ALPHABET = "0123456789ABCDEFGHIJKLMNOPRSTUVWXYZacbdefghijklmnopqrstuvwxyz";

    /** Number of distinct digits, i.e. the base of the positional encoding. */
    private static final int RADIX = ALPHABET.length();

    /**
     * Encodes a non-negative integer.
     *
     * @param value the number to encode, must not be negative
     * @return the encoded number, most significant digit first; never empty
     * @throws IllegalArgumentException if {@code value} is negative
     */
    public static String toString(final int value) {
        if (value < 0) {
            throw new IllegalArgumentException("value must not be negative");
        }
        if (value == 0) {
            return String.valueOf(ALPHABET.charAt(0));
        }

        // Digits are produced least significant first, so the buffer is
        // reversed once at the end instead of inserting at index 0 each time.
        final StringBuilder digits = new StringBuilder();
        for (int rest = value; rest > 0; rest /= RADIX) {
            digits.append(ALPHABET.charAt(rest % RADIX));
        }
        return digits.reverse().toString();
    }

    /**
     * Decodes a string produced by {@link #toString(int)}.
     *
     * @param string the encoded number; the empty string decodes to 0
     * @return the decoded number
     * @throws IllegalArgumentException if {@code string} contains a character
     *                                  that is not part of the alphabet, or if
     *                                  the decoded value does not fit into an
     *                                  {@code int}
     */
    public static int fromString(final String string) {
        int result = 0;
        for (int i = 0; i < string.length(); i++) {
            final char c = string.charAt(i);
            final int digit = ALPHABET.indexOf(c);
            if (digit < 0) {
                // indexOf returns -1 for unknown characters; folding that into
                // the result would silently yield a wrong number.
                throw new IllegalArgumentException(
                        "invalid character '" + c + "' at index " + i + " in \"" + string + "\"");
            }
            try {
                result = Math.addExact(Math.multiplyExact(result, RADIX), digit);
            } catch (final ArithmeticException e) {
                throw new IllegalArgumentException("value does not fit into an int: \"" + string + "\"", e);
            }
        }
        return result;
    }
}

View File

@@ -0,0 +1,10 @@
package org.lucares.pdb.api;
public class RuntimeIOException extends RuntimeException {
private static final long serialVersionUID = 1L;
public RuntimeIOException(final Throwable cause) {
super(cause);
}
}

View File

@@ -0,0 +1,30 @@
package org.lucares.pdb.api;
import java.nio.file.Path;
/**
 * Persistently maps Strings to integers.
 * <p>
 * Every distinct string is assigned the next integer after the highest one
 * handed out so far; the pairs themselves are kept by the wrapped
 * {@link UniqueStringIntegerPairs}.
 */
public class StringCompressor {

    private final UniqueStringIntegerPairs usip;

    /**
     * Creates a compressor on top of an existing pair store.
     *
     * @param usip the store holding the string/integer pairs
     */
    public StringCompressor(final UniqueStringIntegerPairs usip) throws RuntimeIOException {
        this.usip = usip;
    }

    /**
     * Creates a compressor whose pairs are stored in the given file.
     *
     * @param path the file handed to the underlying pair store
     * @return a new compressor
     */
    public static StringCompressor create(final Path path) {
        return new StringCompressor(new UniqueStringIntegerPairs(path));
    }

    /**
     * Returns the integer for the given string, assigning a new one (highest
     * integer so far plus one) if the string is not known yet.
     *
     * @param string the string to compress
     * @return the integer mapped to {@code string}
     */
    public Integer put(final String string) {
        return usip.computeIfAbsent(string, unused -> usip.getHighestInteger() + 1);
    }

    /**
     * Returns the string that was mapped to the given integer.
     *
     * @param integer the integer to look up
     * @return whatever the pair store returns for {@code integer}
     */
    public String get(final int integer) {
        return usip.getKey(integer);
    }
}

View File

@@ -6,23 +6,45 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.BiConsumer;
import org.lucares.utils.MiniMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Tags {
// Compressor used by toTags() and toFilename() to translate tag keys and values
// to and from the integers encoded in filenames. It starts out as null, so it
// has to be assigned before either of those methods is called.
public static StringCompressor STRING_COMPRESSOR = null;
// Shared instance created with the no-argument constructor.
public static final Tags EMPTY = new Tags();
// NOTE(review): looks like the map-based field of the previous revision, still
// visible only because this text is a diff rendering - confirm.
private final MiniMap<String, Tag> tags;
// Separates the encoded key from the encoded value inside one pair, e.g. "0-1".
public static final String KEY_VALUE_SEPARATOR = "-";
// Separates consecutive key/value pairs, e.g. "0-1_2-1M".
public static final String KEY_VALUE_PAIR_SEPARATOR = "_";
// Marks the end of the tag part of a filename; toFilename() appends it after
// the last pair and the String constructor cuts the name off at its first
// occurrence.
public static final String KEY_VALUE_END_SEPARATOR = "$";
// NOTE(review): looks like the hash cache of the previous revision (it is only
// used by the map-based hashCode lines) - confirm.
private int cachedHash = 0;
// Regex for one encoded pair: alphanumeric key, KEY_VALUE_SEPARATOR,
// alphanumeric value.
private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
private Tags() {
super();
tags = MiniMap.emptyMap();
// Regex for one encoded pair followed by any number of further pairs, each
// introduced by KEY_VALUE_PAIR_SEPARATOR.
private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
+ REGEX_KEY_VALUE + ")*";;
// Wraps the whole pair list in an outer capturing group, so that it is
// available as group 1.
private static final String REGEX_STORAGE_FILE = String.format("(%1$s)", REGEX_KEY_VALUE_PAIRS);
// Compiled form of REGEX_STORAGE_FILE; toTags() uses it to locate the encoded
// pairs inside the filename.
private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
// The encoded tags. The no-argument constructor sets it to the empty string,
// the String constructor to the given name cut off before the first
// KEY_VALUE_END_SEPARATOR.
private final String filename;
/**
 * Creates tags without any key/value pair; the filename is the empty string.
 */
public Tags() {
    this.filename = "";
}
private Tags(final MiniMap<String, Tag> tags) {
this.tags = tags;
/**
 * Creates tags from an encoded filename.
 *
 * @param filename the filename, with or without the part that starts at the
 *                 first {@code $}
 */
public Tags(final String filename) {
    // Normalize the filename. Filenames look like this: 0-1_2-1M_H-28_4-5$1.pdb
    // Several files can exist for the same set of tags; they differ only in
    // the number after the $, which is incremented for each additional file.
    // Only the part in front of the $ is kept.
    final int separatorIndex = filename.indexOf(KEY_VALUE_END_SEPARATOR);
    this.filename = separatorIndex < 0 ? filename : filename.substring(0, separatorIndex);
}
public static Tags create() {
@@ -30,36 +52,34 @@ public class Tags {
}
/**
 * Creates tags that hold a single key/value pair.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two bodies
 * back to back: the three MiniMap lines, followed by the filename-based
 * replacement that adds the pair to {@code EMPTY} via {@code copyAdd}.
 *
 * @param key   the tag key
 * @param value the tag value
 * @return tags containing the given pair
 */
public static Tags create(final String key, final String value) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key, new Tag(key, value));
return new Tags(tags);
return EMPTY.copyAdd(key, value);
}
/**
 * Creates tags that hold two key/value pairs.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two bodies
 * back to back: the four MiniMap lines, followed by the filename-based
 * replacement that chains two {@code copyAdd} calls starting from
 * {@code EMPTY}.
 *
 * @return tags built from the given pairs
 */
public static Tags create(final String key1, final String value1, final String key2, final String value2) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key1, new Tag(key1, value1));
tags.put(key2, new Tag(key2, value2));
return new Tags(tags);
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2);
return result;
}
/**
 * Creates tags that hold three key/value pairs.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two bodies
 * back to back: the five MiniMap lines, followed by the filename-based
 * replacement that chains three {@code copyAdd} calls starting from
 * {@code EMPTY}.
 *
 * @return tags built from the given pairs
 */
public static Tags create(final String key1, final String value1, final String key2, final String value2,
final String key3, final String value3) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key1, new Tag(key1, value1));
tags.put(key2, new Tag(key2, value2));
tags.put(key3, new Tag(key3, value3));
return new Tags(tags);
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2).copyAdd(key3, value3);
return result;
}
/**
 * Returns new tags made of the tags of this instance plus the given pair;
 * this instance is not modified.
 * <p>
 * NOTE(review): this span comes from a diff rendering and interleaves two
 * bodies: the lines using {@code newTags} (a MiniMap copy) and the lines that
 * decode the filename with {@code toTags()}, add the tag and encode the set
 * again with {@code toFilename()}.
 * <p>
 * NOTE(review): in the filename-based lines the tag is added to the sorted set
 * returned by {@code toTags()}, which compares tags by key only. Adding to
 * such a set leaves an already present element untouched, so a key that
 * exists already keeps its old value, whereas the map-based lines overwrite
 * it. Confirm which behaviour is intended.
 *
 * @param key   the tag key, must not be null
 * @param value the tag value, must not be null
 * @return the new tags
 * @throws NullPointerException if {@code key} or {@code value} is null
 */
public Tags copyAdd(final String key, final String value) {
Objects.requireNonNull(key, "key must not be null");
Objects.requireNonNull(value, "value must not be null");
final MiniMap<String, Tag> newTags = new MiniMap<>(tags);
final Tag tag = new Tag(key, value);
newTags.put(key, new Tag(key, value));
final SortedSet<Tag> tags = toTags();
tags.add(tag);
return new Tags(newTags);
final String newFilename = toFilename(tags);
return new Tags(newFilename);
}
public Tags copyAddIfNotNull(final String key, final String value) {
@@ -73,44 +93,96 @@ public class Tags {
return result;
}
/**
 * Returns the encoded form of these tags.
 *
 * @return the filename held by this instance
 */
public String getFilename() {
    return this.filename;
}
/**
 * Looks up the value of the tag with the given key.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two bodies
 * back to back: the three lines reading from the {@code tags} map, followed by
 * the replacement that decodes the filename with {@code toTags()} and scans
 * the result for a tag whose key equals {@code key}.
 *
 * @param key the key to look up
 * @return the value of the matching tag, or {@code null} if there is none
 */
public String getValue(final String key) {
final Tag tag = tags.get(key);
final String value = tag != null ? tag.getValue() : null;
return value;
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
if (Objects.equals(tag.getKey(), key)) {
return tag.getValue();
}
}
return null;
}
/**
 * Decodes the filename into its tags.
 * <p>
 * The first stretch of the filename that matches EXTRACT_TAGS_PATTERN is split
 * into pairs; key and value of every pair are converted back to integers and
 * resolved to strings through STRING_COMPRESSOR. Pieces that do not consist of
 * exactly two parts are skipped.
 *
 * @return a new, modifiable set of the decoded tags, ordered by key ignoring
 *         case; empty if the filename contains no encoded pair
 */
private SortedSet<Tag> toTags() {
    final SortedSet<Tag> decoded = new TreeSet<>(
            (left, right) -> left.getKey().compareToIgnoreCase(right.getKey()));

    final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename);
    if (!matcher.find()) {
        return decoded;
    }

    final String pairSeparator = Pattern.quote(KEY_VALUE_PAIR_SEPARATOR);
    final String keyValueSeparator = Pattern.quote(KEY_VALUE_SEPARATOR);
    for (final String serializedPair : matcher.group(1).split(pairSeparator)) {
        final String[] parts = serializedPair.split(keyValueSeparator);
        if (parts.length != 2) {
            continue;
        }
        final String key = STRING_COMPRESSOR.get(RadixConverter.fromString(parts[0]));
        final String value = STRING_COMPRESSOR.get(RadixConverter.fromString(parts[1]));
        decoded.add(new Tag(key, value));
    }
    return decoded;
}
/**
 * Encodes the given tags as a filename prefix.
 * <p>
 * Key and value of every tag are registered with STRING_COMPRESSOR (key first,
 * then value) and the resulting integers are written with
 * {@link RadixConverter}. Pairs are joined with KEY_VALUE_PAIR_SEPARATOR, key
 * and value with KEY_VALUE_SEPARATOR, and the result always ends with
 * KEY_VALUE_END_SEPARATOR.
 *
 * @param tags the tags to encode, in the order in which they are to appear
 * @return the encoded tags; just the end separator if {@code tags} is empty
 */
public String toFilename(final SortedSet<Tag> tags) {
    final StringBuilder encoded = new StringBuilder();
    for (final Tag tag : tags) {
        final String key = tag.getKey();
        final String value = tag.getValue();

        final int keyId = STRING_COMPRESSOR.put(key);
        final int valueId = STRING_COMPRESSOR.put(value);

        // every pair but the first is preceded by the pair separator
        if (encoded.length() > 0) {
            encoded.append(KEY_VALUE_PAIR_SEPARATOR);
        }
        encoded.append(RadixConverter.toString(keyId))
                .append(KEY_VALUE_SEPARATOR)
                .append(RadixConverter.toString(valueId));
    }
    return encoded.append(KEY_VALUE_END_SEPARATOR).toString();
}
/**
 * Returns the keys of all tags as a new sorted set.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two bodies
 * back to back: the single return reading from the {@code tags} map, followed
 * by the replacement that collects the keys of the tags decoded by
 * {@code toTags()}.
 *
 * @return the keys in their natural order
 */
public Set<String> getKeys() {
return new TreeSet<>(tags.keySet());
final TreeSet<String> result = new TreeSet<>();
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
result.add(tag.getKey());
}
return result;
}
/**
 * Passes the key and the value of every tag to the given consumer.
 * <p>
 * NOTE(review): this span comes from a diff rendering and interleaves two
 * bodies: the loop over the key set of the {@code tags} map, and the
 * replacement loop over the tags decoded by {@code toTags()}. The braces only
 * balance for one of the two loops.
 *
 * @param keyValueConsumer called once per tag with (key, value)
 */
public void forEach(final BiConsumer<String, String> keyValueConsumer) {
Set<String> keys = tags.keySet();
for (String key : keys) {
final Tag value = tags.get(key);
keyValueConsumer.accept(key, value.getValue());
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
keyValueConsumer.accept(tag.getKey(), tag.getValue());
}
}
/**
 * Returns a textual representation for logging and debugging.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two return
 * statements back to back: one printing the values of the {@code tags} map,
 * the other printing the encoded filename.
 */
@Override
public String toString() {
return String.valueOf(tags.values());
return "Tags [filename=" + filename + "]";
}
/**
 * Computes the hash code of these tags.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two bodies
 * back to back: the if/else that hashes the {@code tags} map and remembers the
 * result in {@code cachedHash}, followed by the replacement that derives the
 * hash from {@code filename} alone and caches nothing.
 */
@Override
public int hashCode() {
if (cachedHash != 0) {
return cachedHash;
} else {
final int prime = 31;
int result = 1;
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
cachedHash = result;
return result;
}
final int prime = 31;
int result = 1;
result = prime * result + ((filename == null) ? 0 : filename.hashCode());
return result;
}
@Override
@@ -122,39 +194,14 @@ public class Tags {
if (getClass() != obj.getClass())
return false;
final Tags other = (Tags) obj;
if (tags == null) {
if (other.tags != null)
if (filename == null) {
if (other.filename != null)
return false;
} else if (!tags.equals(other.tags))
} else if (!filename.equals(other.filename))
return false;
return true;
}
/**
 * Builds a short, human readable representation of the tags of at most 100
 * characters.
 * <p>
 * The keys are visited in sorted order. Every key and every value is cut to
 * the same length, which shrinks as the number of keys grows, and each pair is
 * written as {@code key-value_}.
 *
 * @return the abbreviated representation
 */
public String abbreviatedRepresentation() {
    final int maxLength = 100;
    final SortedSet<String> sortedKeys = new TreeSet<>(tags.keySet());
    // length granted to each single key or value
    final int partLength = maxLength / (sortedKeys.size() * 2 + 2);

    final StringBuilder text = new StringBuilder();
    for (final String key : sortedKeys) {
        final String value = tags.get(key).getValue();
        text.append(substr(key, partLength))
                .append("-")
                .append(substr(value, partLength))
                .append("_");
    }
    return substr(text.toString(), maxLength);
}

/**
 * Returns the first {@code maxLength} characters of {@code s}, or all of
 * {@code s} if it is shorter.
 */
private static String substr(final String s, final int maxLength) {
    final int end = Math.min(maxLength, s.length());
    return s.substring(0, end);
}
public Tags subset(final List<String> groupByFields) {
Tags result = new Tags();
@@ -171,7 +218,11 @@ public class Tags {
}
/**
 * Tells whether these tags contain no key/value pair.
 * <p>
 * NOTE(review): this span comes from a diff rendering and shows two return
 * statements back to back: one asking the {@code tags} map, the other checking
 * whether {@code filename} is null or has length 0.
 *
 * @return {@code true} if there are no tags
 */
public boolean isEmpty() {
return tags.isEmpty();
return filename == null || filename.length() == 0;
}
/**
 * Creates tags from an encoded filename; equivalent to calling the
 * constructor that takes a filename.
 *
 * @param filename the encoded filename
 * @return the tags for that filename
 */
public static Tags create(final String filename) {
    final Tags result = new Tags(filename);
    return result;
}
}

View File

@@ -0,0 +1,130 @@
package org.lucares.pdb.api;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.regex.Pattern;
/**
 * A very simple {@link Set}-like or {@link Map}-like datastructure that stores
 * unique&sup1; pairs of Strings and integers persistently.
 * <p>
 * (1) Unique means, that neither the string, nor the integer may occur twice.
 * For Example, imagine the pair ("a", 1) already exists, then neither ("a", 2)
 * nor ("b", 1) may be added.
 * <p>
 * You can only add pairs. No deletion. It keeps an in memory view for fast
 * retrievals.
 * <p>
 * When a file is given, every pair is appended to it as one line of the form
 * {@code string<TAB>integer}, and the file is read back when an instance is
 * created. Without a file the pairs live in memory only. This class performs
 * no synchronization of its own.
 */
public class UniqueStringIntegerPairs {

    /** Separates the string from the integer within one line of the file. */
    private static final String SEPARATOR = "\t";

    /** Compiled once, because it is applied to every line that is read. */
    private static final Pattern SEPARATOR_PATTERN = Pattern.compile(Pattern.quote(SEPARATOR));

    private static final boolean APPEND = true;

    /**
     * Maps a string to an integer. E.g. "myLongValue" -> 123
     */
    private final Map<String, Integer> stringToInt = new HashMap<>();

    /**
     * Maps an integer to a string. E.g. 123 -> "myLongValue"
     */
    private final SortedMap<Integer, String> intToString = new TreeMap<>();

    /** The backing file, or {@code null} if the pairs are kept in memory only. */
    private final Path file;

    /**
     * Creates an instance that is not backed by a file.
     */
    public UniqueStringIntegerPairs() {
        this(null);
    }

    /**
     * Creates an instance backed by the given file and loads the pairs that
     * the file already contains.
     *
     * @param file the backing file; it is created, together with its parent
     *             directories, if it does not exist. {@code null} means no
     *             persistence.
     * @throws RuntimeIOException if the file cannot be created or read
     */
    public UniqueStringIntegerPairs(final Path file) {
        this.file = file;
        if (file != null) {
            init(file);
        }
    }

    /**
     * Makes sure the backing file exists and reads all pairs from it. Lines
     * that do not consist of exactly two separator-delimited tokens are
     * ignored.
     */
    private void init(final Path file) throws RuntimeIOException {
        try {
            // A path made of a single name has no parent; there is nothing to
            // create in that case.
            final Path parent = file.getParent();
            if (parent != null) {
                Files.createDirectories(parent);
            }
            if (!Files.exists(file)) {
                Files.createFile(file);
            }

            try (final BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    final String[] tokens = SEPARATOR_PATTERN.split(line);
                    if (tokens.length == 2) {
                        final String string = tokens[0];
                        final int value = Integer.parseInt(tokens[1]);
                        intToString.put(value, string);
                        stringToInt.put(string, value);
                    }
                }
            }
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Adds a new pair and, if a backing file exists, appends it to the file
     * before it becomes visible in memory.
     *
     * @param first  the string of the pair
     * @param second the integer of the pair
     * @throws IllegalArgumentException if the string or the integer is already
     *                                  part of a pair, or if the pair is to be
     *                                  written to the file and the string
     *                                  contains the separator or a line break
     * @throws RuntimeIOException       if writing to the file fails
     */
    public void put(final String first, final int second) {
        if (stringToInt.containsKey(first) || intToString.containsKey(second)) {
            throw new IllegalArgumentException("Unique key constraint violation for (" + first + ", " + second + ")");
        }

        if (file != null) {
            requireStorableInOneLine(first);
            try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND),
                    StandardCharsets.UTF_8)) {
                writer.write(first + SEPARATOR + second + "\n");
            } catch (final IOException e) {
                throw new RuntimeIOException(e);
            }
        }

        intToString.put(second, first);
        stringToInt.put(first, second);
    }

    /**
     * Rejects strings that cannot be read back from the line-based file: a
     * separator inside the string yields more than two tokens, so the line
     * would be ignored on the next load, and a line break would split the
     * pair over two lines.
     */
    private static void requireStorableInOneLine(final String string) {
        if (string == null) {
            return;
        }
        if (string.contains(SEPARATOR) || string.indexOf('\n') >= 0 || string.indexOf('\r') >= 0) {
            throw new IllegalArgumentException(
                    "string must not contain a tab or a line break, because it could not be read back: \"" + string
                            + "\"");
        }
    }

    /**
     * Returns the integer of the pair with the given string.
     *
     * @param first the string to look up
     * @return the integer, or {@code null} if there is no such pair
     */
    public Integer get(final String first) {
        return stringToInt.get(first);
    }

    /**
     * Returns the string of the pair with the given integer.
     *
     * @param second the integer to look up
     * @return the string, or {@code null} if there is no such pair
     */
    public String getKey(final Integer second) {
        return intToString.get(second);
    }

    /**
     * Returns the highest integer of all pairs.
     *
     * @return the highest integer, or -1 if there are no pairs
     */
    public Integer getHighestInteger() {
        return intToString.isEmpty() ? -1 : intToString.lastKey();
    }

    /**
     * Returns the integer of the pair with the given string; if there is no
     * such pair, one is added with the integer supplied by the mapping
     * function.
     *
     * @param first           the string to look up
     * @param mappingFunction computes the integer for a string that is not
     *                        part of a pair yet
     * @return the integer of the existing or newly added pair
     * @throws IllegalArgumentException if the computed pair cannot be added,
     *                                  see {@link #put(String, int)}
     */
    public Integer computeIfAbsent(final String first, final Function<String, Integer> mappingFunction) {
        if (!stringToInt.containsKey(first)) {
            final Integer second = mappingFunction.apply(first);
            put(first, second);
        }
        return stringToInt.get(first);
    }
}