reduce memory usage

Reduce memory usage by storing the filename as string instead of
individual tags.
This commit is contained in:
2018-03-19 19:21:57 +01:00
parent 181fce805d
commit 5343c0d427
20 changed files with 315 additions and 454 deletions

View File

@@ -0,0 +1,42 @@
package org.lucares.pdb.api;
/**
 * Converts non-negative integers into short strings and back, using a
 * positional numeral system whose digits are the characters of
 * {@link #ALPHABET}.
 */
public class RadixConverter {

    /**
     * The digits, ordered by their numeric value (the index of a character is
     * its value).
     * <p>
     * NOTE: the alphabet has 61 characters, not 62: the upper case 'Q' is
     * absent and 'c' is placed before 'b'. It is deliberately left unchanged
     * here, because altering it would change the string produced for existing
     * values.
     */
    private static final String ALPHABET = "0123456789ABCDEFGHIJKLMNOPRSTUVWXYZacbdefghijklmnopqrstuvwxyz";

    /** The base of the numeral system, i.e. the number of distinct digits. */
    private static final int RADIX = ALPHABET.length();

    /**
     * Encodes a non-negative integer with the digits of {@link #ALPHABET}.
     *
     * @param value the value to encode, must not be negative
     * @return the encoded value, most significant digit first; never empty
     * @throws IllegalArgumentException if {@code value} is negative
     */
    public static String toString(final int value) {
        if (value < 0) {
            throw new IllegalArgumentException("value must not be negative");
        }
        if (value == 0) {
            return String.valueOf(ALPHABET.charAt(0));
        }
        // Digits are produced least significant first, so they are appended and
        // reversed once at the end instead of being inserted at the front.
        final StringBuilder digits = new StringBuilder();
        for (int rest = value; rest > 0; rest /= RADIX) {
            digits.append(ALPHABET.charAt(rest % RADIX));
        }
        return digits.reverse().toString();
    }

    /**
     * Decodes a string produced by {@link #toString(int)}.
     *
     * @param string the encoded value; the empty string decodes to 0
     * @return the decoded, non-negative value
     * @throws IllegalArgumentException if {@code string} contains a character
     *                                  that is not part of the alphabet, or if
     *                                  the decoded value does not fit into an
     *                                  {@code int}
     */
    public static int fromString(final String string) {
        int result = 0;
        for (int i = 0; i < string.length(); i++) {
            final char character = string.charAt(i);
            final int digit = ALPHABET.indexOf(character);
            if (digit < 0) {
                // Without this check the -1 would silently be folded into the result.
                throw new IllegalArgumentException(
                        "invalid character '" + character + "' at index " + i + " in \"" + string + "\"");
            }
            try {
                result = Math.addExact(Math.multiplyExact(result, RADIX), digit);
            } catch (final ArithmeticException e) {
                throw new IllegalArgumentException("\"" + string + "\" does not fit into an int", e);
            }
        }
        return result;
    }
}

View File

@@ -0,0 +1,10 @@
package org.lucares.pdb.api;
/**
 * Unchecked exception that wraps another {@link Throwable}, so that a failure
 * can be propagated through methods that do not declare checked exceptions.
 */
public class RuntimeIOException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception around the failure that triggered it.
     *
     * @param cause the wrapped failure, later available via {@link #getCause()}
     */
    public RuntimeIOException(final Throwable cause) {
        super(cause);
    }
}

View File

@@ -0,0 +1,30 @@
package org.lucares.pdb.api;
import java.nio.file.Path;
/**
 * Persistently maps Strings to integers.
 * <p>
 * The mapping itself is kept by a {@link UniqueStringIntegerPairs} instance;
 * this class only decides which integer a new string receives.
 */
public class StringCompressor {

    private final UniqueStringIntegerPairs pairs;

    /**
     * @param usip the store that holds the string/integer pairs
     */
    public StringCompressor(final UniqueStringIntegerPairs usip) throws RuntimeIOException {
        this.pairs = usip;
    }

    /**
     * Creates a compressor whose pairs are stored in the given file.
     *
     * @param path the file passed to {@link UniqueStringIntegerPairs}
     * @return a new compressor
     */
    public static StringCompressor create(final Path path) {
        return new StringCompressor(new UniqueStringIntegerPairs(path));
    }

    /**
     * Returns the integer for the given string and assigns a new one if the
     * string is not known yet.
     *
     * @param string the string to map
     * @return the integer mapped to {@code string}
     */
    public Integer put(final String string) {
        return pairs.computeIfAbsent(string, unused -> nextInteger());
    }

    /**
     * Looks up the string that was mapped to the given integer.
     *
     * @param integer the integer to look up
     * @return whatever the underlying store returns for {@code integer}
     */
    public String get(final int integer) {
        return pairs.getKey(integer);
    }

    /** The integer for a new string: one above the highest integer in use. */
    private Integer nextInteger() {
        return pairs.getHighestInteger() + 1;
    }
}

View File

@@ -6,23 +6,45 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.BiConsumer;
import org.lucares.utils.MiniMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Tags {
public static StringCompressor STRING_COMPRESSOR = null;
public static final Tags EMPTY = new Tags();
private final MiniMap<String, Tag> tags;
public static final String KEY_VALUE_SEPARATOR = "-";
public static final String KEY_VALUE_PAIR_SEPARATOR = "_";
public static final String KEY_VALUE_END_SEPARATOR = "$";
private int cachedHash = 0;
private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
private Tags() {
super();
tags = MiniMap.emptyMap();
private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
+ REGEX_KEY_VALUE + ")*";;
private static final String REGEX_STORAGE_FILE = String.format("(%1$s)", REGEX_KEY_VALUE_PAIRS);
private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
private final String filename;
/**
 * Creates tags without any key/value pair; the filename is the empty string.
 */
public Tags() {
    this.filename = "";
}
private Tags(final MiniMap<String, Tag> tags) {
this.tags = tags;
/**
 * Creates tags from a filename, keeping only the part that encodes the tags.
 *
 * @param filename a filename such as {@code 0-1_2-1M_H-28_4-5$1.pdb}
 */
public Tags(final String filename) {
    // Normalize the filename. There can be several files for the same set of
    // tags; they differ only in the number after the '$'. Only the part before
    // the first '$' is kept. A filename without '$' is taken as is.
    final int separatorIndex = filename.indexOf(KEY_VALUE_END_SEPARATOR);
    this.filename = separatorIndex < 0 ? filename : filename.substring(0, separatorIndex);
}
public static Tags create() {
@@ -30,36 +52,34 @@ public class Tags {
}
public static Tags create(final String key, final String value) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key, new Tag(key, value));
return new Tags(tags);
return EMPTY.copyAdd(key, value);
}
public static Tags create(final String key1, final String value1, final String key2, final String value2) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key1, new Tag(key1, value1));
tags.put(key2, new Tag(key2, value2));
return new Tags(tags);
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2);
return result;
}
public static Tags create(final String key1, final String value1, final String key2, final String value2,
final String key3, final String value3) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key1, new Tag(key1, value1));
tags.put(key2, new Tag(key2, value2));
tags.put(key3, new Tag(key3, value3));
return new Tags(tags);
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2).copyAdd(key3, value3);
return result;
}
public Tags copyAdd(final String key, final String value) {
Objects.requireNonNull(key, "key must not be null");
Objects.requireNonNull(value, "value must not be null");
final MiniMap<String, Tag> newTags = new MiniMap<>(tags);
final Tag tag = new Tag(key, value);
newTags.put(key, new Tag(key, value));
final SortedSet<Tag> tags = toTags();
tags.add(tag);
return new Tags(newTags);
final String newFilename = toFilename(tags);
return new Tags(newFilename);
}
public Tags copyAddIfNotNull(final String key, final String value) {
@@ -73,44 +93,96 @@ public class Tags {
return result;
}
/**
 * @return the normalized filename these tags are stored as
 */
public String getFilename() {
    return this.filename;
}
public String getValue(final String key) {
final Tag tag = tags.get(key);
final String value = tag != null ? tag.getValue() : null;
return value;
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
if (Objects.equals(tag.getKey(), key)) {
return tag.getValue();
}
}
return null;
}
/**
 * Decodes the filename into its tags.
 * <p>
 * The result is ordered by key, ignoring case. Because the same comparator
 * defines equality within the set, two tags whose keys differ only in case
 * cannot both be contained.
 *
 * @return a new, modifiable set; empty if the filename contains no key/value pair
 */
private SortedSet<Tag> toTags() {
    final SortedSet<Tag> decoded = new TreeSet<>(
            (left, right) -> left.getKey().compareToIgnoreCase(right.getKey()));

    final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename);
    if (!matcher.find()) {
        return decoded;
    }

    final String pairSeparatorRegex = Pattern.quote(KEY_VALUE_PAIR_SEPARATOR);
    final String keyValueSeparatorRegex = Pattern.quote(KEY_VALUE_SEPARATOR);

    for (final String serializedPair : matcher.group(1).split(pairSeparatorRegex)) {
        final String[] keyAndValue = serializedPair.split(keyValueSeparatorRegex);
        if (keyAndValue.length != 2) {
            // not a well-formed "key-value" token, ignore it
            continue;
        }
        // Both parts are radix-encoded integers that the compressor maps back to strings.
        final String key = STRING_COMPRESSOR.get(RadixConverter.fromString(keyAndValue[0]));
        final String value = STRING_COMPRESSOR.get(RadixConverter.fromString(keyAndValue[1]));
        decoded.add(new Tag(key, value));
    }
    return decoded;
}
/**
 * Encodes the given tags as a filename prefix.
 * <p>
 * Each tag becomes {@code <key>-<value>}, where key and value are the
 * radix-encoded integers the compressor assigns to the strings. The pairs
 * are joined with {@code _} and the result is terminated with {@code $}.
 *
 * @param tags the tags to encode, in the order they should appear
 * @return the encoded tags, always ending with the end separator
 */
public String toFilename(final SortedSet<Tag> tags) {
    final StringBuilder path = new StringBuilder();
    String pairSeparator = "";
    for (final Tag tag : tags) {
        final String key = tag.getKey();
        final String value = tag.getValue();

        final int keyId = STRING_COMPRESSOR.put(key);
        final int valueId = STRING_COMPRESSOR.put(value);

        path.append(pairSeparator);
        path.append(RadixConverter.toString(keyId))
                .append(Tags.KEY_VALUE_SEPARATOR)
                .append(RadixConverter.toString(valueId));

        // every pair after the first one is preceded by the pair separator
        pairSeparator = Tags.KEY_VALUE_PAIR_SEPARATOR;
    }
    return path.append(Tags.KEY_VALUE_END_SEPARATOR).toString();
}
public Set<String> getKeys() {
return new TreeSet<>(tags.keySet());
final TreeSet<String> result = new TreeSet<>();
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
result.add(tag.getKey());
}
return result;
}
public void forEach(final BiConsumer<String, String> keyValueConsumer) {
Set<String> keys = tags.keySet();
for (String key : keys) {
final Tag value = tags.get(key);
keyValueConsumer.accept(key, value.getValue());
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
keyValueConsumer.accept(tag.getKey(), tag.getValue());
}
}
@Override
public String toString() {
return String.valueOf(tags.values());
return "Tags [filename=" + filename + "]";
}
@Override
public int hashCode() {
if (cachedHash != 0) {
return cachedHash;
} else {
final int prime = 31;
int result = 1;
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
cachedHash = result;
return result;
}
final int prime = 31;
int result = 1;
result = prime * result + ((filename == null) ? 0 : filename.hashCode());
return result;
}
@Override
@@ -122,39 +194,14 @@ public class Tags {
if (getClass() != obj.getClass())
return false;
final Tags other = (Tags) obj;
if (tags == null) {
if (other.tags != null)
if (filename == null) {
if (other.filename != null)
return false;
} else if (!tags.equals(other.tags))
} else if (!filename.equals(other.filename))
return false;
return true;
}
public String abbreviatedRepresentation() {
final StringBuilder result = new StringBuilder();
final int maxLength = 100;
final SortedSet<String> keys = new TreeSet<>(tags.keySet());
final int cutAt = maxLength / (keys.size() * 2 + 2);
for (final String key : keys) {
final String value = tags.get(key).getValue();
result.append(substr(key, cutAt));
result.append("-");
result.append(substr(value, cutAt));
result.append("_");
}
return substr(result.toString(), maxLength);
}
private static String substr(final String s, final int maxLength) {
return s.substring(0, Math.min(maxLength, s.length()));
}
public Tags subset(final List<String> groupByFields) {
Tags result = new Tags();
@@ -171,7 +218,11 @@ public class Tags {
}
public boolean isEmpty() {
return tags.isEmpty();
return filename == null || filename.length() == 0;
}
/**
 * Factory counterpart of the constructor taking a filename.
 *
 * @param filename the filename to create the tags from
 * @return new tags for {@code filename}
 */
public static Tags create(final String filename) {
    final Tags fromFilename = new Tags(filename);
    return fromFilename;
}
}

View File

@@ -0,0 +1,130 @@
package org.lucares.pdb.api;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.regex.Pattern;
/**
 * A very simple {@link Set}-like or {@link Map}-like datastructure that stores
 * unique&sup1; pairs of Strings and integers persistently.
 * <p>
 * (1) Unique means, that neither the string, nor the integer may occur twice.
 * For Example, imagine the pair ("a", 1) already exists, then neither ("a", 2)
 * nor ("b", 1) may be added.
 * <p>
 * You can only add pairs. No deletion. It keeps an in memory view for fast
 * retrievals.
 * <p>
 * If a file is given, every pair is appended to it as one line of the form
 * {@code <string>\t<integer>}, and the file is read when the instance is
 * created. Without a file the pairs are only kept in memory.
 */
public class UniqueStringIntegerPairs {
    private static final String SEPARATOR = "\t";

    private static final boolean APPEND = true;

    /**
     * Maps a string to an integer. E.g. "myLongValue" -> 123
     */
    private final Map<String, Integer> stringToInt = new HashMap<>();

    /**
     * Maps an integer to a string. E.g. 123 -> "myLongValue"
     */
    private final SortedMap<Integer, String> intToString = new TreeMap<>();

    /** The backing file, or {@code null} if the pairs are only kept in memory. */
    private final Path file;

    /**
     * Creates an instance that is not backed by a file.
     */
    public UniqueStringIntegerPairs() {
        this(null);
    }

    /**
     * Creates an instance and loads the pairs already stored in the file.
     *
     * @param file the backing file, created if it does not exist; {@code null}
     *             for an in-memory instance
     * @throws RuntimeIOException if the file cannot be created or read
     */
    public UniqueStringIntegerPairs(final Path file) {
        this.file = file;
        if (file != null) {
            init(file);
        }
    }

    /**
     * Ensures the file exists and reads all pairs from it.
     */
    private void init(final Path file) throws RuntimeIOException {
        try {
            // A path without a directory part (e.g. a bare relative file name)
            // has no parent; there is nothing to create in that case.
            final Path parent = file.getParent();
            if (parent != null) {
                Files.createDirectories(parent);
            }
            if (!Files.exists(file)) {
                Files.createFile(file);
            }

            try (final BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    final String[] tokens = line.split(Pattern.quote(SEPARATOR));

                    // lines that do not consist of exactly two tokens are ignored
                    if (tokens.length == 2) {
                        final String string = tokens[0];
                        final int value = Integer.parseInt(tokens[1]);
                        intToString.put(value, string);
                        stringToInt.put(string, value);
                    }
                }
            }
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Adds a new pair and, if a file is used, appends it to the file.
     *
     * @param first  the string of the pair
     * @param second the integer of the pair
     * @throws IllegalArgumentException if the string or the integer is already
     *                                  part of a pair, or if a file is used and
     *                                  the string contains a tab or a line
     *                                  break
     * @throws RuntimeIOException       if the pair cannot be written
     */
    public void put(final String first, final int second) {
        if (stringToInt.containsKey(first) || intToString.containsKey(second)) {
            throw new IllegalArgumentException("Unique key constraint violation for (" + first + ", " + second + ")");
        }

        if (file != null) {
            requirePersistable(first);
            try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND),
                    StandardCharsets.UTF_8)) {
                writer.write(first + SEPARATOR + second + "\n");
            } catch (final IOException e) {
                throw new RuntimeIOException(e);
            }
        }

        // the in-memory view is only updated after the pair has been written
        intToString.put(second, first);
        stringToInt.put(first, second);
    }

    /**
     * Rejects strings that cannot be read back from the line based file: a tab
     * adds a token to the line, a line break splits the pair over two lines.
     * Either way the pair would be lost or altered when the file is loaded.
     */
    private static void requirePersistable(final String string) {
        if (string == null) {
            return;
        }
        if (string.contains(SEPARATOR) || string.indexOf('\n') >= 0 || string.indexOf('\r') >= 0) {
            throw new IllegalArgumentException(
                    "string must not contain a tab or a line break, but was: \"" + string + "\"");
        }
    }

    /**
     * @param first the string to look up
     * @return the integer paired with the string, or {@code null} if there is none
     */
    public Integer get(final String first) {
        return stringToInt.get(first);
    }

    /**
     * @param second the integer to look up
     * @return the string paired with the integer, or {@code null} if there is none
     */
    public String getKey(final Integer second) {
        return intToString.get(second);
    }

    /**
     * @return the highest integer that is part of a pair, or -1 if there is no pair
     */
    public Integer getHighestInteger() {
        return intToString.isEmpty() ? -1 : intToString.lastKey();
    }

    /**
     * Returns the integer paired with the string. If there is no such pair, the
     * mapping function provides the integer and the new pair is added first.
     *
     * @param first           the string to look up
     * @param mappingFunction computes the integer for a string that is not known yet
     * @return the integer paired with the string
     * @throws IllegalArgumentException if the computed pair is rejected by
     *                                  {@link #put(String, int)}
     */
    public Integer computeIfAbsent(final String first, final Function<String, Integer> mappingFunction) {
        if (!stringToInt.containsKey(first)) {
            final Integer second = mappingFunction.apply(first);
            put(first, second);
        }

        return stringToInt.get(first);
    }
}

View File

@@ -6,37 +6,57 @@ import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.Map;
import org.lucares.utils.MiniMap;
import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.api.UniqueStringIntegerPairs;
public class MemoryScale {
public static final String A = "A";
public static void main(String[] args) {
System.out.println("start");
public static void main(final String[] args) {
Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs());
scale("singleTag");
scale("tags0");
scale("tags1");
scale("tags2");
scale("tags6");
}
private static void scale(final String what) {
System.out.println("start: " + what);
// warmup of classes
getUsedMemory();
Object handle =createObject();
handle = null;
runGc();
long memoryBefore = getUsedMemory();
Object handle = createObject(what);
handle = null;
handle = createObject();
runGc();
long memoryAfter = getUsedMemory();
System.out.println("used memory: " + (memoryAfter - memoryBefore));
final long memoryBefore = getUsedMemory();
handle = createObject(what);
runGc();
final long memoryAfter = getUsedMemory();
System.out.println(what + ": used memory: " + (memoryAfter - memoryBefore));
handle.hashCode(); // use the variable, so causes no warnings and is not removed by JIT compiler
}
private static Object createObject(){
String key = "pathAsUtf8";
switch (key) {
case "minimap":
return createMinimap();
private static Object createObject(final String what) {
switch (what) {
case "singleTag":
return createTag();
case "tags0":
return createTags0();
case "tags1":
return createTags1();
case "tags2":
return createTags2();
case "tags6":
return createTags6();
case "string":
return createString();
case "linkedHashMap":
@@ -50,45 +70,60 @@ public class MemoryScale {
default:
return null;
}
}
private static Object createPathAsUtf8(String string) {
/** Creates the object measured for "singleTag": a tag with empty key and value. */
private static Object createTag() {
    final Tag emptyTag = new Tag("", "");
    return emptyTag;
}
/** Creates the object measured for "tags0": tags without any key/value pair. */
private static Object createTags0() {
    final Tags noTags = new Tags();
    return noTags;
}
/** Creates the object measured for "tags1": tags with one key/value pair. */
private static Object createTags1() {
    final Tags onePair = Tags.create("k1", "v1");
    return onePair;
}
/** Creates the object measured for "tags2": tags with two key/value pairs. */
private static Object createTags2() {
    final Tags twoPairs = Tags.create("k1", "v1", "k2", "v2");
    return twoPairs;
}
/**
 * Creates the object measured for "tags6": tags with six key/value pairs,
 * built by adding one pair after the other.
 */
private static Object createTags6() {
    return Tags.create("k1", "v1")
            .copyAdd("k2", "v2")
            .copyAdd("k3", "v3")
            .copyAdd("k4", "v4")
            .copyAdd("k5", "v5")
            .copyAdd("k6", "v6");
}
/**
 * Encodes the given string as UTF-8.
 *
 * @param string the string to encode
 * @return the UTF-8 bytes of {@code string}
 */
private static Object createPathAsUtf8(final String string) {
    final byte[] utf8 = string.getBytes(StandardCharsets.UTF_8);
    return utf8;
}
private static String createPathAsString(String string) {
private static String createPathAsString(final String string) {
return string.replace("C", "c");
}
private static Path createPath(String string) {
private static Path createPath(final String string) {
return Paths.get(string);
}
private static Object createMinimap() {
final MiniMap<String, String> map = new MiniMap<>();
map.put("A", "A");
for (int i = 0; i < 0; i++){
map.put(""+i, ""+i);
}
return map;
}
private static String createString() {
int i= 0;
return ""+i;
final int i = 0;
return "" + i;
}
private static Object createLinkedHashMap() {
final Map<String, String> map = new LinkedHashMap<>();
map.put("A", "A");
for (int i = 0; i < 0; i++){
map.put(""+i, ""+i);
for (int i = 0; i < 0; i++) {
map.put("" + i, "" + i);
}
return map;
@@ -99,14 +134,13 @@ public class MemoryScale {
System.gc();
try {
Thread.sleep(100);
} catch (InterruptedException e) {
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
}
}
}
private static long getUsedMemory() {
return Runtime.getRuntime().totalMemory()
- Runtime.getRuntime().freeMemory();
return Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
}
}

View File

@@ -0,0 +1,24 @@
package org.lucares.pdb.api;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Tests for {@link RadixConverter}.
 */
@Test
public class RadixConverterTest {

    /**
     * Every value from 0 to 999 must be unchanged after it was converted to a
     * string and back.
     */
    public void testConvertRoundtrip() {
        for (int expected = 0; expected < 1000; expected++) {
            final String encoded = RadixConverter.toString(expected);

            final int decoded = RadixConverter.fromString(encoded);

            Assert.assertEquals(decoded, expected, "string representation: " + encoded);
        }
    }

    /**
     * Negative values cannot be converted and must be rejected.
     */
    @Test(expectedExceptions = IllegalArgumentException.class)
    public void testNoNegativeValues() {
        RadixConverter.toString(-1);
    }
}

View File

@@ -0,0 +1,52 @@
package org.lucares.pdb.api;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
/**
 * Tests for {@link StringCompressor}. Every test method gets its own temporary
 * directory, which is deleted afterwards.
 */
@Test
public class StringCompressorTest {
    private Path dataDirectory;

    @BeforeMethod
    public void beforeMethod() throws IOException {
        dataDirectory = Files.createTempDirectory("pdb");
    }

    @AfterMethod
    public void afterMethod() throws IOException {
        FileUtils.delete(dataDirectory);
    }

    /**
     * A string that was put must be returned for the integer it was mapped to.
     */
    public void testKeyCompressorRoundtrip() throws Exception {
        final StringCompressor keyValueCompressor = StringCompressor.create(dataDirectory.resolve("key.csv"));

        final String value = "foo";
        final Integer intFoo = keyValueCompressor.put(value);
        final String actual = keyValueCompressor.get(intFoo);

        Assert.assertEquals(actual, value);
    }

    /**
     * A mapping must still be there when a new compressor is created on the
     * same file.
     */
    public void testKeyCompressorInitialization() throws Exception {
        final Path database = dataDirectory.resolve("key.csv");
        final String value = "foo";
        final Integer intFoo;
        {
            final StringCompressor keyValueCompressor = StringCompressor.create(database);
            intFoo = keyValueCompressor.put(value);
        }
        {
            final StringCompressor keyValueCompressor = StringCompressor.create(database);

            // the mapping must have been loaded from the file ...
            Assert.assertEquals(keyValueCompressor.get(intFoo), value);
            // ... and putting the same string again must not assign a new integer
            Assert.assertEquals(keyValueCompressor.put(value), intFoo);
        }
    }
}

View File

@@ -0,0 +1,74 @@
package org.lucares.pdb.api;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
/**
 * Tests for {@link UniqueStringIntegerPairs}. Every test method gets its own
 * temporary directory, which is deleted afterwards.
 */
@Test
public class UniqueStringIntegerPairsTest {

    private Path dataDirectory;

    @BeforeMethod
    public void beforeMethod() throws IOException {
        dataDirectory = Files.createTempDirectory("pdb");
    }

    @AfterMethod
    public void afterMethod() throws IOException {
        FileUtils.delete(dataDirectory);
    }

    /**
     * A pair must be retrievable in both directions, from the instance it was
     * added to as well as from a new instance created on the same file.
     */
    public void testPutGet() throws Exception {
        final Path database = dataDirectory.resolve("key.csv");
        final String first = "key1";
        final Integer second = 1;

        // the instance the pair is added to
        final UniqueStringIntegerPairs writing = new UniqueStringIntegerPairs(database);
        writing.put(first, second);
        Assert.assertEquals(writing.get(first), second);
        Assert.assertEquals(writing.getKey(second), first);

        // a fresh instance that has to load the pair from the file
        final UniqueStringIntegerPairs reading = new UniqueStringIntegerPairs(database);
        Assert.assertEquals(reading.get(first), second);
        Assert.assertEquals(reading.getKey(second), first);
    }

    /**
     * Neither the string nor the integer of an existing pair may be used for
     * another pair.
     */
    public void testUniqueKeyContstraint() throws Exception {
        final Path database = dataDirectory.resolve("key.csv");
        final String first = "key1";
        final Integer second = 1;

        final UniqueStringIntegerPairs usip = new UniqueStringIntegerPairs(database);
        usip.put(first, second);

        // cannot add another pair with the first key
        final int otherInteger = second + 1;
        try {
            usip.put(first, otherInteger);
            Assert.fail("expected an IllegalArgumentException");
        } catch (final IllegalArgumentException e) {
            // expected
        }

        // cannot add another pair with the same second value
        final String otherString = first + 1;
        try {
            usip.put(otherString, second);
            Assert.fail("expected an IllegalArgumentException");
        } catch (final IllegalArgumentException e) {
            // expected
        }
    }
}