reduce memory usage

Reduce memory usage by storing the filename as string instead of
individual tags.
This commit is contained in:
2018-03-19 19:21:57 +01:00
parent 181fce805d
commit 5343c0d427
20 changed files with 315 additions and 454 deletions

View File

@@ -2,7 +2,6 @@ package org.lucares.pdb.datastore.internal;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -12,11 +11,10 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.lucares.collections.IntList;
import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.lang.Expression;
@@ -27,37 +25,24 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DataStore {
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.dataStore.executeQuery");
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
.getLogger("org.lucares.metrics.dataStore.executeQuery");
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
private static final String SUBDIR_STORAGE = "storage";
private static final String PDB_EXTENSION = ".pdb";
private static final String KEY_VALUE_SEPARATOR = "-";
private static final String KEY_VALUE_PAIR_SEPARATOR = "_";
private static final String KEY_VALUE_END_SEPARATOR = "$";
private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
+ REGEX_KEY_VALUE + ")*";;
private static final String REGEX_STORAGE_FILE = String.format("(%1$s)%2$s[0-9]*%3$s", REGEX_KEY_VALUE_PAIRS,
Pattern.quote(KEY_VALUE_END_SEPARATOR), PDB_EXTENSION);
private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
// to be guarded by itself
private final List<Doc> docIdToDoc = new ArrayList<>();
private final ConcurrentHashMap<Tags, List<Doc>> tagsToDocs = new ConcurrentHashMap<>();
private final ConcurrentHashMap<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
private final StringCompressor stringCompressor;
private final FolderStorage folderStorage;
public DataStore(final Path dataDirectory) throws IOException {
stringCompressor = StringCompressor.create(keyCompressionFile(dataDirectory));
Tags.STRING_COMPRESSOR = StringCompressor.create(keyCompressionFile(dataDirectory));
folderStorage = new FolderStorage(storageDirectory(dataDirectory), 1000);
init(folderStorage);
@@ -75,8 +60,8 @@ public class DataStore {
});
trimIntLists();
sortIntLists();
synchronized (docIdToDoc) {
((ArrayList<Doc>)docIdToDoc).trimToSize();
synchronized (docIdToDoc) {
((ArrayList<Doc>) docIdToDoc).trimToSize();
}
}
@@ -84,24 +69,25 @@ public class DataStore {
final int docId;
final Doc newDoc = new Doc(tags, path);
synchronized (docIdToDoc) {
synchronized (docIdToDoc) {
docId = docIdToDoc.size();
docIdToDoc.add(newDoc);
}
tagsToDocs.compute(tags, (t, listOfDocs) -> {
final List<Doc> result = listOfDocs != null ? listOfDocs : new ArrayList<>(2);
result.add(newDoc);
return result;
});
for (final String key : tags.getKeys()) {
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new ConcurrentHashMap<>());
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key,
k -> new ConcurrentHashMap<>());
final String value = tags.getValue(key);
final IntList docIds = valueToDocIds.computeIfAbsent(value, v -> new IntList());
synchronized (docIds) {
synchronized (docIds) {
docIds.add(docId);
}
}
@@ -112,37 +98,30 @@ public class DataStore {
int totalBeforeTrim = 0;
int totalAfterTrim = 0;
int totalValues = 0;
for (Map<String, IntList> valueToDocIds : keyToValueToDocId.values()) {
for (IntList intList : valueToDocIds.values()) {
for (final Map<String, IntList> valueToDocIds : keyToValueToDocId.values()) {
for (final IntList intList : valueToDocIds.values()) {
totalBeforeTrim += intList.getCapacity();
intList.trim();
totalAfterTrim += intList.getCapacity();
totalValues += intList.size();
}
}
LOGGER.info(
"trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, took: {} ms",
totalValues,
(totalBeforeTrim * 4) / 1024,
(totalAfterTrim * 4) / 1024,
((totalBeforeTrim - totalAfterTrim) * 4) / 1024,
(totalValues * 4) / 1024,
LOGGER.info("trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, took: {} ms",
totalValues, (totalBeforeTrim * 4) / 1024, (totalAfterTrim * 4) / 1024,
((totalBeforeTrim - totalAfterTrim) * 4) / 1024, (totalValues * 4) / 1024,
(System.nanoTime() - start) / 1_000_000.0);
}
private void sortIntLists() {
final long start = System.nanoTime();
final Collection<Map<String, IntList>> valueToDocIds = keyToValueToDocId.values();
valueToDocIds.stream().flatMap(map -> map.values().stream()).forEach(intList -> intList.sort());
LOGGER.info(
"sorting IntLists, took: {} ms",
(System.nanoTime() - start) / 1_000_000.0);
LOGGER.info("sorting IntLists, took: {} ms", (System.nanoTime() - start) / 1_000_000.0);
}
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
@@ -155,60 +134,16 @@ public class DataStore {
public Path createNewFile(final Tags tags) throws IOException {
final Path filename = toFilename(tags);
final Path result = folderStorage.insert(filename.toString(), PDB_EXTENSION);
final String filename = tags.getFilename();
final Path result = folderStorage.insert(filename, PDB_EXTENSION);
cacheTagToFileMapping(tags, result);
return result;
}
private Path toFilename(final Tags tags) {
final StringBuilder path = new StringBuilder();
final SortedSet<String> sortedKeys = new TreeSet<>(tags.getKeys());
for (final String key : sortedKeys) {
final String value = tags.getValue(key);
final int compressedKey = stringCompressor.put(key);
final int compressedValue = stringCompressor.put(value);
if (path.length() > 0) {
path.append(KEY_VALUE_PAIR_SEPARATOR);
}
path.append(RadixConverter.toString(compressedKey));
path.append(KEY_VALUE_SEPARATOR);
path.append(RadixConverter.toString(compressedValue));
}
path.append(KEY_VALUE_END_SEPARATOR);
return Paths.get(path.toString());
}
private Tags toTags(final String filename) {
Tags tags = Tags.create();
final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename);
if (matcher.find()) {
final String serializedTags = matcher.group(1);
final String[] serializedKeyValuePairs = serializedTags.split(Pattern.quote(KEY_VALUE_PAIR_SEPARATOR));
for (int i = 0; i < serializedKeyValuePairs.length; i++) {
final String[] keyValuePair = serializedKeyValuePairs[i].split(Pattern.quote(KEY_VALUE_SEPARATOR));
if (keyValuePair.length == 2) {
final String key = stringCompressor.get(RadixConverter.fromString(keyValuePair[0]));
final String value = stringCompressor.get(RadixConverter.fromString(keyValuePair[1]));
tags = tags.copyAdd(key, value);
}
}
}
final Tags tags = Tags.create(filename);
return tags;
}
@@ -263,13 +198,11 @@ public class DataStore {
final long start = System.nanoTime();
synchronized (docIdToDoc) {
final Expression expression = QueryLanguageParser.parse(query);
final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(
keyToValueToDocId, new AllDocIds(docIdToDoc));
final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(keyToValueToDocId,
new AllDocIds(docIdToDoc));
final IntList docIdsList = expression.visit(visitor);
EXECUTE_QUERY_LOGGER.debug(
"executeQuery({}) took {}ms returned {} results ", query,
(System.nanoTime() - start) / 1_000_000.0,
docIdsList.size());
EXECUTE_QUERY_LOGGER.debug("executeQuery({}) took {}ms returned {} results ", query,
(System.nanoTime() - start) / 1_000_000.0, docIdsList.size());
return docIdsList;
}
}
@@ -277,20 +210,20 @@ public class DataStore {
private List<Doc> mapDocIdsToDocs(final IntList docIdsList) {
final List<Doc> result = new ArrayList<>(docIdsList.size());
synchronized (docIdToDoc) {
final int[] intDocIds = docIdsList.toArray();
synchronized (docIdToDoc) {
final int[] intDocIds = docIdsList.toArray();
for (int i = 0; i < intDocIds.length; i++) {
final int docId = intDocIds[i];
final Doc doc = docIdToDoc.get(docId);
result.add(doc);
}
}
return result;
}
public List<Doc> getByTags(Tags tags) {
public List<Doc> getByTags(final Tags tags) {
final List<Doc> result = tagsToDocs.getOrDefault(tags, new ArrayList<>(0));
return result;
}

View File

@@ -0,0 +1,5 @@
package org.lucares.pdb.datastore.internal;
public class TagsUtils {
}

View File

@@ -35,13 +35,14 @@ public class DataStoreTest {
FileUtils.delete(dataDirectory);
dataStore = null;
tagsToPath = null;
Tags.STRING_COMPRESSOR = null;
}
public void testInsertSingleTag() throws Exception {
final Tags tags = Tags.create("key1", "value1", "key2", "value2");
final Path path;
{
final DataStore dataStore = new DataStore(dataDirectory);
final Tags tags = Tags.create("key1", "value1", "key2", "value2");
path = dataStore.createNewFile(tags);
assertSearch(dataStore, "key1=value1", path);
@@ -54,6 +55,8 @@ public class DataStoreTest {
public void testQuery() throws Exception {
dataStore = new DataStore(dataDirectory);
tagsToPath = new LinkedHashMap<>();
final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim");
final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer");
@@ -67,8 +70,6 @@ public class DataStoreTest {
tagsToPath.put(labradorJenny, null);
tagsToPath.put(labradorTim, null);
dataStore = new DataStore(dataDirectory);
for (final Tags tags : tagsToPath.keySet()) {
final Path newFile = dataStore.createNewFile(tags);
tagsToPath.put(tags, newFile);
@@ -97,25 +98,25 @@ public class DataStoreTest {
assertSearch("dog=*lab*dor*", labradorJenny, labradorTim);
}
public void testGetByTags() throws IOException
{
public void testGetByTags() throws IOException {
dataStore = new DataStore(dataDirectory);
final Tags eagleTim1 = Tags.create("bird", "eagle", "name", "Tim");
final Tags eagleTim2 = Tags.create("bird", "eagle", "name", "Tim");
final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer");
final Tags flamingoJennifer = Tags.create("bird", "flamingo", "name", "Jennifer");
dataStore = new DataStore(dataDirectory);
dataStore.createNewFile(eagleTim1);
dataStore.createNewFile(eagleTim2);
dataStore.createNewFile(pigeonJennifer);
dataStore.createNewFile(flamingoJennifer);
// eagleTim1 and eagleTim2 have the same tags, so we find both docs
final List<Doc> docsEagleTim = dataStore.getByTags(eagleTim1);
Assert.assertEquals(docsEagleTim.size(), 2, "two docs for eagleTim1 and eagleTim2");
final List<Doc> docsFlamingoJennifer = dataStore.getByTags(flamingoJennifer);
Assert.assertEquals(docsFlamingoJennifer.size(), 1, "doc for docsFlamingoJennifer");
}

View File

@@ -36,9 +36,12 @@ public class ProposerTest {
FileUtils.delete(dataDirectory);
db = null;
tagsToPath = null;
Tags.STRING_COMPRESSOR = null;
}
private void initDatabase() throws Exception {
db = new PdbDB(dataDirectory);
tagsToPath = new LinkedHashMap<>();
final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim");
final Tags eagleTimothy = Tags.create("bird", "eagle", "name", "Timothy");
@@ -54,8 +57,6 @@ public class ProposerTest {
tagsToPath.put(labradorJenny, null);
tagsToPath.put(labradorTim, null);
db = new PdbDB(dataDirectory);
for (final Tags tags : tagsToPath.keySet()) {
final Path newFile = db.createNewFile(tags);
tagsToPath.put(tags, newFile);
@@ -88,14 +89,12 @@ public class ProposerTest {
public void testPrefixOfValue() throws Exception {
assertProposals("name =Tim", 9, //
new Proposal("Timothy", "name =Timothy ", true)
);
new Proposal("Timothy", "name =Timothy ", true));
assertProposals("name =Je", 8, //
new Proposal("Jennifer", "name =Jennifer ", true), //
new Proposal("Jenny", "name =Jenny ", true) //
);
assertProposals("bird=eagle and n", 16, //
new Proposal("name", "bird=eagle and name=* ", true) //

View File

@@ -1,4 +1,5 @@
dependencies {
compile project(':pdb-utils')
compile project(':file-utils')
}

View File

@@ -1,4 +1,4 @@
package org.lucares.pdb.datastore.internal;
package org.lucares.pdb.api;
public class RadixConverter {

View File

@@ -1,4 +1,4 @@
package org.lucares.pdb.datastore.internal;
package org.lucares.pdb.api;
public class RuntimeIOException extends RuntimeException {

View File

@@ -1,9 +1,7 @@
package org.lucares.pdb.datastore.internal;
package org.lucares.pdb.api;
import java.nio.file.Path;
import org.lucares.pdb.datastore.internal.map.UniqueStringIntegerPairs;
/**
* Persistently maps Strings to integers.
*/

View File

@@ -6,23 +6,45 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.BiConsumer;
import org.lucares.utils.MiniMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Tags {
public static StringCompressor STRING_COMPRESSOR = null;
public static final Tags EMPTY = new Tags();
private final MiniMap<String, Tag> tags;
public static final String KEY_VALUE_SEPARATOR = "-";
public static final String KEY_VALUE_PAIR_SEPARATOR = "_";
public static final String KEY_VALUE_END_SEPARATOR = "$";
private int cachedHash = 0;
private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
private Tags() {
super();
tags = MiniMap.emptyMap();
private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
+ REGEX_KEY_VALUE + ")*";;
private static final String REGEX_STORAGE_FILE = String.format("(%1$s)", REGEX_KEY_VALUE_PAIRS);
private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
private final String filename;
public Tags() {
filename = "";
}
private Tags(final MiniMap<String, Tag> tags) {
this.tags = tags;
public Tags(final String filename) {
// normalize filename
// filenames look like this: 0-1_2-1M_H-28_4-5$1.pdb
// there can be several files for the same set of tags, in which case the number
// after the $ is incremented
// We only take the part until the $.
final int end = filename.indexOf(KEY_VALUE_END_SEPARATOR);
if (end >= 0) {
this.filename = filename.substring(0, end);
} else {
this.filename = filename;
}
}
public static Tags create() {
@@ -30,36 +52,34 @@ public class Tags {
}
public static Tags create(final String key, final String value) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key, new Tag(key, value));
return new Tags(tags);
return EMPTY.copyAdd(key, value);
}
public static Tags create(final String key1, final String value1, final String key2, final String value2) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key1, new Tag(key1, value1));
tags.put(key2, new Tag(key2, value2));
return new Tags(tags);
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2);
return result;
}
public static Tags create(final String key1, final String value1, final String key2, final String value2,
final String key3, final String value3) {
final MiniMap<String, Tag> tags = new MiniMap<>();
tags.put(key1, new Tag(key1, value1));
tags.put(key2, new Tag(key2, value2));
tags.put(key3, new Tag(key3, value3));
return new Tags(tags);
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2).copyAdd(key3, value3);
return result;
}
public Tags copyAdd(final String key, final String value) {
Objects.requireNonNull(key, "key must not be null");
Objects.requireNonNull(value, "value must not be null");
final MiniMap<String, Tag> newTags = new MiniMap<>(tags);
final Tag tag = new Tag(key, value);
newTags.put(key, new Tag(key, value));
final SortedSet<Tag> tags = toTags();
tags.add(tag);
return new Tags(newTags);
final String newFilename = toFilename(tags);
return new Tags(newFilename);
}
public Tags copyAddIfNotNull(final String key, final String value) {
@@ -73,44 +93,96 @@ public class Tags {
return result;
}
public String getFilename() {
return filename;
}
public String getValue(final String key) {
final Tag tag = tags.get(key);
final String value = tag != null ? tag.getValue() : null;
return value;
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
if (Objects.equals(tag.getKey(), key)) {
return tag.getValue();
}
}
return null;
}
private SortedSet<Tag> toTags() {
final SortedSet<Tag> result = new TreeSet<>((a, b) -> a.getKey().compareToIgnoreCase(b.getKey()));
final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename);
if (matcher.find()) {
final String serializedTags = matcher.group(1);
final String[] serializedKeyValuePairs = serializedTags.split(Pattern.quote(KEY_VALUE_PAIR_SEPARATOR));
for (int i = 0; i < serializedKeyValuePairs.length; i++) {
final String[] keyValuePair = serializedKeyValuePairs[i].split(Pattern.quote(KEY_VALUE_SEPARATOR));
if (keyValuePair.length == 2) {
final String key = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[0]));
final String value = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[1]));
result.add(new Tag(key, value));
}
}
}
return result;
}
public String toFilename(final SortedSet<Tag> tags) {
final StringBuilder path = new StringBuilder();
for (final Tag tag : tags) {
final String key = tag.getKey();
final String value = tag.getValue();
final int compressedKey = STRING_COMPRESSOR.put(key);
final int compressedValue = STRING_COMPRESSOR.put(value);
if (path.length() > 0) {
path.append(Tags.KEY_VALUE_PAIR_SEPARATOR);
}
path.append(RadixConverter.toString(compressedKey));
path.append(Tags.KEY_VALUE_SEPARATOR);
path.append(RadixConverter.toString(compressedValue));
}
path.append(Tags.KEY_VALUE_END_SEPARATOR);
return path.toString();
}
public Set<String> getKeys() {
return new TreeSet<>(tags.keySet());
final TreeSet<String> result = new TreeSet<>();
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
result.add(tag.getKey());
}
return result;
}
public void forEach(final BiConsumer<String, String> keyValueConsumer) {
Set<String> keys = tags.keySet();
for (String key : keys) {
final Tag value = tags.get(key);
keyValueConsumer.accept(key, value.getValue());
final Set<Tag> tags = toTags();
for (final Tag tag : tags) {
keyValueConsumer.accept(tag.getKey(), tag.getValue());
}
}
@Override
public String toString() {
return String.valueOf(tags.values());
return "Tags [filename=" + filename + "]";
}
@Override
public int hashCode() {
if (cachedHash != 0) {
return cachedHash;
} else {
final int prime = 31;
int result = 1;
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
cachedHash = result;
return result;
}
final int prime = 31;
int result = 1;
result = prime * result + ((filename == null) ? 0 : filename.hashCode());
return result;
}
@Override
@@ -122,39 +194,14 @@ public class Tags {
if (getClass() != obj.getClass())
return false;
final Tags other = (Tags) obj;
if (tags == null) {
if (other.tags != null)
if (filename == null) {
if (other.filename != null)
return false;
} else if (!tags.equals(other.tags))
} else if (!filename.equals(other.filename))
return false;
return true;
}
public String abbreviatedRepresentation() {
final StringBuilder result = new StringBuilder();
final int maxLength = 100;
final SortedSet<String> keys = new TreeSet<>(tags.keySet());
final int cutAt = maxLength / (keys.size() * 2 + 2);
for (final String key : keys) {
final String value = tags.get(key).getValue();
result.append(substr(key, cutAt));
result.append("-");
result.append(substr(value, cutAt));
result.append("_");
}
return substr(result.toString(), maxLength);
}
private static String substr(final String s, final int maxLength) {
return s.substring(0, Math.min(maxLength, s.length()));
}
public Tags subset(final List<String> groupByFields) {
Tags result = new Tags();
@@ -171,7 +218,11 @@ public class Tags {
}
public boolean isEmpty() {
return tags.isEmpty();
return filename == null || filename.length() == 0;
}
public static Tags create(final String filename) {
return new Tags(filename);
}
}

View File

@@ -1,4 +1,4 @@
package org.lucares.pdb.datastore.internal.map;
package org.lucares.pdb.api;
import java.io.BufferedReader;
import java.io.FileInputStream;
@@ -18,8 +18,6 @@ import java.util.TreeMap;
import java.util.function.Function;
import java.util.regex.Pattern;
import org.lucares.pdb.datastore.internal.RuntimeIOException;
/**
* A very simple {@link Set}-like or {@link Map}-like datastructure that stores
* unique&sup1; pairs of Strings and integers persistently.
@@ -48,10 +46,15 @@ public class UniqueStringIntegerPairs {
private final Path file;
public UniqueStringIntegerPairs() {
this(null);
}
public UniqueStringIntegerPairs(final Path file) {
super();
this.file = file;
init(file);
if (file != null) {
init(file);
}
}
private void init(final Path file) throws RuntimeIOException {
@@ -87,14 +90,15 @@ public class UniqueStringIntegerPairs {
if (stringToInt.containsKey(first) || intToString.containsKey(second)) {
throw new IllegalArgumentException("Unique key constraint violation for (" + first + ", " + second + ")");
}
if (file != null) {
try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND),
StandardCharsets.UTF_8)) {
try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND),
StandardCharsets.UTF_8)) {
writer.write(first + SEPARATOR + second + "\n");
writer.write(first + SEPARATOR + second + "\n");
} catch (final IOException e) {
throw new RuntimeIOException(e);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
}
intToString.put(second, first);

View File

@@ -6,37 +6,57 @@ import java.nio.file.Paths;
import java.util.LinkedHashMap;
import java.util.Map;
import org.lucares.utils.MiniMap;
import org.lucares.pdb.api.StringCompressor;
import org.lucares.pdb.api.Tag;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.api.UniqueStringIntegerPairs;
public class MemoryScale {
public static final String A = "A";
public static void main(String[] args) {
System.out.println("start");
public static void main(final String[] args) {
Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs());
scale("singleTag");
scale("tags0");
scale("tags1");
scale("tags2");
scale("tags6");
}
private static void scale(final String what) {
System.out.println("start: " + what);
// warmup of classes
getUsedMemory();
Object handle =createObject();
handle = null;
runGc();
long memoryBefore = getUsedMemory();
Object handle = createObject(what);
handle = null;
handle = createObject();
runGc();
long memoryAfter = getUsedMemory();
System.out.println("used memory: " + (memoryAfter - memoryBefore));
final long memoryBefore = getUsedMemory();
handle = createObject(what);
runGc();
final long memoryAfter = getUsedMemory();
System.out.println(what + ": used memory: " + (memoryAfter - memoryBefore));
handle.hashCode(); // use the variable, so causes no warnings and is not removed by JIT compiler
}
private static Object createObject(){
String key = "pathAsUtf8";
switch (key) {
case "minimap":
return createMinimap();
private static Object createObject(final String what) {
switch (what) {
case "singleTag":
return createTag();
case "tags0":
return createTags0();
case "tags1":
return createTags1();
case "tags2":
return createTags2();
case "tags6":
return createTags6();
case "string":
return createString();
case "linkedHashMap":
@@ -50,45 +70,60 @@ public class MemoryScale {
default:
return null;
}
}
private static Object createPathAsUtf8(String string) {
private static Object createTag() {
return new Tag("", "");
}
private static Object createTags0() {
return new Tags();
}
private static Object createTags1() {
return Tags.create("k1", "v1");
}
private static Object createTags2() {
return Tags.create("k1", "v1", "k2", "v2");
}
private static Object createTags6() {
Tags result = Tags.create("k1", "v1");
result = result.copyAdd("k2", "v2");
result = result.copyAdd("k3", "v3");
result = result.copyAdd("k4", "v4");
result = result.copyAdd("k5", "v5");
result = result.copyAdd("k6", "v6");
return result;
}
private static Object createPathAsUtf8(final String string) {
// TODO Auto-generated method stub
return string.getBytes(StandardCharsets.UTF_8);
}
private static String createPathAsString(String string) {
private static String createPathAsString(final String string) {
return string.replace("C", "c");
}
private static Path createPath(String string) {
private static Path createPath(final String string) {
return Paths.get(string);
}
private static Object createMinimap() {
final MiniMap<String, String> map = new MiniMap<>();
map.put("A", "A");
for (int i = 0; i < 0; i++){
map.put(""+i, ""+i);
}
return map;
}
private static String createString() {
int i= 0;
return ""+i;
final int i = 0;
return "" + i;
}
private static Object createLinkedHashMap() {
final Map<String, String> map = new LinkedHashMap<>();
map.put("A", "A");
for (int i = 0; i < 0; i++){
map.put(""+i, ""+i);
for (int i = 0; i < 0; i++) {
map.put("" + i, "" + i);
}
return map;
@@ -99,14 +134,13 @@ public class MemoryScale {
System.gc();
try {
Thread.sleep(100);
} catch (InterruptedException e) {
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
}
}
}
private static long getUsedMemory() {
return Runtime.getRuntime().totalMemory()
- Runtime.getRuntime().freeMemory();
return Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
}
}

View File

@@ -1,4 +1,4 @@
package org.lucares.pdb.datastore.internal;
package org.lucares.pdb.api;
import org.testng.Assert;
import org.testng.annotations.Test;

View File

@@ -1,4 +1,4 @@
package org.lucares.pdb.datastore.internal;
package org.lucares.pdb.api;
import java.io.IOException;
import java.nio.file.Files;

View File

@@ -1,4 +1,4 @@
package org.lucares.pdb.datastore.internal.map;
package org.lucares.pdb.api;
import java.io.IOException;
import java.nio.file.Files;

View File

@@ -42,7 +42,6 @@ import com.fasterxml.jackson.databind.ObjectReader;
@Component
public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
private static final Logger LOGGER = LoggerFactory.getLogger(TcpIngestor.class);
private static final Logger METRICS_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.tcpIngestor");
public static final int PORT = 17347;
@@ -73,8 +72,7 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
Thread.currentThread().setName("worker-" + clientAddress);
LOGGER.debug("opening streams to client");
try (PrintWriter out = new PrintWriter(clientSocket.getOutputStream(), true);
BufferedReader in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
) {
BufferedReader in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));) {
final ObjectMapper objectMapper = new ObjectMapper();
final ObjectReader objectReader = objectMapper.readerFor(typeReferenceForMap);
@@ -91,12 +89,12 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
LOGGER.debug("adding entry to queue: {}", entry);
queue.put(entry.get());
}
} catch (JsonParseException e) {
} catch (final JsonParseException e) {
LOGGER.info("json parse error in line '" + line + "'", e);
}
}
LOGGER.debug("connection closed: " + clientAddress);
} catch (Throwable e) {
} catch (final Throwable e) {
LOGGER.warn("Stream handling failed", e);
throw e;
}

View File

@@ -7,6 +7,7 @@ import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -75,10 +76,12 @@ public class TcpIngestorTest {
Assert.assertEquals(result.size(), 2);
Assert.assertEquals(result.get(0).getValue(), 1);
Assert.assertEquals(result.get(0).getDate().toInstant(), dateA.toInstant());
Assert.assertEquals(result.get(0).getDate().toInstant().truncatedTo(ChronoUnit.MILLIS),
dateA.toInstant().truncatedTo(ChronoUnit.MILLIS));
Assert.assertEquals(result.get(1).getValue(), 2);
Assert.assertEquals(result.get(1).getDate().toInstant(), dateB.toInstant());
Assert.assertEquals(result.get(1).getDate().toInstant().truncatedTo(ChronoUnit.MILLIS),
dateB.toInstant().truncatedTo(ChronoUnit.MILLIS));
}
}
@@ -99,8 +102,8 @@ public class TcpIngestorTest {
entryA.put("tags", Collections.emptyList());
// skipped, because it is not valid json
String corrupEntry = "{\"corrupt...";
final String corrupEntry = "{\"corrupt...";
// valid entry
final Map<String, Object> entryB = new HashMap<>();
entryB.put("duration", 2);
@@ -109,9 +112,9 @@ public class TcpIngestorTest {
entryB.put("tags", Collections.emptyList());
final ObjectMapper objectMapper = new ObjectMapper();
final String data = objectMapper.writeValueAsString(entryA)+"\n"+corrupEntry+"\n"+objectMapper.writeValueAsString(entryB)+"\n";
final String data = objectMapper.writeValueAsString(entryA) + "\n" + corrupEntry + "\n"
+ objectMapper.writeValueAsString(entryB) + "\n";
PdbTestUtil.send(data);
}
@@ -120,7 +123,8 @@ public class TcpIngestorTest {
Assert.assertEquals(result.size(), 1);
Assert.assertEquals(result.get(0).getValue(), 2);
Assert.assertEquals(result.get(0).getDate().toInstant(), dateB.toInstant());
Assert.assertEquals(result.get(0).getDate().toInstant().truncatedTo(ChronoUnit.MILLIS),
dateB.toInstant().truncatedTo(ChronoUnit.MILLIS));
}
}
}

View File

@@ -1,142 +0,0 @@
package org.lucares.utils;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* A memory efficient map implementation. It doesn't implement {@link Map},
* because this class does not support the full API of {@link Map}.
*/
public class MiniMap<K, V> {
private static final Object[] EMPTY_ARRAY = new Object[0];
private static final MiniMap<?,?> EMPTY_MAP = new MiniMap<>();
// keys are on even indices (0,2,4,...) and values on uneven (1,3,5,...)
private Object[] keysValues;
public MiniMap() {
keysValues = EMPTY_ARRAY;
}
public MiniMap(final MiniMap<K, V> miniMap){
keysValues = miniMap.keysValues.clone();
}
@SuppressWarnings("unchecked")
public static final <K,V> MiniMap<K,V> emptyMap() {
return (MiniMap<K,V>) EMPTY_MAP;
}
public int size() {
return keysValues.length / 2;
}
public boolean isEmpty() {
return size() == 0;
}
public boolean containsKey(Object key) {
return get(key) != null;
}
@SuppressWarnings("unchecked")
public V get(Object key) {
final int size = size();
for (int i = 0; i < size; i++) {
Object object = keysValues[2*i];
if (Objects.equals(key, object)) {
return (V) keysValues[2*i+1];
}
}
return null;
}
public V put(K key, V value) {
V oldValue = get(key);
if (oldValue != null) {
final int size = size();
for (int i = 0; i < size; i++) {
Object object = keysValues[2*i];
if (Objects.equals(key, object)) {
keysValues[2*i+1] = value;
break;
}
}
} else {
final Object[] newKeysValues = new Object[keysValues.length + 2];
System.arraycopy(keysValues, 0, newKeysValues, 0, keysValues.length);
newKeysValues[newKeysValues.length - 2] = key;
newKeysValues[newKeysValues.length - 1] = value;
keysValues = newKeysValues;
}
return oldValue;
}
public void putAll(Map<? extends K, ? extends V> map) {
for (java.util.Map.Entry<? extends K, ? extends V> e : map.entrySet()) {
put(e.getKey(), e.getValue());
}
}
public void clear() {
keysValues = EMPTY_ARRAY;
}
@SuppressWarnings("unchecked")
public Set<K> keySet() {
final Set<K> result = new HashSet<>(size());
final int size = size();
for (int i = 0; i < size; i++) {
K k = (K) keysValues[2*i];
result.add(k);
}
return result;
}
@SuppressWarnings("unchecked")
public Set<V> values() {
final Set<V> result = new HashSet<>(size());
final int size = size();
for (int i = 0; i < size; i++) {
V v = (V) keysValues[2*i+1];
result.add(v);
}
return result;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + Arrays.hashCode(keysValues);
return result;
}
@SuppressWarnings("rawtypes")
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
MiniMap other = (MiniMap) obj;
if (!Arrays.equals(keysValues, other.keysValues))
return false;
return true;
}
}

View File

@@ -1,24 +0,0 @@
package org.lucares.utils;
import org.testng.Assert;
import org.testng.annotations.Test;
@Test
public class MiniMapTest {
public void testInsertGet()
{
final MiniMap<String, String> map = new MiniMap<>();
String key1 = "key1";
String key2 = "key2";
String value1 = "value1";
String value2 = "value1";
map.put(key1, value1);
map.put(key2, value2);
Assert.assertEquals(map.get(key1), value1);
Assert.assertEquals(map.get(key2), value2);
}
}

View File

@@ -3,7 +3,6 @@ package org.lucares.performance.db;
import java.io.IOException;
import java.nio.file.Path;
import java.time.Duration;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
@@ -94,9 +93,8 @@ public class PerformanceDb implements AutoCloseable {
final long duration = end - lastSync;
final long entriesPerSecond = (long) (insertionsSinceLastSync / (duration / 1000.0));
METRICS_LOGGER
.debug(String.format("inserting %d/s ; total: %,d; last: %s",
entriesPerSecond, count, entry));
METRICS_LOGGER.debug(
String.format("inserting %d/s ; total: %,d; last: %s", entriesPerSecond, count, entry));
tagsToFile.flush();
lastSync = System.currentTimeMillis();
@@ -156,8 +154,8 @@ public class PerformanceDb implements AutoCloseable {
final Grouping grouping = Grouping.groupBy(pdbFiles, groupBy);
final Result result = toResult(grouping);
METRICS_LOGGER.debug("query execution took: " + (System.nanoTime() - start) / 1_000_000.0
+ "ms: " + query + " ("+groupBy+"): files found: " + pdbFiles.size());
METRICS_LOGGER.debug("query execution took: " + (System.nanoTime() - start) / 1_000_000.0 + "ms: " + query
+ " (" + groupBy + "): files found: " + pdbFiles.size());
return result;
}

View File

@@ -2,7 +2,6 @@ package org.lucares.performance.db;
import java.io.IOException;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -24,8 +23,8 @@ import org.slf4j.LoggerFactory;
public class TagsToFile implements AutoCloseable {
private static final Logger LOGGER = LoggerFactory.getLogger(TagsToFile.class);
private final static Logger METRICS_LOGGER_FIND_WRITER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.tagsToFile.findWriter");
private final static Logger METRICS_LOGGER_NEW_WRITER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.tagsToFile.newPdbWriter");
private final static Logger METRICS_LOGGER_NEW_WRITER = LoggerFactory
.getLogger("org.lucares.metrics.ingestion.tagsToFile.newPdbWriter");
private static class WriterCache {
final List<PdbWriter> writers = new ArrayList<>();
@@ -57,9 +56,9 @@ public class TagsToFile implements AutoCloseable {
}
public List<PdbFile> getFilesForQuery(final String query) {
final List<Doc> searchResult = db.search(query);
if (searchResult.size() > 500_000){
if (searchResult.size() > 500_000) {
throw new IllegalStateException("Too many results.");
}
@@ -84,7 +83,8 @@ public class TagsToFile implements AutoCloseable {
final PdbWriter result;
final WriterCache writersForTags = getOrInit(tags);
final Optional<PdbWriter> optionalWriter = chooseBestMatchingWriter(writersForTags.getWriters(), dateAsEpochMilli);
final Optional<PdbWriter> optionalWriter = chooseBestMatchingWriter(writersForTags.getWriters(),
dateAsEpochMilli);
if (optionalWriter.isPresent()) {
result = optionalWriter.get();
@@ -97,7 +97,7 @@ public class TagsToFile implements AutoCloseable {
final List<Optional<PdbWriter>> existingWriters = CollectionUtils.filter(optionalWriters,
Optional::isPresent);
final List<PdbWriter> writers = CollectionUtils.map(existingWriters, Optional::get);
final Optional<PdbWriter> optionalFirst = chooseBestMatchingWriter(writers, dateAsEpochMilli);
result = optionalFirst.orElseGet(() -> newPdbWriter(tags));
@@ -161,14 +161,15 @@ public class TagsToFile implements AutoCloseable {
final PdbWriter result = new PdbWriter(pdbFile);
getOrInit(tags).addWriter(result);
METRICS_LOGGER_NEW_WRITER.debug("newPdbWriter took {}ms tags: {}", (System.nanoTime() - start) / 1_000_000.0, tags);
METRICS_LOGGER_NEW_WRITER.debug("newPdbWriter took {}ms tags: {}",
(System.nanoTime() - start) / 1_000_000.0, tags);
return result;
} catch (final IOException e) {
throw new WriteException(e);
}
}
private PdbFile createNewPdbFile(final Tags tags) throws IOException {