reduce memory usage
Reduce memory usage by storing the filename as string instead of individual tags.
This commit is contained in:
@@ -2,7 +2,6 @@ package org.lucares.pdb.datastore.internal;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
@@ -12,11 +11,10 @@ import java.util.Set;
|
|||||||
import java.util.SortedSet;
|
import java.util.SortedSet;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.lucares.collections.IntList;
|
import org.lucares.collections.IntList;
|
||||||
|
import org.lucares.pdb.api.StringCompressor;
|
||||||
import org.lucares.pdb.api.Tags;
|
import org.lucares.pdb.api.Tags;
|
||||||
import org.lucares.pdb.datastore.Doc;
|
import org.lucares.pdb.datastore.Doc;
|
||||||
import org.lucares.pdb.datastore.lang.Expression;
|
import org.lucares.pdb.datastore.lang.Expression;
|
||||||
@@ -27,24 +25,12 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class DataStore {
|
public class DataStore {
|
||||||
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.dataStore.executeQuery");
|
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
|
||||||
|
.getLogger("org.lucares.metrics.dataStore.executeQuery");
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
|
||||||
|
|
||||||
private static final String SUBDIR_STORAGE = "storage";
|
private static final String SUBDIR_STORAGE = "storage";
|
||||||
private static final String PDB_EXTENSION = ".pdb";
|
private static final String PDB_EXTENSION = ".pdb";
|
||||||
private static final String KEY_VALUE_SEPARATOR = "-";
|
|
||||||
private static final String KEY_VALUE_PAIR_SEPARATOR = "_";
|
|
||||||
private static final String KEY_VALUE_END_SEPARATOR = "$";
|
|
||||||
|
|
||||||
private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
|
|
||||||
|
|
||||||
private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
|
|
||||||
+ REGEX_KEY_VALUE + ")*";;
|
|
||||||
|
|
||||||
private static final String REGEX_STORAGE_FILE = String.format("(%1$s)%2$s[0-9]*%3$s", REGEX_KEY_VALUE_PAIRS,
|
|
||||||
Pattern.quote(KEY_VALUE_END_SEPARATOR), PDB_EXTENSION);
|
|
||||||
|
|
||||||
private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
|
|
||||||
|
|
||||||
// to be guarded by itself
|
// to be guarded by itself
|
||||||
private final List<Doc> docIdToDoc = new ArrayList<>();
|
private final List<Doc> docIdToDoc = new ArrayList<>();
|
||||||
@@ -53,11 +39,10 @@ public class DataStore {
|
|||||||
|
|
||||||
private final ConcurrentHashMap<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
|
private final ConcurrentHashMap<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
private final StringCompressor stringCompressor;
|
|
||||||
private final FolderStorage folderStorage;
|
private final FolderStorage folderStorage;
|
||||||
|
|
||||||
public DataStore(final Path dataDirectory) throws IOException {
|
public DataStore(final Path dataDirectory) throws IOException {
|
||||||
stringCompressor = StringCompressor.create(keyCompressionFile(dataDirectory));
|
Tags.STRING_COMPRESSOR = StringCompressor.create(keyCompressionFile(dataDirectory));
|
||||||
|
|
||||||
folderStorage = new FolderStorage(storageDirectory(dataDirectory), 1000);
|
folderStorage = new FolderStorage(storageDirectory(dataDirectory), 1000);
|
||||||
init(folderStorage);
|
init(folderStorage);
|
||||||
@@ -76,7 +61,7 @@ public class DataStore {
|
|||||||
trimIntLists();
|
trimIntLists();
|
||||||
sortIntLists();
|
sortIntLists();
|
||||||
synchronized (docIdToDoc) {
|
synchronized (docIdToDoc) {
|
||||||
((ArrayList<Doc>)docIdToDoc).trimToSize();
|
((ArrayList<Doc>) docIdToDoc).trimToSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,7 +81,8 @@ public class DataStore {
|
|||||||
});
|
});
|
||||||
|
|
||||||
for (final String key : tags.getKeys()) {
|
for (final String key : tags.getKeys()) {
|
||||||
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new ConcurrentHashMap<>());
|
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key,
|
||||||
|
k -> new ConcurrentHashMap<>());
|
||||||
|
|
||||||
final String value = tags.getValue(key);
|
final String value = tags.getValue(key);
|
||||||
|
|
||||||
@@ -112,9 +98,9 @@ public class DataStore {
|
|||||||
int totalBeforeTrim = 0;
|
int totalBeforeTrim = 0;
|
||||||
int totalAfterTrim = 0;
|
int totalAfterTrim = 0;
|
||||||
int totalValues = 0;
|
int totalValues = 0;
|
||||||
for (Map<String, IntList> valueToDocIds : keyToValueToDocId.values()) {
|
for (final Map<String, IntList> valueToDocIds : keyToValueToDocId.values()) {
|
||||||
|
|
||||||
for (IntList intList : valueToDocIds.values()) {
|
for (final IntList intList : valueToDocIds.values()) {
|
||||||
totalBeforeTrim += intList.getCapacity();
|
totalBeforeTrim += intList.getCapacity();
|
||||||
intList.trim();
|
intList.trim();
|
||||||
totalAfterTrim += intList.getCapacity();
|
totalAfterTrim += intList.getCapacity();
|
||||||
@@ -122,13 +108,9 @@ public class DataStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGGER.info(
|
LOGGER.info("trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, took: {} ms",
|
||||||
"trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, took: {} ms",
|
totalValues, (totalBeforeTrim * 4) / 1024, (totalAfterTrim * 4) / 1024,
|
||||||
totalValues,
|
((totalBeforeTrim - totalAfterTrim) * 4) / 1024, (totalValues * 4) / 1024,
|
||||||
(totalBeforeTrim * 4) / 1024,
|
|
||||||
(totalAfterTrim * 4) / 1024,
|
|
||||||
((totalBeforeTrim - totalAfterTrim) * 4) / 1024,
|
|
||||||
(totalValues * 4) / 1024,
|
|
||||||
(System.nanoTime() - start) / 1_000_000.0);
|
(System.nanoTime() - start) / 1_000_000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,10 +121,7 @@ public class DataStore {
|
|||||||
|
|
||||||
valueToDocIds.stream().flatMap(map -> map.values().stream()).forEach(intList -> intList.sort());
|
valueToDocIds.stream().flatMap(map -> map.values().stream()).forEach(intList -> intList.sort());
|
||||||
|
|
||||||
|
LOGGER.info("sorting IntLists, took: {} ms", (System.nanoTime() - start) / 1_000_000.0);
|
||||||
LOGGER.info(
|
|
||||||
"sorting IntLists, took: {} ms",
|
|
||||||
(System.nanoTime() - start) / 1_000_000.0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
||||||
@@ -155,60 +134,16 @@ public class DataStore {
|
|||||||
|
|
||||||
public Path createNewFile(final Tags tags) throws IOException {
|
public Path createNewFile(final Tags tags) throws IOException {
|
||||||
|
|
||||||
final Path filename = toFilename(tags);
|
final String filename = tags.getFilename();
|
||||||
final Path result = folderStorage.insert(filename.toString(), PDB_EXTENSION);
|
final Path result = folderStorage.insert(filename, PDB_EXTENSION);
|
||||||
|
|
||||||
cacheTagToFileMapping(tags, result);
|
cacheTagToFileMapping(tags, result);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Path toFilename(final Tags tags) {
|
|
||||||
final StringBuilder path = new StringBuilder();
|
|
||||||
|
|
||||||
final SortedSet<String> sortedKeys = new TreeSet<>(tags.getKeys());
|
|
||||||
|
|
||||||
for (final String key : sortedKeys) {
|
|
||||||
final String value = tags.getValue(key);
|
|
||||||
|
|
||||||
final int compressedKey = stringCompressor.put(key);
|
|
||||||
final int compressedValue = stringCompressor.put(value);
|
|
||||||
|
|
||||||
if (path.length() > 0) {
|
|
||||||
path.append(KEY_VALUE_PAIR_SEPARATOR);
|
|
||||||
}
|
|
||||||
|
|
||||||
path.append(RadixConverter.toString(compressedKey));
|
|
||||||
path.append(KEY_VALUE_SEPARATOR);
|
|
||||||
path.append(RadixConverter.toString(compressedValue));
|
|
||||||
}
|
|
||||||
path.append(KEY_VALUE_END_SEPARATOR);
|
|
||||||
|
|
||||||
return Paths.get(path.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
private Tags toTags(final String filename) {
|
private Tags toTags(final String filename) {
|
||||||
Tags tags = Tags.create();
|
final Tags tags = Tags.create(filename);
|
||||||
|
|
||||||
final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename);
|
|
||||||
|
|
||||||
if (matcher.find()) {
|
|
||||||
final String serializedTags = matcher.group(1);
|
|
||||||
|
|
||||||
final String[] serializedKeyValuePairs = serializedTags.split(Pattern.quote(KEY_VALUE_PAIR_SEPARATOR));
|
|
||||||
|
|
||||||
for (int i = 0; i < serializedKeyValuePairs.length; i++) {
|
|
||||||
final String[] keyValuePair = serializedKeyValuePairs[i].split(Pattern.quote(KEY_VALUE_SEPARATOR));
|
|
||||||
|
|
||||||
if (keyValuePair.length == 2) {
|
|
||||||
|
|
||||||
final String key = stringCompressor.get(RadixConverter.fromString(keyValuePair[0]));
|
|
||||||
final String value = stringCompressor.get(RadixConverter.fromString(keyValuePair[1]));
|
|
||||||
|
|
||||||
tags = tags.copyAdd(key, value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return tags;
|
return tags;
|
||||||
}
|
}
|
||||||
@@ -263,13 +198,11 @@ public class DataStore {
|
|||||||
final long start = System.nanoTime();
|
final long start = System.nanoTime();
|
||||||
synchronized (docIdToDoc) {
|
synchronized (docIdToDoc) {
|
||||||
final Expression expression = QueryLanguageParser.parse(query);
|
final Expression expression = QueryLanguageParser.parse(query);
|
||||||
final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(
|
final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(keyToValueToDocId,
|
||||||
keyToValueToDocId, new AllDocIds(docIdToDoc));
|
new AllDocIds(docIdToDoc));
|
||||||
final IntList docIdsList = expression.visit(visitor);
|
final IntList docIdsList = expression.visit(visitor);
|
||||||
EXECUTE_QUERY_LOGGER.debug(
|
EXECUTE_QUERY_LOGGER.debug("executeQuery({}) took {}ms returned {} results ", query,
|
||||||
"executeQuery({}) took {}ms returned {} results ", query,
|
(System.nanoTime() - start) / 1_000_000.0, docIdsList.size());
|
||||||
(System.nanoTime() - start) / 1_000_000.0,
|
|
||||||
docIdsList.size());
|
|
||||||
return docIdsList;
|
return docIdsList;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -290,7 +223,7 @@ public class DataStore {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Doc> getByTags(Tags tags) {
|
public List<Doc> getByTags(final Tags tags) {
|
||||||
final List<Doc> result = tagsToDocs.getOrDefault(tags, new ArrayList<>(0));
|
final List<Doc> result = tagsToDocs.getOrDefault(tags, new ArrayList<>(0));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
package org.lucares.pdb.datastore.internal;
|
||||||
|
|
||||||
|
public class TagsUtils {
|
||||||
|
|
||||||
|
}
|
||||||
@@ -35,13 +35,14 @@ public class DataStoreTest {
|
|||||||
FileUtils.delete(dataDirectory);
|
FileUtils.delete(dataDirectory);
|
||||||
dataStore = null;
|
dataStore = null;
|
||||||
tagsToPath = null;
|
tagsToPath = null;
|
||||||
|
Tags.STRING_COMPRESSOR = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testInsertSingleTag() throws Exception {
|
public void testInsertSingleTag() throws Exception {
|
||||||
final Tags tags = Tags.create("key1", "value1", "key2", "value2");
|
|
||||||
final Path path;
|
final Path path;
|
||||||
{
|
{
|
||||||
final DataStore dataStore = new DataStore(dataDirectory);
|
final DataStore dataStore = new DataStore(dataDirectory);
|
||||||
|
final Tags tags = Tags.create("key1", "value1", "key2", "value2");
|
||||||
|
|
||||||
path = dataStore.createNewFile(tags);
|
path = dataStore.createNewFile(tags);
|
||||||
assertSearch(dataStore, "key1=value1", path);
|
assertSearch(dataStore, "key1=value1", path);
|
||||||
@@ -54,6 +55,8 @@ public class DataStoreTest {
|
|||||||
|
|
||||||
public void testQuery() throws Exception {
|
public void testQuery() throws Exception {
|
||||||
|
|
||||||
|
dataStore = new DataStore(dataDirectory);
|
||||||
|
|
||||||
tagsToPath = new LinkedHashMap<>();
|
tagsToPath = new LinkedHashMap<>();
|
||||||
final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim");
|
final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim");
|
||||||
final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer");
|
final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer");
|
||||||
@@ -67,8 +70,6 @@ public class DataStoreTest {
|
|||||||
tagsToPath.put(labradorJenny, null);
|
tagsToPath.put(labradorJenny, null);
|
||||||
tagsToPath.put(labradorTim, null);
|
tagsToPath.put(labradorTim, null);
|
||||||
|
|
||||||
dataStore = new DataStore(dataDirectory);
|
|
||||||
|
|
||||||
for (final Tags tags : tagsToPath.keySet()) {
|
for (final Tags tags : tagsToPath.keySet()) {
|
||||||
final Path newFile = dataStore.createNewFile(tags);
|
final Path newFile = dataStore.createNewFile(tags);
|
||||||
tagsToPath.put(tags, newFile);
|
tagsToPath.put(tags, newFile);
|
||||||
@@ -98,15 +99,15 @@ public class DataStoreTest {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetByTags() throws IOException
|
public void testGetByTags() throws IOException {
|
||||||
{
|
|
||||||
|
dataStore = new DataStore(dataDirectory);
|
||||||
|
|
||||||
final Tags eagleTim1 = Tags.create("bird", "eagle", "name", "Tim");
|
final Tags eagleTim1 = Tags.create("bird", "eagle", "name", "Tim");
|
||||||
final Tags eagleTim2 = Tags.create("bird", "eagle", "name", "Tim");
|
final Tags eagleTim2 = Tags.create("bird", "eagle", "name", "Tim");
|
||||||
final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer");
|
final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer");
|
||||||
final Tags flamingoJennifer = Tags.create("bird", "flamingo", "name", "Jennifer");
|
final Tags flamingoJennifer = Tags.create("bird", "flamingo", "name", "Jennifer");
|
||||||
|
|
||||||
dataStore = new DataStore(dataDirectory);
|
|
||||||
|
|
||||||
dataStore.createNewFile(eagleTim1);
|
dataStore.createNewFile(eagleTim1);
|
||||||
dataStore.createNewFile(eagleTim2);
|
dataStore.createNewFile(eagleTim2);
|
||||||
dataStore.createNewFile(pigeonJennifer);
|
dataStore.createNewFile(pigeonJennifer);
|
||||||
|
|||||||
@@ -36,9 +36,12 @@ public class ProposerTest {
|
|||||||
FileUtils.delete(dataDirectory);
|
FileUtils.delete(dataDirectory);
|
||||||
db = null;
|
db = null;
|
||||||
tagsToPath = null;
|
tagsToPath = null;
|
||||||
|
Tags.STRING_COMPRESSOR = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initDatabase() throws Exception {
|
private void initDatabase() throws Exception {
|
||||||
|
db = new PdbDB(dataDirectory);
|
||||||
|
|
||||||
tagsToPath = new LinkedHashMap<>();
|
tagsToPath = new LinkedHashMap<>();
|
||||||
final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim");
|
final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim");
|
||||||
final Tags eagleTimothy = Tags.create("bird", "eagle", "name", "Timothy");
|
final Tags eagleTimothy = Tags.create("bird", "eagle", "name", "Timothy");
|
||||||
@@ -54,8 +57,6 @@ public class ProposerTest {
|
|||||||
tagsToPath.put(labradorJenny, null);
|
tagsToPath.put(labradorJenny, null);
|
||||||
tagsToPath.put(labradorTim, null);
|
tagsToPath.put(labradorTim, null);
|
||||||
|
|
||||||
db = new PdbDB(dataDirectory);
|
|
||||||
|
|
||||||
for (final Tags tags : tagsToPath.keySet()) {
|
for (final Tags tags : tagsToPath.keySet()) {
|
||||||
final Path newFile = db.createNewFile(tags);
|
final Path newFile = db.createNewFile(tags);
|
||||||
tagsToPath.put(tags, newFile);
|
tagsToPath.put(tags, newFile);
|
||||||
@@ -88,15 +89,13 @@ public class ProposerTest {
|
|||||||
|
|
||||||
public void testPrefixOfValue() throws Exception {
|
public void testPrefixOfValue() throws Exception {
|
||||||
assertProposals("name =Tim", 9, //
|
assertProposals("name =Tim", 9, //
|
||||||
new Proposal("Timothy", "name =Timothy ", true)
|
new Proposal("Timothy", "name =Timothy ", true));
|
||||||
);
|
|
||||||
|
|
||||||
assertProposals("name =Je", 8, //
|
assertProposals("name =Je", 8, //
|
||||||
new Proposal("Jennifer", "name =Jennifer ", true), //
|
new Proposal("Jennifer", "name =Jennifer ", true), //
|
||||||
new Proposal("Jenny", "name =Jenny ", true) //
|
new Proposal("Jenny", "name =Jenny ", true) //
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
assertProposals("bird=eagle and n", 16, //
|
assertProposals("bird=eagle and n", 16, //
|
||||||
new Proposal("name", "bird=eagle and name=* ", true) //
|
new Proposal("name", "bird=eagle and name=* ", true) //
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
compile project(':pdb-utils')
|
compile project(':pdb-utils')
|
||||||
|
compile project(':file-utils')
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package org.lucares.pdb.datastore.internal;
|
package org.lucares.pdb.api;
|
||||||
|
|
||||||
public class RadixConverter {
|
public class RadixConverter {
|
||||||
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package org.lucares.pdb.datastore.internal;
|
package org.lucares.pdb.api;
|
||||||
|
|
||||||
public class RuntimeIOException extends RuntimeException {
|
public class RuntimeIOException extends RuntimeException {
|
||||||
|
|
||||||
@@ -1,9 +1,7 @@
|
|||||||
package org.lucares.pdb.datastore.internal;
|
package org.lucares.pdb.api;
|
||||||
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
import org.lucares.pdb.datastore.internal.map.UniqueStringIntegerPairs;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Persistently maps Strings to integers.
|
* Persistently maps Strings to integers.
|
||||||
*/
|
*/
|
||||||
@@ -6,23 +6,45 @@ import java.util.Set;
|
|||||||
import java.util.SortedSet;
|
import java.util.SortedSet;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.function.BiConsumer;
|
import java.util.function.BiConsumer;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
import org.lucares.utils.MiniMap;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class Tags {
|
public class Tags {
|
||||||
|
|
||||||
|
public static StringCompressor STRING_COMPRESSOR = null;
|
||||||
public static final Tags EMPTY = new Tags();
|
public static final Tags EMPTY = new Tags();
|
||||||
|
|
||||||
private final MiniMap<String, Tag> tags;
|
public static final String KEY_VALUE_SEPARATOR = "-";
|
||||||
|
public static final String KEY_VALUE_PAIR_SEPARATOR = "_";
|
||||||
|
public static final String KEY_VALUE_END_SEPARATOR = "$";
|
||||||
|
|
||||||
private int cachedHash = 0;
|
private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
|
||||||
|
|
||||||
private Tags() {
|
private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
|
||||||
super();
|
+ REGEX_KEY_VALUE + ")*";;
|
||||||
tags = MiniMap.emptyMap();
|
|
||||||
|
private static final String REGEX_STORAGE_FILE = String.format("(%1$s)", REGEX_KEY_VALUE_PAIRS);
|
||||||
|
|
||||||
|
private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
|
||||||
|
|
||||||
|
private final String filename;
|
||||||
|
|
||||||
|
public Tags() {
|
||||||
|
filename = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
private Tags(final MiniMap<String, Tag> tags) {
|
public Tags(final String filename) {
|
||||||
this.tags = tags;
|
// normalize filename
|
||||||
|
// filenames look like this: 0-1_2-1M_H-28_4-5$1.pdb
|
||||||
|
// there can be several files for the same set of tags, in which case the number
|
||||||
|
// after the $ is incremented
|
||||||
|
// We only take the part until the $.
|
||||||
|
final int end = filename.indexOf(KEY_VALUE_END_SEPARATOR);
|
||||||
|
if (end >= 0) {
|
||||||
|
this.filename = filename.substring(0, end);
|
||||||
|
} else {
|
||||||
|
this.filename = filename;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Tags create() {
|
public static Tags create() {
|
||||||
@@ -30,36 +52,34 @@ public class Tags {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static Tags create(final String key, final String value) {
|
public static Tags create(final String key, final String value) {
|
||||||
final MiniMap<String, Tag> tags = new MiniMap<>();
|
|
||||||
tags.put(key, new Tag(key, value));
|
return EMPTY.copyAdd(key, value);
|
||||||
return new Tags(tags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Tags create(final String key1, final String value1, final String key2, final String value2) {
|
public static Tags create(final String key1, final String value1, final String key2, final String value2) {
|
||||||
final MiniMap<String, Tag> tags = new MiniMap<>();
|
|
||||||
tags.put(key1, new Tag(key1, value1));
|
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2);
|
||||||
tags.put(key2, new Tag(key2, value2));
|
return result;
|
||||||
return new Tags(tags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Tags create(final String key1, final String value1, final String key2, final String value2,
|
public static Tags create(final String key1, final String value1, final String key2, final String value2,
|
||||||
final String key3, final String value3) {
|
final String key3, final String value3) {
|
||||||
final MiniMap<String, Tag> tags = new MiniMap<>();
|
final Tags result = EMPTY.copyAdd(key1, value1).copyAdd(key2, value2).copyAdd(key3, value3);
|
||||||
tags.put(key1, new Tag(key1, value1));
|
return result;
|
||||||
tags.put(key2, new Tag(key2, value2));
|
|
||||||
tags.put(key3, new Tag(key3, value3));
|
|
||||||
return new Tags(tags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Tags copyAdd(final String key, final String value) {
|
public Tags copyAdd(final String key, final String value) {
|
||||||
Objects.requireNonNull(key, "key must not be null");
|
Objects.requireNonNull(key, "key must not be null");
|
||||||
Objects.requireNonNull(value, "value must not be null");
|
Objects.requireNonNull(value, "value must not be null");
|
||||||
|
|
||||||
final MiniMap<String, Tag> newTags = new MiniMap<>(tags);
|
final Tag tag = new Tag(key, value);
|
||||||
|
|
||||||
newTags.put(key, new Tag(key, value));
|
final SortedSet<Tag> tags = toTags();
|
||||||
|
tags.add(tag);
|
||||||
|
|
||||||
return new Tags(newTags);
|
final String newFilename = toFilename(tags);
|
||||||
|
|
||||||
|
return new Tags(newFilename);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Tags copyAddIfNotNull(final String key, final String value) {
|
public Tags copyAddIfNotNull(final String key, final String value) {
|
||||||
@@ -73,45 +93,97 @@ public class Tags {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getFilename() {
|
||||||
|
return filename;
|
||||||
|
}
|
||||||
|
|
||||||
public String getValue(final String key) {
|
public String getValue(final String key) {
|
||||||
final Tag tag = tags.get(key);
|
|
||||||
final String value = tag != null ? tag.getValue() : null;
|
final Set<Tag> tags = toTags();
|
||||||
return value;
|
for (final Tag tag : tags) {
|
||||||
|
if (Objects.equals(tag.getKey(), key)) {
|
||||||
|
return tag.getValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SortedSet<Tag> toTags() {
|
||||||
|
final SortedSet<Tag> result = new TreeSet<>((a, b) -> a.getKey().compareToIgnoreCase(b.getKey()));
|
||||||
|
final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename);
|
||||||
|
|
||||||
|
if (matcher.find()) {
|
||||||
|
final String serializedTags = matcher.group(1);
|
||||||
|
|
||||||
|
final String[] serializedKeyValuePairs = serializedTags.split(Pattern.quote(KEY_VALUE_PAIR_SEPARATOR));
|
||||||
|
|
||||||
|
for (int i = 0; i < serializedKeyValuePairs.length; i++) {
|
||||||
|
final String[] keyValuePair = serializedKeyValuePairs[i].split(Pattern.quote(KEY_VALUE_SEPARATOR));
|
||||||
|
|
||||||
|
if (keyValuePair.length == 2) {
|
||||||
|
|
||||||
|
final String key = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[0]));
|
||||||
|
final String value = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[1]));
|
||||||
|
|
||||||
|
result.add(new Tag(key, value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toFilename(final SortedSet<Tag> tags) {
|
||||||
|
final StringBuilder path = new StringBuilder();
|
||||||
|
|
||||||
|
for (final Tag tag : tags) {
|
||||||
|
final String key = tag.getKey();
|
||||||
|
final String value = tag.getValue();
|
||||||
|
|
||||||
|
final int compressedKey = STRING_COMPRESSOR.put(key);
|
||||||
|
final int compressedValue = STRING_COMPRESSOR.put(value);
|
||||||
|
|
||||||
|
if (path.length() > 0) {
|
||||||
|
path.append(Tags.KEY_VALUE_PAIR_SEPARATOR);
|
||||||
|
}
|
||||||
|
|
||||||
|
path.append(RadixConverter.toString(compressedKey));
|
||||||
|
path.append(Tags.KEY_VALUE_SEPARATOR);
|
||||||
|
path.append(RadixConverter.toString(compressedValue));
|
||||||
|
}
|
||||||
|
path.append(Tags.KEY_VALUE_END_SEPARATOR);
|
||||||
|
|
||||||
|
return path.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set<String> getKeys() {
|
public Set<String> getKeys() {
|
||||||
return new TreeSet<>(tags.keySet());
|
final TreeSet<String> result = new TreeSet<>();
|
||||||
|
final Set<Tag> tags = toTags();
|
||||||
|
for (final Tag tag : tags) {
|
||||||
|
result.add(tag.getKey());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void forEach(final BiConsumer<String, String> keyValueConsumer) {
|
public void forEach(final BiConsumer<String, String> keyValueConsumer) {
|
||||||
|
|
||||||
Set<String> keys = tags.keySet();
|
final Set<Tag> tags = toTags();
|
||||||
|
for (final Tag tag : tags) {
|
||||||
for (String key : keys) {
|
keyValueConsumer.accept(tag.getKey(), tag.getValue());
|
||||||
final Tag value = tags.get(key);
|
|
||||||
keyValueConsumer.accept(key, value.getValue());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.valueOf(tags.values());
|
return "Tags [filename=" + filename + "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
|
|
||||||
if (cachedHash != 0) {
|
|
||||||
return cachedHash;
|
|
||||||
} else {
|
|
||||||
|
|
||||||
final int prime = 31;
|
final int prime = 31;
|
||||||
int result = 1;
|
int result = 1;
|
||||||
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
|
result = prime * result + ((filename == null) ? 0 : filename.hashCode());
|
||||||
cachedHash = result;
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(final Object obj) {
|
public boolean equals(final Object obj) {
|
||||||
@@ -122,39 +194,14 @@ public class Tags {
|
|||||||
if (getClass() != obj.getClass())
|
if (getClass() != obj.getClass())
|
||||||
return false;
|
return false;
|
||||||
final Tags other = (Tags) obj;
|
final Tags other = (Tags) obj;
|
||||||
if (tags == null) {
|
if (filename == null) {
|
||||||
if (other.tags != null)
|
if (other.filename != null)
|
||||||
return false;
|
return false;
|
||||||
} else if (!tags.equals(other.tags))
|
} else if (!filename.equals(other.filename))
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String abbreviatedRepresentation() {
|
|
||||||
final StringBuilder result = new StringBuilder();
|
|
||||||
final int maxLength = 100;
|
|
||||||
|
|
||||||
final SortedSet<String> keys = new TreeSet<>(tags.keySet());
|
|
||||||
|
|
||||||
final int cutAt = maxLength / (keys.size() * 2 + 2);
|
|
||||||
|
|
||||||
for (final String key : keys) {
|
|
||||||
|
|
||||||
final String value = tags.get(key).getValue();
|
|
||||||
|
|
||||||
result.append(substr(key, cutAt));
|
|
||||||
result.append("-");
|
|
||||||
result.append(substr(value, cutAt));
|
|
||||||
result.append("_");
|
|
||||||
}
|
|
||||||
|
|
||||||
return substr(result.toString(), maxLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String substr(final String s, final int maxLength) {
|
|
||||||
return s.substring(0, Math.min(maxLength, s.length()));
|
|
||||||
}
|
|
||||||
|
|
||||||
public Tags subset(final List<String> groupByFields) {
|
public Tags subset(final List<String> groupByFields) {
|
||||||
|
|
||||||
Tags result = new Tags();
|
Tags result = new Tags();
|
||||||
@@ -171,7 +218,11 @@ public class Tags {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return tags.isEmpty();
|
return filename == null || filename.length() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Tags create(final String filename) {
|
||||||
|
return new Tags(filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package org.lucares.pdb.datastore.internal.map;
|
package org.lucares.pdb.api;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
@@ -18,8 +18,6 @@ import java.util.TreeMap;
|
|||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.lucares.pdb.datastore.internal.RuntimeIOException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A very simple {@link Set}-like or {@link Map}-like datastructure that stores
|
* A very simple {@link Set}-like or {@link Map}-like datastructure that stores
|
||||||
* unique¹ pairs of Strings and integers persistently.
|
* unique¹ pairs of Strings and integers persistently.
|
||||||
@@ -48,11 +46,16 @@ public class UniqueStringIntegerPairs {
|
|||||||
|
|
||||||
private final Path file;
|
private final Path file;
|
||||||
|
|
||||||
|
public UniqueStringIntegerPairs() {
|
||||||
|
this(null);
|
||||||
|
}
|
||||||
|
|
||||||
public UniqueStringIntegerPairs(final Path file) {
|
public UniqueStringIntegerPairs(final Path file) {
|
||||||
super();
|
|
||||||
this.file = file;
|
this.file = file;
|
||||||
|
if (file != null) {
|
||||||
init(file);
|
init(file);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void init(final Path file) throws RuntimeIOException {
|
private void init(final Path file) throws RuntimeIOException {
|
||||||
|
|
||||||
@@ -87,7 +90,7 @@ public class UniqueStringIntegerPairs {
|
|||||||
if (stringToInt.containsKey(first) || intToString.containsKey(second)) {
|
if (stringToInt.containsKey(first) || intToString.containsKey(second)) {
|
||||||
throw new IllegalArgumentException("Unique key constraint violation for (" + first + ", " + second + ")");
|
throw new IllegalArgumentException("Unique key constraint violation for (" + first + ", " + second + ")");
|
||||||
}
|
}
|
||||||
|
if (file != null) {
|
||||||
try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND),
|
try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND),
|
||||||
StandardCharsets.UTF_8)) {
|
StandardCharsets.UTF_8)) {
|
||||||
|
|
||||||
@@ -96,6 +99,7 @@ public class UniqueStringIntegerPairs {
|
|||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
throw new RuntimeIOException(e);
|
throw new RuntimeIOException(e);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
intToString.put(second, first);
|
intToString.put(second, first);
|
||||||
stringToInt.put(first, second);
|
stringToInt.put(first, second);
|
||||||
@@ -6,37 +6,57 @@ import java.nio.file.Paths;
|
|||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.lucares.utils.MiniMap;
|
import org.lucares.pdb.api.StringCompressor;
|
||||||
|
import org.lucares.pdb.api.Tag;
|
||||||
|
import org.lucares.pdb.api.Tags;
|
||||||
|
import org.lucares.pdb.api.UniqueStringIntegerPairs;
|
||||||
|
|
||||||
public class MemoryScale {
|
public class MemoryScale {
|
||||||
|
|
||||||
public static final String A = "A";
|
public static final String A = "A";
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(final String[] args) {
|
||||||
System.out.println("start");
|
Tags.STRING_COMPRESSOR = new StringCompressor(new UniqueStringIntegerPairs());
|
||||||
|
|
||||||
|
scale("singleTag");
|
||||||
|
scale("tags0");
|
||||||
|
scale("tags1");
|
||||||
|
scale("tags2");
|
||||||
|
scale("tags6");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void scale(final String what) {
|
||||||
|
System.out.println("start: " + what);
|
||||||
// warmup of classes
|
// warmup of classes
|
||||||
getUsedMemory();
|
getUsedMemory();
|
||||||
Object handle =createObject();
|
Object handle = createObject(what);
|
||||||
|
|
||||||
handle = null;
|
handle = null;
|
||||||
|
|
||||||
runGc();
|
runGc();
|
||||||
long memoryBefore = getUsedMemory();
|
final long memoryBefore = getUsedMemory();
|
||||||
|
|
||||||
handle = createObject();
|
handle = createObject(what);
|
||||||
|
|
||||||
runGc();
|
runGc();
|
||||||
long memoryAfter = getUsedMemory();
|
final long memoryAfter = getUsedMemory();
|
||||||
System.out.println("used memory: " + (memoryAfter - memoryBefore));
|
System.out.println(what + ": used memory: " + (memoryAfter - memoryBefore));
|
||||||
handle.hashCode(); // use the variable, so causes no warnings and is not removed by JIT compiler
|
handle.hashCode(); // use the variable, so causes no warnings and is not removed by JIT compiler
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Object createObject(){
|
private static Object createObject(final String what) {
|
||||||
|
|
||||||
String key = "pathAsUtf8";
|
switch (what) {
|
||||||
switch (key) {
|
case "singleTag":
|
||||||
case "minimap":
|
return createTag();
|
||||||
return createMinimap();
|
case "tags0":
|
||||||
|
return createTags0();
|
||||||
|
case "tags1":
|
||||||
|
return createTags1();
|
||||||
|
case "tags2":
|
||||||
|
return createTags2();
|
||||||
|
case "tags6":
|
||||||
|
return createTags6();
|
||||||
case "string":
|
case "string":
|
||||||
return createString();
|
return createString();
|
||||||
case "linkedHashMap":
|
case "linkedHashMap":
|
||||||
@@ -53,42 +73,57 @@ public class MemoryScale {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Object createPathAsUtf8(String string) {
|
private static Object createTag() {
|
||||||
|
return new Tag("", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Object createTags0() {
|
||||||
|
return new Tags();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Object createTags1() {
|
||||||
|
return Tags.create("k1", "v1");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Object createTags2() {
|
||||||
|
return Tags.create("k1", "v1", "k2", "v2");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Object createTags6() {
|
||||||
|
Tags result = Tags.create("k1", "v1");
|
||||||
|
result = result.copyAdd("k2", "v2");
|
||||||
|
result = result.copyAdd("k3", "v3");
|
||||||
|
result = result.copyAdd("k4", "v4");
|
||||||
|
result = result.copyAdd("k5", "v5");
|
||||||
|
result = result.copyAdd("k6", "v6");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Object createPathAsUtf8(final String string) {
|
||||||
// TODO Auto-generated method stub
|
// TODO Auto-generated method stub
|
||||||
return string.getBytes(StandardCharsets.UTF_8);
|
return string.getBytes(StandardCharsets.UTF_8);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String createPathAsString(String string) {
|
private static String createPathAsString(final String string) {
|
||||||
return string.replace("C", "c");
|
return string.replace("C", "c");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Path createPath(String string) {
|
private static Path createPath(final String string) {
|
||||||
return Paths.get(string);
|
return Paths.get(string);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Object createMinimap() {
|
|
||||||
final MiniMap<String, String> map = new MiniMap<>();
|
|
||||||
|
|
||||||
map.put("A", "A");
|
|
||||||
for (int i = 0; i < 0; i++){
|
|
||||||
map.put(""+i, ""+i);
|
|
||||||
}
|
|
||||||
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String createString() {
|
private static String createString() {
|
||||||
|
|
||||||
int i= 0;
|
final int i = 0;
|
||||||
return ""+i;
|
return "" + i;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Object createLinkedHashMap() {
|
private static Object createLinkedHashMap() {
|
||||||
final Map<String, String> map = new LinkedHashMap<>();
|
final Map<String, String> map = new LinkedHashMap<>();
|
||||||
|
|
||||||
map.put("A", "A");
|
map.put("A", "A");
|
||||||
for (int i = 0; i < 0; i++){
|
for (int i = 0; i < 0; i++) {
|
||||||
map.put(""+i, ""+i);
|
map.put("" + i, "" + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
return map;
|
return map;
|
||||||
@@ -99,14 +134,13 @@ public class MemoryScale {
|
|||||||
System.gc();
|
System.gc();
|
||||||
try {
|
try {
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
} catch (InterruptedException e) {
|
} catch (final InterruptedException e) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static long getUsedMemory() {
|
private static long getUsedMemory() {
|
||||||
return Runtime.getRuntime().totalMemory()
|
return Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
|
||||||
- Runtime.getRuntime().freeMemory();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package org.lucares.pdb.datastore.internal;
|
package org.lucares.pdb.api;
|
||||||
|
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package org.lucares.pdb.datastore.internal;
|
package org.lucares.pdb.api;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package org.lucares.pdb.datastore.internal.map;
|
package org.lucares.pdb.api;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@@ -42,7 +42,6 @@ import com.fasterxml.jackson.databind.ObjectReader;
|
|||||||
@Component
|
@Component
|
||||||
public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
|
public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(TcpIngestor.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(TcpIngestor.class);
|
||||||
private static final Logger METRICS_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.tcpIngestor");
|
|
||||||
|
|
||||||
public static final int PORT = 17347;
|
public static final int PORT = 17347;
|
||||||
|
|
||||||
@@ -73,8 +72,7 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
|
|||||||
Thread.currentThread().setName("worker-" + clientAddress);
|
Thread.currentThread().setName("worker-" + clientAddress);
|
||||||
LOGGER.debug("opening streams to client");
|
LOGGER.debug("opening streams to client");
|
||||||
try (PrintWriter out = new PrintWriter(clientSocket.getOutputStream(), true);
|
try (PrintWriter out = new PrintWriter(clientSocket.getOutputStream(), true);
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
|
BufferedReader in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));) {
|
||||||
) {
|
|
||||||
final ObjectMapper objectMapper = new ObjectMapper();
|
final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
final ObjectReader objectReader = objectMapper.readerFor(typeReferenceForMap);
|
final ObjectReader objectReader = objectMapper.readerFor(typeReferenceForMap);
|
||||||
|
|
||||||
@@ -91,12 +89,12 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
|
|||||||
LOGGER.debug("adding entry to queue: {}", entry);
|
LOGGER.debug("adding entry to queue: {}", entry);
|
||||||
queue.put(entry.get());
|
queue.put(entry.get());
|
||||||
}
|
}
|
||||||
} catch (JsonParseException e) {
|
} catch (final JsonParseException e) {
|
||||||
LOGGER.info("json parse error in line '" + line + "'", e);
|
LOGGER.info("json parse error in line '" + line + "'", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOGGER.debug("connection closed: " + clientAddress);
|
LOGGER.debug("connection closed: " + clientAddress);
|
||||||
} catch (Throwable e) {
|
} catch (final Throwable e) {
|
||||||
LOGGER.warn("Stream handling failed", e);
|
LOGGER.warn("Stream handling failed", e);
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import java.time.Instant;
|
|||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.time.ZoneOffset;
|
import java.time.ZoneOffset;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.time.temporal.ChronoUnit;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -75,10 +76,12 @@ public class TcpIngestorTest {
|
|||||||
Assert.assertEquals(result.size(), 2);
|
Assert.assertEquals(result.size(), 2);
|
||||||
|
|
||||||
Assert.assertEquals(result.get(0).getValue(), 1);
|
Assert.assertEquals(result.get(0).getValue(), 1);
|
||||||
Assert.assertEquals(result.get(0).getDate().toInstant(), dateA.toInstant());
|
Assert.assertEquals(result.get(0).getDate().toInstant().truncatedTo(ChronoUnit.MILLIS),
|
||||||
|
dateA.toInstant().truncatedTo(ChronoUnit.MILLIS));
|
||||||
|
|
||||||
Assert.assertEquals(result.get(1).getValue(), 2);
|
Assert.assertEquals(result.get(1).getValue(), 2);
|
||||||
Assert.assertEquals(result.get(1).getDate().toInstant(), dateB.toInstant());
|
Assert.assertEquals(result.get(1).getDate().toInstant().truncatedTo(ChronoUnit.MILLIS),
|
||||||
|
dateB.toInstant().truncatedTo(ChronoUnit.MILLIS));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -99,7 +102,7 @@ public class TcpIngestorTest {
|
|||||||
entryA.put("tags", Collections.emptyList());
|
entryA.put("tags", Collections.emptyList());
|
||||||
|
|
||||||
// skipped, because it is not valid json
|
// skipped, because it is not valid json
|
||||||
String corrupEntry = "{\"corrupt...";
|
final String corrupEntry = "{\"corrupt...";
|
||||||
|
|
||||||
// valid entry
|
// valid entry
|
||||||
final Map<String, Object> entryB = new HashMap<>();
|
final Map<String, Object> entryB = new HashMap<>();
|
||||||
@@ -109,8 +112,8 @@ public class TcpIngestorTest {
|
|||||||
entryB.put("tags", Collections.emptyList());
|
entryB.put("tags", Collections.emptyList());
|
||||||
|
|
||||||
final ObjectMapper objectMapper = new ObjectMapper();
|
final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
final String data = objectMapper.writeValueAsString(entryA)+"\n"+corrupEntry+"\n"+objectMapper.writeValueAsString(entryB)+"\n";
|
final String data = objectMapper.writeValueAsString(entryA) + "\n" + corrupEntry + "\n"
|
||||||
|
+ objectMapper.writeValueAsString(entryB) + "\n";
|
||||||
|
|
||||||
PdbTestUtil.send(data);
|
PdbTestUtil.send(data);
|
||||||
}
|
}
|
||||||
@@ -120,7 +123,8 @@ public class TcpIngestorTest {
|
|||||||
Assert.assertEquals(result.size(), 1);
|
Assert.assertEquals(result.size(), 1);
|
||||||
|
|
||||||
Assert.assertEquals(result.get(0).getValue(), 2);
|
Assert.assertEquals(result.get(0).getValue(), 2);
|
||||||
Assert.assertEquals(result.get(0).getDate().toInstant(), dateB.toInstant());
|
Assert.assertEquals(result.get(0).getDate().toInstant().truncatedTo(ChronoUnit.MILLIS),
|
||||||
|
dateB.toInstant().truncatedTo(ChronoUnit.MILLIS));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,142 +0,0 @@
|
|||||||
package org.lucares.utils;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A memory efficient map implementation. It doesn't implement {@link Map},
|
|
||||||
* because this class does not support the full API of {@link Map}.
|
|
||||||
*/
|
|
||||||
public class MiniMap<K, V> {
|
|
||||||
|
|
||||||
private static final Object[] EMPTY_ARRAY = new Object[0];
|
|
||||||
private static final MiniMap<?,?> EMPTY_MAP = new MiniMap<>();
|
|
||||||
|
|
||||||
// keys are on even indices (0,2,4,...) and values on uneven (1,3,5,...)
|
|
||||||
private Object[] keysValues;
|
|
||||||
|
|
||||||
|
|
||||||
public MiniMap() {
|
|
||||||
keysValues = EMPTY_ARRAY;
|
|
||||||
}
|
|
||||||
|
|
||||||
public MiniMap(final MiniMap<K, V> miniMap){
|
|
||||||
keysValues = miniMap.keysValues.clone();
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public static final <K,V> MiniMap<K,V> emptyMap() {
|
|
||||||
return (MiniMap<K,V>) EMPTY_MAP;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int size() {
|
|
||||||
return keysValues.length / 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return size() == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean containsKey(Object key) {
|
|
||||||
return get(key) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public V get(Object key) {
|
|
||||||
|
|
||||||
final int size = size();
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
Object object = keysValues[2*i];
|
|
||||||
if (Objects.equals(key, object)) {
|
|
||||||
return (V) keysValues[2*i+1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public V put(K key, V value) {
|
|
||||||
|
|
||||||
V oldValue = get(key);
|
|
||||||
|
|
||||||
if (oldValue != null) {
|
|
||||||
final int size = size();
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
Object object = keysValues[2*i];
|
|
||||||
if (Objects.equals(key, object)) {
|
|
||||||
keysValues[2*i+1] = value;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
final Object[] newKeysValues = new Object[keysValues.length + 2];
|
|
||||||
System.arraycopy(keysValues, 0, newKeysValues, 0, keysValues.length);
|
|
||||||
|
|
||||||
|
|
||||||
newKeysValues[newKeysValues.length - 2] = key;
|
|
||||||
newKeysValues[newKeysValues.length - 1] = value;
|
|
||||||
|
|
||||||
keysValues = newKeysValues;
|
|
||||||
}
|
|
||||||
|
|
||||||
return oldValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void putAll(Map<? extends K, ? extends V> map) {
|
|
||||||
for (java.util.Map.Entry<? extends K, ? extends V> e : map.entrySet()) {
|
|
||||||
put(e.getKey(), e.getValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
keysValues = EMPTY_ARRAY;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public Set<K> keySet() {
|
|
||||||
final Set<K> result = new HashSet<>(size());
|
|
||||||
|
|
||||||
final int size = size();
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
K k = (K) keysValues[2*i];
|
|
||||||
result.add(k);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public Set<V> values() {
|
|
||||||
final Set<V> result = new HashSet<>(size());
|
|
||||||
final int size = size();
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
V v = (V) keysValues[2*i+1];
|
|
||||||
result.add(v);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
final int prime = 31;
|
|
||||||
int result = 1;
|
|
||||||
result = prime * result + Arrays.hashCode(keysValues);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("rawtypes")
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object obj) {
|
|
||||||
if (this == obj)
|
|
||||||
return true;
|
|
||||||
if (obj == null)
|
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
MiniMap other = (MiniMap) obj;
|
|
||||||
if (!Arrays.equals(keysValues, other.keysValues))
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
package org.lucares.utils;
|
|
||||||
|
|
||||||
import org.testng.Assert;
|
|
||||||
import org.testng.annotations.Test;
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public class MiniMapTest {
|
|
||||||
public void testInsertGet()
|
|
||||||
{
|
|
||||||
final MiniMap<String, String> map = new MiniMap<>();
|
|
||||||
|
|
||||||
String key1 = "key1";
|
|
||||||
String key2 = "key2";
|
|
||||||
String value1 = "value1";
|
|
||||||
String value2 = "value1";
|
|
||||||
|
|
||||||
|
|
||||||
map.put(key1, value1);
|
|
||||||
map.put(key2, value2);
|
|
||||||
|
|
||||||
Assert.assertEquals(map.get(key1), value1);
|
|
||||||
Assert.assertEquals(map.get(key2), value2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -3,7 +3,6 @@ package org.lucares.performance.db;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.OffsetDateTime;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
@@ -94,9 +93,8 @@ public class PerformanceDb implements AutoCloseable {
|
|||||||
final long duration = end - lastSync;
|
final long duration = end - lastSync;
|
||||||
final long entriesPerSecond = (long) (insertionsSinceLastSync / (duration / 1000.0));
|
final long entriesPerSecond = (long) (insertionsSinceLastSync / (duration / 1000.0));
|
||||||
|
|
||||||
METRICS_LOGGER
|
METRICS_LOGGER.debug(
|
||||||
.debug(String.format("inserting %d/s ; total: %,d; last: %s",
|
String.format("inserting %d/s ; total: %,d; last: %s", entriesPerSecond, count, entry));
|
||||||
entriesPerSecond, count, entry));
|
|
||||||
tagsToFile.flush();
|
tagsToFile.flush();
|
||||||
|
|
||||||
lastSync = System.currentTimeMillis();
|
lastSync = System.currentTimeMillis();
|
||||||
@@ -156,8 +154,8 @@ public class PerformanceDb implements AutoCloseable {
|
|||||||
final Grouping grouping = Grouping.groupBy(pdbFiles, groupBy);
|
final Grouping grouping = Grouping.groupBy(pdbFiles, groupBy);
|
||||||
|
|
||||||
final Result result = toResult(grouping);
|
final Result result = toResult(grouping);
|
||||||
METRICS_LOGGER.debug("query execution took: " + (System.nanoTime() - start) / 1_000_000.0
|
METRICS_LOGGER.debug("query execution took: " + (System.nanoTime() - start) / 1_000_000.0 + "ms: " + query
|
||||||
+ "ms: " + query + " ("+groupBy+"): files found: " + pdbFiles.size());
|
+ " (" + groupBy + "): files found: " + pdbFiles.size());
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package org.lucares.performance.db;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.time.OffsetDateTime;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@@ -24,8 +23,8 @@ import org.slf4j.LoggerFactory;
|
|||||||
public class TagsToFile implements AutoCloseable {
|
public class TagsToFile implements AutoCloseable {
|
||||||
|
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(TagsToFile.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(TagsToFile.class);
|
||||||
private final static Logger METRICS_LOGGER_FIND_WRITER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.tagsToFile.findWriter");
|
private final static Logger METRICS_LOGGER_NEW_WRITER = LoggerFactory
|
||||||
private final static Logger METRICS_LOGGER_NEW_WRITER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.tagsToFile.newPdbWriter");
|
.getLogger("org.lucares.metrics.ingestion.tagsToFile.newPdbWriter");
|
||||||
|
|
||||||
private static class WriterCache {
|
private static class WriterCache {
|
||||||
final List<PdbWriter> writers = new ArrayList<>();
|
final List<PdbWriter> writers = new ArrayList<>();
|
||||||
@@ -59,7 +58,7 @@ public class TagsToFile implements AutoCloseable {
|
|||||||
public List<PdbFile> getFilesForQuery(final String query) {
|
public List<PdbFile> getFilesForQuery(final String query) {
|
||||||
|
|
||||||
final List<Doc> searchResult = db.search(query);
|
final List<Doc> searchResult = db.search(query);
|
||||||
if (searchResult.size() > 500_000){
|
if (searchResult.size() > 500_000) {
|
||||||
throw new IllegalStateException("Too many results.");
|
throw new IllegalStateException("Too many results.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,7 +83,8 @@ public class TagsToFile implements AutoCloseable {
|
|||||||
final PdbWriter result;
|
final PdbWriter result;
|
||||||
final WriterCache writersForTags = getOrInit(tags);
|
final WriterCache writersForTags = getOrInit(tags);
|
||||||
|
|
||||||
final Optional<PdbWriter> optionalWriter = chooseBestMatchingWriter(writersForTags.getWriters(), dateAsEpochMilli);
|
final Optional<PdbWriter> optionalWriter = chooseBestMatchingWriter(writersForTags.getWriters(),
|
||||||
|
dateAsEpochMilli);
|
||||||
|
|
||||||
if (optionalWriter.isPresent()) {
|
if (optionalWriter.isPresent()) {
|
||||||
result = optionalWriter.get();
|
result = optionalWriter.get();
|
||||||
@@ -162,7 +162,8 @@ public class TagsToFile implements AutoCloseable {
|
|||||||
|
|
||||||
getOrInit(tags).addWriter(result);
|
getOrInit(tags).addWriter(result);
|
||||||
|
|
||||||
METRICS_LOGGER_NEW_WRITER.debug("newPdbWriter took {}ms tags: {}", (System.nanoTime() - start) / 1_000_000.0, tags);
|
METRICS_LOGGER_NEW_WRITER.debug("newPdbWriter took {}ms tags: {}",
|
||||||
|
(System.nanoTime() - start) / 1_000_000.0, tags);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
|
|||||||
Reference in New Issue
Block a user