diff --git a/.gitignore b/.gitignore index f1d628f..ea24bc0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ -/.settings/ /.classpath -/.project /.gradle/ +/.project +/.settings/ +/bin/ /build/ +/target/ +/test-output/ diff --git a/data-store/.gitignore b/data-store/.gitignore new file mode 100644 index 0000000..bf7d3fd --- /dev/null +++ b/data-store/.gitignore @@ -0,0 +1,7 @@ +/.settings/ +/.classpath +/.project +/bin/ +/build/ +/target/ +/test-output/ diff --git a/data-store/build.gradle b/data-store/build.gradle new file mode 100644 index 0000000..f915ef6 --- /dev/null +++ b/data-store/build.gradle @@ -0,0 +1,24 @@ +apply plugin: 'antlr' + +dependencies { + compile project(':pdb-api') + compile project(':file-utils') + compile project(':pdb-utils') + antlr "org.antlr:antlr4:4.7" + + compile 'org.lucares:primitiveCollections:0.1.20170205141947' + compile 'org.apache.commons:commons-lang3:3.5' + + compile 'org.apache.logging.log4j:log4j-core:2.8.2' + compile 'org.apache.logging.log4j:log4j-slf4j-impl:2.8.2' +} + +sourceSets { + generated{ + java.srcDir "build/generated-src/antlr/main" + } +} + +compileJava{ + source += sourceSets.generated.java +} diff --git a/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 new file mode 100644 index 0000000..b8c1176 --- /dev/null +++ b/data-store/src/main/antlr/org/lucares/pdb/datastore/lang/PdbLang.g4 @@ -0,0 +1,65 @@ +grammar PdbLang; + +@header { +package org.lucares.pdb.datastore.lang; +} + +start : expression EOF ; + +expression + : LPAREN expression RPAREN #parenExpression + | NOT expression #notExpression + | prop=identifier eq=equal value=propValue #propertyExpression + | left=expression AND right=expression #binaryAndExpression + | left=expression OR right=expression #binaryOrExpression + ; + +identifier + : IDENTIFIER #identifierExpression + ; +propValue + : identifier + ; + +equal : EQUAL ; + +AND : 'and' ; +OR : 'or' ; 
+NOT : '!'; +EQUAL : '=' ; +LPAREN : '(' ; +RPAREN : ')' ; +WS : [ \r\t\u000C\n]+ -> skip; + + +IDENTIFIER + : JavaLetter JavaLetterOrDigit* + ; + + +fragment +JavaLetter + : [a-zA-Z0-9$_] // these are the "java letters" below 0x7F + | [\u002a] // asterisk, used for wildcards + | // covers all characters above 0x7F which are not a surrogate + ~[\u0000-\u007F\uD800-\uDBFF] + {Character.isJavaIdentifierStart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + ; + +fragment +JavaLetterOrDigit + : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F + | [\u002a] // asterisk, used for wildcards + | '.' + | '/' + | '-' + | // covers all characters above 0x7F which are not a surrogate + ~[\u0000-\u007F\uD800-\uDBFF] + {Character.isJavaIdentifierPart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? 
+    ;
\ No newline at end of file
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/DataStore.java
new file mode 100644
index 0000000..3d0c949
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/DataStore.java
@@ -0,0 +1,223 @@
+package org.lucares.pdb.datastore;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+import org.lucares.collections.IntList;
+import org.lucares.pdb.api.Tags;
+import org.lucares.pdb.datastore.internal.FolderStorage;
+import org.lucares.pdb.datastore.internal.RadixConverter;
+import org.lucares.pdb.datastore.internal.StringCompressor;
+import org.lucares.pdb.datastore.lang.Expression;
+import org.lucares.pdb.datastore.lang.ExpressionToDocIdVisitor;
+import org.lucares.pdb.datastore.lang.ExpressionToDocIdVisitor.AllDocIds;
+import org.lucares.pdb.datastore.lang.QueryLanguageParser;
+
+/**
+ * Index over the storage files below a data directory. The tags encoded in each
+ * file name are mapped to document ids, which {@link #search(String)} resolves.
+ */
+public class DataStore {
+
+    private static final String SUBDIR_STORAGE = "storage";
+    private static final String PDB_EXTENSION = ".pdb";
+    private static final String KEY_VALUE_SEPARATOR = "-";
+    private static final String KEY_VALUE_PAIR_SEPARATOR = "_";
+    private static final String KEY_VALUE_END_SEPARATOR = "$";
+
+    private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+";
+
+    private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)
+            + REGEX_KEY_VALUE + ")*";
+
+    private static final String REGEX_STORAGE_FILE = String.format("(%1$s)%2$s[0-9]*%3$s", REGEX_KEY_VALUE_PAIRS,
+            Pattern.quote(KEY_VALUE_END_SEPARATOR), Pattern.quote(PDB_EXTENSION));
+
+    private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE);
+
+    /** Documents in insertion order; the list index is the document id. */
+    private final List<Doc> docIdToDoc = new ArrayList<>();
+
+    /** Maps key -> value -> ids of the documents carrying that tag. */
+    private final Map<String, Map<String, IntList>> keyToValueToDocId = new HashMap<>();
+
+    private final StringCompressor stringCompressor;
+    private final FolderStorage folderStorage;
+
+    /**
+     * Opens the store below {@code dataDirectory} and indexes the files found there.
+     *
+     * @throws IOException if the storage directory cannot be created or listed
+     */
+    public DataStore(final Path dataDirectory) throws IOException {
+        stringCompressor = StringCompressor.create(keyCompressionFile(dataDirectory));
+
+        folderStorage = new FolderStorage(storageDirectory(dataDirectory), 1000);
+        init(folderStorage);
+    }
+
+    /**
+     * Rebuilds the in-memory tag index from the files already present in storage.
+     */
+    private void init(final FolderStorage folderStorage) throws IOException {
+
+        // The listing is backed by an open directory walk, so it must be closed.
+        try (final Stream<Path> files = folderStorage.list()) {
+            files.forEach(path -> {
+
+                final String filename = path.getFileName().toString();
+                final Tags tags = toTags(filename);
+                cacheTagToFileMapping(tags, path);
+
+            });
+        }
+    }
+
+    /** Registers the file as the next document and indexes each of its tags. */
+    private void cacheTagToFileMapping(final Tags tags, final Path path) {
+
+        final int docId = docIdToDoc.size();
+        docIdToDoc.add(new Doc(tags, path));
+
+        for (final String key : tags.getKeys()) {
+            final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new HashMap<>());
+
+            final String value = tags.getValue(key);
+
+            final IntList docIds = valueToDocIds.computeIfAbsent(value, v -> new IntList());
+            docIds.add(docId);
+        }
+    }
+
+    private Path keyCompressionFile(final Path dataDirectory) throws IOException {
+        return dataDirectory.resolve("keys.csv");
+    }
+
+    /** Returns the directory below {@code dataDirectory} that holds the storage files. */
+    public static Path storageDirectory(final Path dataDirectory) throws IOException {
+        return dataDirectory.resolve(SUBDIR_STORAGE);
+    }
+
+    /**
+     * Creates an empty storage file whose name encodes {@code tags} and adds it to the index.
+     *
+     * @return the path of the newly created file
+     */
+    public Path createNewFile(final Tags tags) throws IOException {
+
+        final Path filename = toFilename(tags);
+        final Path result = folderStorage.insert(filename.toString(), PDB_EXTENSION);
+
+        cacheTagToFileMapping(tags, result);
+
+        return result;
+    }
+
+    /** Encodes the tags, sorted by key, as compressed key-value pairs. */
+    private Path toFilename(final Tags tags) {
+        final StringBuilder path = new StringBuilder();
+
+        final SortedSet<String> sortedKeys = new
TreeSet<>(tags.getKeys()); + + for (final String key : sortedKeys) { + final String value = tags.getValue(key); + + final int compressedKey = stringCompressor.put(key); + final int compressedValue = stringCompressor.put(value); + + if (path.length() > 0) { + path.append(KEY_VALUE_PAIR_SEPARATOR); + } + + path.append(RadixConverter.toString(compressedKey)); + path.append(KEY_VALUE_SEPARATOR); + path.append(RadixConverter.toString(compressedValue)); + } + path.append(KEY_VALUE_END_SEPARATOR); + + return Paths.get(path.toString()); + } + + private Tags toTags(final String filename) { + Tags tags = Tags.create(); + + final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename); + + if (matcher.find()) { + final String serializedTags = matcher.group(1); + + final String[] serializedKeyValuePairs = serializedTags.split(Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)); + + for (int i = 0; i < serializedKeyValuePairs.length; i++) { + final String[] keyValuePair = serializedKeyValuePairs[i].split(Pattern.quote(KEY_VALUE_SEPARATOR)); + + if (keyValuePair.length == 2) { + + final String key = stringCompressor.get(RadixConverter.fromString(keyValuePair[0])); + final String value = stringCompressor.get(RadixConverter.fromString(keyValuePair[1])); + + tags = tags.copyAdd(key, value); + } + } + } + + return tags; + } + + public List search(final String query) { + + final Expression expression = QueryLanguageParser.parse(query); + final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(keyToValueToDocId, + new AllDocIds(docIdToDoc)); + final IntList docIdsList = expression.visit(visitor); + + final List result = new ArrayList<>(docIdsList.size()); + + final int[] intDocIds = docIdsList.toArray(); + for (int i = 0; i < intDocIds.length; i++) { + final int docId = intDocIds[i]; + + final Doc doc = docIdToDoc.get(docId); + result.add(doc); + } + + return result; + } + + public List getAvailableFields() { + + final List result = new ArrayList<>(); + 
result.addAll(keyToValueToDocId.keySet()); + + Collections.sort(result); + + return result; + } + + public SortedSet getAvailableValuesForKey(final String query, final String key) { + + final SortedSet result = new TreeSet<>(); + final List docs = search(query); + for (final Doc doc : docs) { + final String valueForKey = doc.getTags().getValue(key); + + if (valueForKey != null) { + result.add(valueForKey); + } + } + + return result; + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/Doc.java b/data-store/src/main/java/org/lucares/pdb/datastore/Doc.java new file mode 100644 index 0000000..1171a7d --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/Doc.java @@ -0,0 +1,30 @@ +package org.lucares.pdb.datastore; + +import java.nio.file.Path; + +import org.lucares.pdb.api.Tags; + +public class Doc { + private final Tags tags; + private final Path path; + + public Doc(final Tags tags, final Path path) { + super(); + this.tags = tags; + this.path = path; + } + + public Tags getTags() { + return tags; + } + + public Path getPath() { + return path; + } + + @Override + public String toString() { + return "Doc [tags=" + tags + ", path=" + path + "]"; + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/CreateNewKey.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/CreateNewKey.java new file mode 100644 index 0000000..3abfffe --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/CreateNewKey.java @@ -0,0 +1,20 @@ +package org.lucares.pdb.datastore.internal; + +import java.util.function.Function; + +public class CreateNewKey implements Function { + + private final int index; + + public CreateNewKey(final int index) { + this.index = index; + } + + @Override + public String apply(final String key) { + + final String result = String.valueOf(index); + + return result; + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/FolderStorage.java 
b/data-store/src/main/java/org/lucares/pdb/datastore/internal/FolderStorage.java
new file mode 100644
index 0000000..e937b79
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/FolderStorage.java
@@ -0,0 +1,114 @@
+package org.lucares.pdb.datastore.internal;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.function.BiPredicate;
+import java.util.stream.Stream;
+
+/**
+ * Distributes files over numbered first and second level folders below a base
+ * directory and starts a new folder once the current one holds
+ * {@code maxFilesPerFolder} files.
+ */
+public class FolderStorage {
+
+    private final Path storageBaseDirectory;
+
+    private int firstLevel = 0;
+    private int secondLevel = 0;
+    private int filesInSecondLevel = 0;
+
+    private Path currentDirectory;
+
+    private final int maxFilesPerFolder;
+
+    public FolderStorage(final Path storageBaseDirectory, final int maxFilesPerFolder) throws IOException {
+        this.storageBaseDirectory = storageBaseDirectory;
+        this.maxFilesPerFolder = maxFilesPerFolder;
+        init();
+    }
+
+    /**
+     * Creates the base directory if necessary and continues in the first and second
+     * level folders whose numbers are the respective entry counts minus one (0 when
+     * there are no entries yet).
+     */
+    private void init() throws IOException {
+
+        Files.createDirectories(storageBaseDirectory);
+
+        firstLevel = Math.max(countEntries(storageBaseDirectory) - 1, 0);
+
+        final Path firstLevelDirectory = storageBaseDirectory.resolve(String.valueOf(firstLevel));
+        Files.createDirectories(firstLevelDirectory);
+
+        secondLevel = Math.max(countEntries(firstLevelDirectory) - 1, 0);
+        currentDirectory = firstLevelDirectory.resolve(String.valueOf(secondLevel));
+        Files.createDirectories(currentDirectory);
+
+        filesInSecondLevel = countEntries(currentDirectory);
+    }
+
+    /**
+     * Counts the direct children of {@code directory}. The stream returned by
+     * {@link Files#list(Path)} holds an open directory until it is closed, hence
+     * the try-with-resources.
+     */
+    private static int countEntries(final Path directory) throws IOException {
+        try (Stream<Path> entries = Files.list(directory)) {
+            return (int) entries.count();
+        }
+    }
+
+    /**
+     * Creates a new empty file named {@code filenamePrefix + filenameSuffix} in the
+     * current folder. If that name already exists, a counter starting at 1 is put
+     * between prefix and suffix and incremented until the name is free.
+     *
+     * @return the path of the created file
+     */
+    public Path insert(final String filenamePrefix, final String filenameSuffix) throws IOException {
+
+        ensureCapacity();
+
+        String filename = filenamePrefix + filenameSuffix;
+        int index = 1;
+        Path newFile = currentDirectory.resolve(filename);
+        while (Files.exists(newFile)) {
+            filename = filenamePrefix + index++ + filenameSuffix;
+            newFile = currentDirectory.resolve(filename);
+        }
+        Files.createFile(newFile);
+        filesInSecondLevel++;
+
+        return newFile;
+    }
+
+    private void ensureCapacity() throws IOException {
+        if (filesInSecondLevel >= maxFilesPerFolder) {
+            secondLevel++;
+            if (secondLevel >= maxFilesPerFolder) {
+                firstLevel++;
+                secondLevel = 0;
+            }
+            filesInSecondLevel = 0;
+
+            updateCurrentDirectory();
+        }
+    }
+
+    private void updateCurrentDirectory() throws IOException {
+        currentDirectory = storageBaseDirectory.resolve(String.valueOf(firstLevel))
+                .resolve(String.valueOf(secondLevel));
+        Files.createDirectories(currentDirectory);
+    }
+
+    public Stream<Path> list() throws IOException {
+        final int maxDepth = Integer.MAX_VALUE;
+        final BiPredicate<Path, BasicFileAttributes> matchRegularFiles = (path, attr) -> Files.isRegularFile(path);
+
+        return Files.find(storageBaseDirectory, maxDepth, matchRegularFiles);
+    }
+}
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/RadixConverter.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/RadixConverter.java
new file mode 100644
index 0000000..d8077aa
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/RadixConverter.java
@@ -0,0 +1,69 @@
+package org.lucares.pdb.datastore.internal;
+
+/**
+ * Converts non-negative integers to and from a compact textual form, using the
+ * characters of {@code ALPHABET} as digits.
+ */
+public class RadixConverter {
+
+    /**
+     * The digits in order of their value; the radix is the length of this string.
+     * NOTE(review): 'Q' is absent and 'c' precedes 'b', which looks accidental.
+     * The string is kept unchanged because altering it would change the value of
+     * strings that were encoded with it.
+     */
+    private static final String ALPHABET = "0123456789ABCDEFGHIJKLMNOPRSTUVWXYZacbdefghijklmnopqrstuvwxyz";
+
+    /**
+     * Encodes {@code value} using the alphabet's digits, most significant first.
+     *
+     * @throws IllegalArgumentException if {@code value} is negative
+     */
+    public static String toString(final int value) {
+
+        if (value < 0) {
+            throw new IllegalArgumentException("value must not be negative");
+        }
+
+        final StringBuilder result = new StringBuilder();
+        int v = value;
+
+        if (v == 0) {
+            result.append(ALPHABET.charAt(0));
+        } else {
+            while (v > 0) {
+                final int remainder = v % ALPHABET.length();
+                v = v / ALPHABET.length();
+
+                result.insert(0, ALPHABET.charAt(remainder));
+            }
+        }
+
+        return result.toString();
+    }
+
+    /**
+     * Decodes a string produced by {@link #toString(int)}; "" decodes to 0.
+     *
+     * @throws IllegalArgumentException if a character is not part of the alphabet
+     */
+    public static int fromString(final String string) {
+
+        int result = 0;
+
+        for (int i = 0; i < string.length(); i++) {
+            final char digit = string.charAt(i);
+            final int value = ALPHABET.indexOf(digit);
+            if (value < 0) {
+                // indexOf yields -1 for unknown characters; fail instead of
+                // folding that -1 into the result.
+                throw new IllegalArgumentException(
+                        "invalid character '" + digit + "' at index " + i + " in \"" + string + "\"");
+            }
+            result = result * ALPHABET.length() + value;
+        }
+
+        return result;
+    }
+
+}
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/RuntimeIOException.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/RuntimeIOException.java
new file mode 100644
index 0000000..c080902
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/RuntimeIOException.java
@@ -0,0 +1,10 @@
+package org.lucares.pdb.datastore.internal;
+
+public class RuntimeIOException extends RuntimeException {
+
+    private static final long serialVersionUID = 1L;
+
+    public RuntimeIOException(final Throwable cause) {
+        super(cause);
+    }
+}
diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/StringCompressor.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/StringCompressor.java
new file mode 100644
index 0000000..bd4eab9
--- /dev/null
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/StringCompressor.java
@@ -0,0 +1,32 @@
+package org.lucares.pdb.datastore.internal;
+
+import java.nio.file.Path;
+
+import org.lucares.pdb.datastore.internal.map.UniqueStringIntegerPairs;
+
+/**
+ * Persistently maps Strings to integers.
+ */ +public class StringCompressor { + + private final UniqueStringIntegerPairs usip; + + public StringCompressor(final UniqueStringIntegerPairs usip) throws RuntimeIOException { + this.usip = usip; + } + + public static StringCompressor create(final Path path) { + final UniqueStringIntegerPairs mapsi = new UniqueStringIntegerPairs(path); + return new StringCompressor(mapsi); + } + + public Integer put(final String string) { + + return usip.computeIfAbsent(string, s -> usip.getHighestInteger() + 1); + } + + public String get(final int integer) { + + return usip.getKey(integer); + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/map/UniqueStringIntegerPairs.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/map/UniqueStringIntegerPairs.java new file mode 100644 index 0000000..1467344 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/map/UniqueStringIntegerPairs.java @@ -0,0 +1,126 @@ +package org.lucares.pdb.datastore.internal.map; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.function.Function; +import java.util.regex.Pattern; + +import org.lucares.pdb.datastore.internal.RuntimeIOException; + +/** + * A very simple {@link Set}-like or {@link Map}-like datastructure that stores + * unique¹ pairs of Strings and integers persistently. + *

+ * (1) Unique means that neither the string nor the integer may occur twice. + * For example, if the pair ("a", 1) already exists, then neither ("a", 2) + * nor ("b", 1) may be added. + *

+ * You can only add pairs. No deletion. It keeps an in memory view for fast + * retrievals. + */ +public class UniqueStringIntegerPairs { + private static final String SEPARATOR = "\t"; + + private static final boolean APPEND = true; + + /** + * Maps a string to an integer. E.g. "myLongValue" -> 123 + */ + private final Map stringToInt = new HashMap<>(); + + /** + * Maps an integer to a string. E.g. 123 -> "myLongValue" + */ + private final SortedMap intToString = new TreeMap<>(); + + private final Path file; + + public UniqueStringIntegerPairs(final Path file) { + super(); + this.file = file; + init(file); + } + + private void init(final Path file) throws RuntimeIOException { + + try { + Files.createDirectories(file.getParent()); + if (!Files.exists(file)) { + Files.createFile(file); + } + + try (final BufferedReader reader = new BufferedReader( + new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + + final String[] tokens = line.split(Pattern.quote(SEPARATOR)); + + if (tokens.length == 2) { + final String string = tokens[0]; + final int value = Integer.parseInt(tokens[1]); + intToString.put(value, string); + stringToInt.put(string, value); + } + } + } + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + public void put(final String first, final int second) { + + if (stringToInt.containsKey(first) || intToString.containsKey(second)) { + throw new IllegalArgumentException("Unique key constraint violation for (" + first + ", " + second + ")"); + } + + try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND), + StandardCharsets.UTF_8)) { + + writer.write(first + SEPARATOR + second + "\n"); + + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + + intToString.put(second, first); + stringToInt.put(first, second); + } + + public Integer get(final String first) { + + return 
stringToInt.get(first); + } + + public String getKey(final Integer second) { + return intToString.get(second); + } + + public Integer getHighestInteger() { + return intToString.size() == 0 ? -1 : intToString.lastKey(); + } + + public Integer computeIfAbsent(final String first, final Function mappingFunction) { + + if (!stringToInt.containsKey(first)) { + final Integer second = mappingFunction.apply(first); + put(first, second); + } + + return stringToInt.get(first); + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ErrorListener.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ErrorListener.java new file mode 100644 index 0000000..7afc64e --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ErrorListener.java @@ -0,0 +1,19 @@ +package org.lucares.pdb.datastore.lang; + +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; + +public class ErrorListener extends BaseErrorListener { + + @Override + public void syntaxError(final Recognizer recognizer, final Object offendingSymbol, final int line, + final int charPositionInLine, final String msg, final RecognitionException e) { + + final int lineStart = line; + final int startIndex = charPositionInLine; + final int lineStop = line; + final int stopIndex = charPositionInLine; + throw new SyntaxException(msg, lineStart, startIndex, lineStop, stopIndex); + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java new file mode 100644 index 0000000..1299b2d --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java @@ -0,0 +1,437 @@ +package org.lucares.pdb.datastore.lang; + +abstract public class Expression { + + public T visit(final ExpressionVisitor visitor) { + throw new UnsupportedOperationException(); + } + + abstract static class 
UnaryExpression extends Expression { + + private final int line; + private final int startIndex; + private final int stopIndex; + + public UnaryExpression(final int line, final int startIndex, final int stopIndex) { + super(); + this.line = line; + this.startIndex = startIndex; + this.stopIndex = stopIndex; + } + + int getLine() { + return line; + } + + int getStartIndex() { + return startIndex; + } + + int getStopIndex() { + return stopIndex; + } + } + + abstract static class TemporaryExpression extends Expression { + + abstract Expression toExpression(Expression left, Expression right); + } + + public static MatchAll matchAll() { + return MatchAll.INSTANCE; + } + + static class OrTemporary extends TemporaryExpression { + + @Override + Expression toExpression(final Expression left, final Expression right) { + return new Or(left, right); + } + + @Override + public String toString() { + return "OrTemporary"; + } + } + + static class AndTemporary extends TemporaryExpression { + @Override + Expression toExpression(final Expression left, final Expression right) { + return new And(left, right); + } + + @Override + public String toString() { + return "AndTemporary"; + } + } + + static class Not extends Expression { + private final Expression expression; + + Not(final Expression expression) { + this.expression = expression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + Expression getExpression() { + return expression; + } + + @Override + public String toString() { + return "!" + expression; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((expression == null) ? 
0 : expression.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Not other = (Not) obj; + if (expression == null) { + if (other.expression != null) { + return false; + } + } else if (!expression.equals(other.expression)) { + return false; + } + return true; + } + + } + + static class Or extends Expression { + private final Expression left; + private final Expression right; + + Or(final Expression left, final Expression right) { + this.left = left; + this.right = right; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + Expression getLeft() { + return left; + } + + Expression getRight() { + return right; + } + + @Override + public String toString() { + + return " (" + left + " or " + right + ") "; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((left == null) ? 0 : left.hashCode()); + result = prime * result + ((right == null) ? 
0 : right.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Or other = (Or) obj; + if (left == null) { + if (other.left != null) { + return false; + } + } else if (!left.equals(other.left)) { + return false; + } + if (right == null) { + if (other.right != null) { + return false; + } + } else if (!right.equals(other.right)) { + return false; + } + return true; + } + + } + + static class And extends Expression { + private final Expression left; + private final Expression right; + + And(final Expression left, final Expression right) { + this.left = left; + this.right = right; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + Expression getLeft() { + return left; + } + + Expression getRight() { + return right; + } + + @Override + public String toString() { + + return " (" + left + " and " + right + ") "; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((left == null) ? 0 : left.hashCode()); + result = prime * result + ((right == null) ? 
0 : right.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final And other = (And) obj; + if (left == null) { + if (other.left != null) { + return false; + } + } else if (!left.equals(other.left)) { + return false; + } + if (right == null) { + if (other.right != null) { + return false; + } + } else if (!right.equals(other.right)) { + return false; + } + return true; + } + + } + + static class MatchAll extends Expression { + + public static final MatchAll INSTANCE = new MatchAll(); + + private MatchAll() { + // + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + @Override + public String toString() { + + return "true"; + } + } + + static class Terminal extends UnaryExpression { + private final String value; + + Terminal(final String value, final int line, final int startIndex, final int stopIndex) { + super(line, startIndex, stopIndex); + this.value = value; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + @Override + public String toString() { + + return value; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((value == null) ? 
0 : value.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Terminal other = (Terminal) obj; + if (value == null) { + if (other.value != null) { + return false; + } + } else if (!value.equals(other.value)) { + return false; + } + return true; + } + + public String getValue() { + return value; + } + } + + static class Property extends Expression { + final String property; + final Terminal value; + + public Property(final String property, final Terminal value) { + this.property = property; + this.value = value; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + @Override + public String toString() { + + return " " + property + " = " + value.getValue() + " "; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((property == null) ? 0 : property.hashCode()); + result = prime * result + ((value == null) ? 
0 : value.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final Property other = (Property) obj; + if (property == null) { + if (other.property != null) + return false; + } else if (!property.equals(other.property)) + return false; + if (value == null) { + if (other.value != null) + return false; + } else if (!value.equals(other.value)) + return false; + return true; + } + } + + static class Parentheses extends Expression { + private final Expression expression; + + Parentheses(final Expression expression) { + this.expression = expression; + } + + @Override + public T visit(final ExpressionVisitor visitor) { + return visitor.visit(this); + } + + public Expression getExpression() { + return expression; + } + + @Override + public String toString() { + + return " [ " + expression + " ] "; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((expression == null) ? 
0 : expression.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Parentheses other = (Parentheses) obj; + if (expression == null) { + if (other.expression != null) { + return false; + } + } else if (!expression.equals(other.expression)) { + return false; + } + return true; + } + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java new file mode 100644 index 0000000..75d388d --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java @@ -0,0 +1,248 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.regex.Pattern; + +import org.apache.commons.lang3.StringUtils; +import org.lucares.collections.IntList; +import org.lucares.pdb.datastore.Doc; +import org.lucares.pdb.datastore.lang.Expression.And; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.Or; +import org.lucares.pdb.datastore.lang.Expression.Parentheses; +import org.lucares.pdb.datastore.lang.Expression.Property; +import org.lucares.pdb.datastore.lang.Expression.Terminal; +import org.lucares.utils.CollectionUtils; + +public class ExpressionToDocIdVisitor extends ExpressionVisitor { + + public static final class AllDocIds { + + private final List docIdToPath; + + private IntList cachedPathIds = new IntList(); + + public AllDocIds(final List docIdToPath) { + this.docIdToPath = docIdToPath; + } + + public IntList getAllDocIds() { + + final int pathIds = 
docIdToPath.size(); + + if (cachedPathIds.size() != pathIds) { + final IntList result = new IntList(pathIds); + for (int i = 0; i < pathIds; i++) { + result.add(i); + } + cachedPathIds = result; + } + + return cachedPathIds; + } + } + + private static final Map EMPTY_VALUES = Collections.emptyMap(); + private static final IntList EMPTY_DOC_IDS = new IntList(); + private final Map> keyToValueToDocId; + private final AllDocIds allDocIds; + + public ExpressionToDocIdVisitor(final Map> keyToValueToDocId, + final AllDocIds allDocIds) { + this.keyToValueToDocId = keyToValueToDocId; + this.allDocIds = allDocIds; + } + + @Override + public IntList visit(final And expression) { + + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + final IntList leftFiles = left.visit(this); + final IntList rightFiles = right.visit(this); + + final IntList result = new IntList(Math.min(leftFiles.size(), rightFiles.size())); + + int l = 0; + int r = 0; + + while (l < leftFiles.size() && r < rightFiles.size()) { + + final int lv = leftFiles.get(l); + final int rv = rightFiles.get(r); + + if (lv < rv) { + l++; + } else if (lv > rv) { + r++; + } else { + result.add(lv); + l++; + r++; + } + } + + return result; + } + + @Override + public IntList visit(final Or expression) { + final Expression left = expression.getLeft(); + final Expression right = expression.getRight(); + + final IntList leftFiles = left.visit(this); + final IntList rightFiles = right.visit(this); + + final IntList result = merge(leftFiles, rightFiles); + + return result; + } + + @Override + public IntList visit(final Not expression) { + + final Expression negatedExpression = expression.getExpression(); + final IntList expr = negatedExpression.visit(this); + final IntList allDocIds = getAllDocIds(); + + final IntList result = new IntList(allDocIds.size()); + + final int[] docIdsToBeNegated = expr.toArray(); + for (int i = 0; i < allDocIds.size(); i++) { + final int docId = 
allDocIds.get(i); + if (Arrays.binarySearch(docIdsToBeNegated, docId) < 0) { + result.add(docId); + } + } + + return result; + } + + @Override + public IntList visit(final Parentheses parentheses) { + + throw new UnsupportedOperationException( + "Parenthesis not supported. The correct order should come from the parser."); + } + + @Override + public IntList visit(final Expression.MatchAll expression) { + + return getAllDocIds(); + } + + private IntList getAllDocIds() { + return allDocIds.getAllDocIds(); + } + + @Override + public IntList visit(final Property expression) { + + final String propertyName = expression.property; + final Terminal propertyValue = expression.value; + final String stringValue = propertyValue.getValue(); + + final IntList result; + if (isMatchAll(stringValue)) { + + final Map allValuesForKey = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES); + + result = merge(allValuesForKey.values()); + } else if (containsWildcard(stringValue)) { + + final Collection docIds = filterByWildcard(propertyName, globToRegex(stringValue)); + + result = merge(docIds); + } else { + result = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES).getOrDefault(stringValue, + EMPTY_DOC_IDS); + } + + return result; + } + + private Pattern globToRegex(final String globPattern) { + + final String[] tokens = StringUtils.splitPreserveAllTokens(globPattern, "*"); + + final List quotedTokens = CollectionUtils.map(tokens, Pattern::quote); + final String regex = String.join(".*", quotedTokens); + + return Pattern.compile(regex); + } + + private List filterByWildcard(final String propertyName, final Pattern valuePattern) { + + final List result = new ArrayList<>(); + + final Map valueToDocId = keyToValueToDocId.getOrDefault(propertyName, EMPTY_VALUES); + for (final Entry entry : valueToDocId.entrySet()) { + if (valuePattern.matcher(entry.getKey()).matches()) { + result.add(entry.getValue()); + } + } + + return result; + } + + private boolean containsWildcard(final 
String stringValue) { + return stringValue.contains("*"); + } + + private IntList merge(final Collection lists) { + + IntList result = new IntList(); + + for (final IntList intList : lists) { + result = merge(result, intList); + } + + return result; + } + + private boolean isMatchAll(final String stringValue) { + return Objects.equals("*", stringValue); + } + + private IntList merge(final IntList leftFiles, final IntList rightFiles) { + final IntList result = new IntList(leftFiles.size() + rightFiles.size()); + + int l = 0; + int r = 0; + + while (l < leftFiles.size() && r < rightFiles.size()) { + + final int lv = leftFiles.get(l); + final int rv = rightFiles.get(r); + + if (lv < rv) { + result.add(lv); + l++; + } else if (lv > rv) { + result.add(rv); + r++; + } else { + result.add(lv); + l++; + r++; + } + } + + if (l < leftFiles.size()) { + final int length = leftFiles.size() - l; + result.addAll(leftFiles.get(l, length)); + } else if (r < rightFiles.size()) { + final int length = rightFiles.size() - r; + result.addAll(rightFiles.get(r, length)); + } + return result; + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java new file mode 100644 index 0000000..20e1e50 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionVisitor.java @@ -0,0 +1,31 @@ +package org.lucares.pdb.datastore.lang; + +public abstract class ExpressionVisitor { + public T visit(final Expression.And expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.Or expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.Not expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.Property expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.Terminal expression) { + throw new 
UnsupportedOperationException(); + } + + public T visit(final Expression.MatchAll expression) { + throw new UnsupportedOperationException(); + } + + public T visit(final Expression.Parentheses parentheses) { + throw new UnsupportedOperationException(); + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java new file mode 100644 index 0000000..f75d1e39 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguage.java @@ -0,0 +1,109 @@ +package org.lucares.pdb.datastore.lang; + +import java.util.Stack; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.ParseTreeWalker; +import org.lucares.pdb.datastore.lang.Expression.AndTemporary; +import org.lucares.pdb.datastore.lang.Expression.Not; +import org.lucares.pdb.datastore.lang.Expression.OrTemporary; +import org.lucares.pdb.datastore.lang.Expression.Property; +import org.lucares.pdb.datastore.lang.Expression.TemporaryExpression; +import org.lucares.pdb.datastore.lang.Expression.Terminal; +import org.lucares.pdb.datastore.lang.PdbLangParser.BinaryAndExpressionContext; +import org.lucares.pdb.datastore.lang.PdbLangParser.BinaryOrExpressionContext; +import org.lucares.pdb.datastore.lang.PdbLangParser.IdentifierExpressionContext; +import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext; +import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyExpressionContext; + +public class QueryLanguage { + + public Expression parse(final String input) { + // define the input + final CharStream in = CharStreams.fromString(input); + + // create lexer and parser + final PdbLangLexer lexer = new PdbLangLexer(in); + lexer.addErrorListener(new ErrorListener()); + + 
final CommonTokenStream tokens = new CommonTokenStream(lexer); + final PdbLangParser parser = new PdbLangParser(tokens); + parser.addErrorListener(new ErrorListener()); + + final Stack stack = new Stack<>(); + + // define a listener that is called for every terminals and + // non-terminals + final ParseTreeListener listener = new PdbLangBaseListener() { + + @Override + public void exitIdentifierExpression(final IdentifierExpressionContext ctx) { + // System.out.println("push identifier " + ctx.getText()); + + if (ctx.getText().length() > 255) { + throw new SyntaxException(ctx, "token too long"); + } + + final int line = ctx.getStart().getLine(); + final int startIndex = ctx.getStart().getStartIndex(); + final int stopIndex = ctx.getStart().getStopIndex(); + + stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex)); + } + + @Override + public void exitPropertyExpression(final PropertyExpressionContext ctx) { + // System.out.println("property expression"); + + final Expression value = stack.pop(); + final Terminal property = (Terminal) stack.pop(); + + stack.push(new Property(property.getValue(), (Terminal) value)); + } + + @Override + public void exitNotExpression(final NotExpressionContext ctx) { + + final Expression expression = stack.pop(); + + final Expression notExpression = new Not(expression); + stack.push(notExpression); + } + + @Override + public void exitBinaryAndExpression(final BinaryAndExpressionContext ctx) { + final Expression right = stack.pop(); + final TemporaryExpression operation = new AndTemporary(); + final Expression left = stack.pop(); + + stack.push(operation.toExpression(left, right)); + } + + @Override + public void exitBinaryOrExpression(final BinaryOrExpressionContext ctx) { + final Expression right = stack.pop(); + final TemporaryExpression operation = new OrTemporary(); + final Expression left = stack.pop(); + + stack.push(operation.toExpression(left, right)); + } + }; + + // Specify our entry point + final ParseTree 
parseTree = parser.start(); + + // Walk it and attach our listener + final ParseTreeWalker walker = new ParseTreeWalker(); + walker.walk(listener, parseTree); + + if (stack.size() != 1) { + throw new RuntimeException("stack should have exactly one element " + stack); + } + + return stack.pop(); + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguageParser.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguageParser.java new file mode 100644 index 0000000..cc17728 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/QueryLanguageParser.java @@ -0,0 +1,17 @@ +package org.lucares.pdb.datastore.lang; + +import org.apache.commons.lang3.StringUtils; + +public class QueryLanguageParser { + public static Expression parse(final String query) { + + final Expression result; + if (StringUtils.isEmpty(query)) { + result = Expression.matchAll(); + } else { + final QueryLanguage lang = new QueryLanguage(); + result = lang.parse(query); + } + return result; + } +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/SyntaxException.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/SyntaxException.java new file mode 100644 index 0000000..f74f01a --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/SyntaxException.java @@ -0,0 +1,64 @@ +package org.lucares.pdb.datastore.lang; + +import org.antlr.v4.runtime.ParserRuleContext; + +public class SyntaxException extends RuntimeException { + + private static final long serialVersionUID = 1L; + private int lineStart; + private int startIndex; + private int lineStop; + private int stopIndex; + + public SyntaxException(final ParserRuleContext context, final String message) { + this(message, context.getStart().getLine(), context.getStart().getStartIndex(), context.getStop().getLine(), + context.getStop().getStopIndex()); + } + + public SyntaxException(final String message, final int lineStart, final int startIndex, 
final int lineStop, + final int stopIndex) { + super(message + ": " + generateMessage(lineStart, startIndex, lineStop, stopIndex)); + this.lineStart = lineStart; + this.startIndex = startIndex; + this.lineStop = lineStop; + this.stopIndex = stopIndex; + } + + private static String generateMessage(final int lineStart, final int startIndex, final int lineStop, + final int stopIndex) { + + return String.format("line=%d, start=%d, to line=%d stop=%d", lineStart, startIndex, lineStop, stopIndex); + } + + public int getLineStart() { + return lineStart; + } + + public void setLineStart(final int lineStart) { + this.lineStart = lineStart; + } + + public int getStartIndex() { + return startIndex; + } + + public void setStartIndex(final int startIndex) { + this.startIndex = startIndex; + } + + public int getLineStop() { + return lineStop; + } + + public void setLineStop(final int lineStop) { + this.lineStop = lineStop; + } + + public int getStopIndex() { + return stopIndex; + } + + public void setStopIndex(final int stopIndex) { + this.stopIndex = stopIndex; + } +} diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/DataStoreTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/DataStoreTest.java new file mode 100644 index 0000000..0983672 --- /dev/null +++ b/data-store/src/test/java/org/lucares/pdb/datastore/DataStoreTest.java @@ -0,0 +1,135 @@ +package org.lucares.pdb.datastore; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.lucares.pdb.api.Tags; +import org.lucares.utils.CollectionUtils; +import org.lucares.utils.file.FileUtils; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Test +public class DataStoreTest { + private Path 
dataDirectory; + private DataStore dataStore; + private Map tagsToPath; + + @BeforeMethod + public void beforeMethod() throws IOException { + dataDirectory = Files.createTempDirectory("pdb"); + } + + @AfterMethod + public void afterMethod() throws IOException { + FileUtils.delete(dataDirectory); + dataStore = null; + tagsToPath = null; + } + + public void testInsertSingleTag() throws Exception { + final Tags tags = Tags.create("key1", "value1", "key2", "value2"); + final Path path; + { + final DataStore dataStore = new DataStore(dataDirectory); + + path = dataStore.createNewFile(tags); + assertSearch(dataStore, "key1=value1", path); + } + { + final DataStore dataStore = new DataStore(dataDirectory); + assertSearch(dataStore, "key1=value1", path); + } + } + + public void testQuery() throws Exception { + + tagsToPath = new LinkedHashMap<>(); + final Tags eagleTim = Tags.create("bird", "eagle", "name", "Tim"); + final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer"); + final Tags flamingoJennifer = Tags.create("bird", "flamingo", "name", "Jennifer"); + final Tags labradorJenny = Tags.create("dog", "labrador", "name", "Jenny"); + final Tags labradorTim = Tags.create("dog", "labrador", "name", "Tim"); + + tagsToPath.put(eagleTim, null); + tagsToPath.put(pigeonJennifer, null); + tagsToPath.put(flamingoJennifer, null); + tagsToPath.put(labradorJenny, null); + tagsToPath.put(labradorTim, null); + + dataStore = new DataStore(dataDirectory); + + for (final Tags tags : tagsToPath.keySet()) { + final Path newFile = dataStore.createNewFile(tags); + tagsToPath.put(tags, newFile); + } + + assertSearch("bird=eagle", eagleTim); + assertSearch("dog=labrador", labradorJenny, labradorTim); + assertSearch("name=Tim", eagleTim, labradorTim); + assertSearch("dog=labrador and name=Tim", labradorTim); + assertSearch("dog=labrador and !name=Tim", labradorJenny); + assertSearch("name=Jennifer or name=Jenny", pigeonJennifer, flamingoJennifer, labradorJenny); + + // 
a͟n͟d binds stronger than o͟r + assertSearch("name=Tim and dog=labrador or bird=pigeon", pigeonJennifer, labradorTim); + assertSearch("bird=pigeon or name=Tim and dog=labrador", pigeonJennifer, labradorTim); + + // parenthesis override priority of a͟n͟d + assertSearch("name=Tim and (dog=labrador or bird=pigeon)", labradorTim); + assertSearch("(dog=labrador or bird=pigeon) and name=Tim", labradorTim); + + // wildcards + assertSearch("bird=*", eagleTim, pigeonJennifer, flamingoJennifer); + assertSearch("name=Jen*", pigeonJennifer, flamingoJennifer, labradorJenny); + assertSearch("dog=*dor", labradorJenny, labradorTim); + assertSearch("dog=lab*dor", labradorJenny, labradorTim); + assertSearch("dog=*lab*dor*", labradorJenny, labradorTim); + + } + + private void assertSearch(final String query, final Tags... tags) { + final List actualDocs = dataStore.search(query); + final List actual = CollectionUtils.map(actualDocs, Doc::getPath); + + final List expectedPaths = CollectionUtils.map(tags, tagsToPath::get); + + Assert.assertEquals(actual, expectedPaths, "Query: " + query + " Found: " + getTagsForPaths(actual)); + } + + private List getTagsForPaths(final List paths) { + + final List result = new ArrayList<>(); + + for (final Path path : paths) { + result.add(getTagForPath(path)); + } + return result; + } + + private Tags getTagForPath(final Path path) { + for (final Entry e : tagsToPath.entrySet()) { + + if (e.getValue().equals(path)) { + return e.getKey(); + } + } + return null; + } + + private void assertSearch(final DataStore dataStore, final String query, final Path... 
paths) { + final List actualDocs = dataStore.search(query); + final List actual = CollectionUtils.map(actualDocs, Doc::getPath); + + Assert.assertEquals(actual, Arrays.asList(paths)); + } +} diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/FolderStorageTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/FolderStorageTest.java new file mode 100644 index 0000000..a1e484d --- /dev/null +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/FolderStorageTest.java @@ -0,0 +1,91 @@ +package org.lucares.pdb.datastore.internal; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.lucares.utils.CollectionUtils; +import org.lucares.utils.file.FileUtils; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class FolderStorageTest { + private static final String SUFFIX = ".txt"; + private Path dataDirectory; + + @BeforeMethod + public void beforeMethod() throws IOException { + dataDirectory = Files.createTempDirectory("pdb"); + } + + @AfterMethod + public void afterMethod() throws IOException { + FileUtils.delete(dataDirectory); + } + + @Test + public void testFolderStructureRespectingToMaxFilesPerFolder() throws Exception { + final int maxFilesPerFolder = 2; + + storeFiles(maxFilesPerFolder); + storeFiles(maxFilesPerFolder, "a", "b", "c", "d", "e"); + storeFiles(maxFilesPerFolder, "f"); + storeFiles(maxFilesPerFolder, "g", "h", "i"); + + final List actualFiles = getPathsRelativeToDataDirectory(); + + final List expectedFiles = Arrays.asList(// + Paths.get("0", "0", "a" + SUFFIX), // + Paths.get("0", "0", "b" + SUFFIX), // + Paths.get("0", "1", "c" + SUFFIX), // + Paths.get("0", "1", "d" + SUFFIX), // + Paths.get("1", "0", "e" + SUFFIX), // + Paths.get("1", 
"0", "f" + SUFFIX), // + Paths.get("1", "1", "g" + SUFFIX), // + Paths.get("1", "1", "h" + SUFFIX), // + Paths.get("2", "0", "i" + SUFFIX)// The first level might + // overflow + ); + + Assert.assertEquals(actualFiles, expectedFiles); + } + + @Test + public void testDuplicateNames() throws Exception { + final int maxFilesPerFolder = 3; + + storeFiles(maxFilesPerFolder, "a", "a", "a", "a"); + + final List actualFiles = getPathsRelativeToDataDirectory(); + + final List expectedFiles = Arrays.asList(// + Paths.get("0", "0", "a" + SUFFIX), // + Paths.get("0", "0", "a1" + SUFFIX), // + Paths.get("0", "0", "a2" + SUFFIX), // + Paths.get("0", "1", "a" + SUFFIX)// + ); + + Assert.assertEquals(actualFiles, expectedFiles); + } + + private List getPathsRelativeToDataDirectory() throws IOException { + final List actualFiles = FileUtils.listRecursively(dataDirectory); + CollectionUtils.mapInPlace(actualFiles, p -> dataDirectory.relativize(p)); + Collections.sort(actualFiles); + return actualFiles; + } + + private void storeFiles(final int maxFilesPerFolder, final String... 
filenames) throws IOException { + final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder); + + for (final String filename : filenames) { + storage.insert(filename, SUFFIX); + } + } +} diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/RadixConverterTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/RadixConverterTest.java new file mode 100644 index 0000000..4d46f03 --- /dev/null +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/RadixConverterTest.java @@ -0,0 +1,24 @@ +package org.lucares.pdb.datastore.internal; + +import org.testng.Assert; +import org.testng.annotations.Test; + +@Test +public class RadixConverterTest { + + public void testConvertRoundtrip() { + + for (int i = 0; i < 1000; i++) { + + final String string = RadixConverter.toString(i); + final int actual = RadixConverter.fromString(string); + + Assert.assertEquals(actual, i, "string representation: " + string); + } + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNoNegativeValues() { + RadixConverter.toString(-1); + } +} diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/StringCompressorTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/StringCompressorTest.java new file mode 100644 index 0000000..68fb27c --- /dev/null +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/StringCompressorTest.java @@ -0,0 +1,52 @@ +package org.lucares.pdb.datastore.internal; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.lucares.utils.file.FileUtils; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Test +public class StringCompressorTest { + private Path dataDirectory; + + @BeforeMethod + public void beforeMethod() throws IOException { + dataDirectory = 
Files.createTempDirectory("pdb"); + } + + @AfterMethod + public void afterMethod() throws IOException { + FileUtils.delete(dataDirectory); + } + + public void testKeyCompressorRoundtrip() throws Exception { + final StringCompressor keyValueCompressor = StringCompressor.create(dataDirectory.resolve("key.csv")); + + final String value = "foo"; + final Integer intFoo = keyValueCompressor.put(value); + final String actual = keyValueCompressor.get(intFoo); + + Assert.assertEquals(actual, value); + } + + public void testKeyCompressorInitialization() throws Exception { + final Path database = dataDirectory.resolve("key.csv"); + final String value = "foo"; + { + final StringCompressor keyValueCompressor = StringCompressor.create(database); + + keyValueCompressor.put(value); + } + { + final StringCompressor keyValueCompressor = StringCompressor.create(database); + + keyValueCompressor.get(0); + } + + } +} diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/map/UniqueStringIntegerPairsTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/map/UniqueStringIntegerPairsTest.java new file mode 100644 index 0000000..0de626f --- /dev/null +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/map/UniqueStringIntegerPairsTest.java @@ -0,0 +1,74 @@ +package org.lucares.pdb.datastore.internal.map; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.lucares.utils.file.FileUtils; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Test +public class UniqueStringIntegerPairsTest { + + private Path dataDirectory; + + @BeforeMethod + public void beforeMethod() throws IOException { + dataDirectory = Files.createTempDirectory("pdb"); + } + + @AfterMethod + public void afterMethod() throws IOException { + FileUtils.delete(dataDirectory); + } + + public void testPutGet() throws Exception { + 
final Path database = dataDirectory.resolve("key.csv"); + final String first = "key1"; + final Integer second = 1; + + { + final UniqueStringIntegerPairs usip = new UniqueStringIntegerPairs(database); + + usip.put(first, second); + Assert.assertEquals(usip.get(first), second); + Assert.assertEquals(usip.getKey(second), first); + } + + { + final UniqueStringIntegerPairs usip = new UniqueStringIntegerPairs(database); + + Assert.assertEquals(usip.get(first), second); + Assert.assertEquals(usip.getKey(second), first); + } + } + + public void testUniqueKeyContstraint() throws Exception { + final Path database = dataDirectory.resolve("key.csv"); + final String first = "key1"; + final Integer second = 1; + + final UniqueStringIntegerPairs usip = new UniqueStringIntegerPairs(database); + usip.put(first, second); + try { + // cannot add another pair with the first key + final int another = second + 1; + usip.put(first, another); + Assert.fail("expected an IllegalArgumentException"); + } catch (final IllegalArgumentException e) { + // expected + } + + try { + // cannot add another pair with the same second value + final String another = first + 1; + usip.put(another, second); + Assert.fail("expected an IllegalArgumentException"); + } catch (final IllegalArgumentException e) { + // expected + } + } +} diff --git a/file-utils/.gitignore b/file-utils/.gitignore new file mode 100644 index 0000000..691dc42 --- /dev/null +++ b/file-utils/.gitignore @@ -0,0 +1,7 @@ +/.settings/ +/.classpath +/.project +/bin/ +/build/ +/target/ +/test-output/ \ No newline at end of file diff --git a/file-utils/build.gradle b/file-utils/build.gradle new file mode 100644 index 0000000..c1c65c1 --- /dev/null +++ b/file-utils/build.gradle @@ -0,0 +1,6 @@ +dependencies { + + + compile 'org.apache.logging.log4j:log4j-core:2.8.2' + compile 'org.apache.logging.log4j:log4j-slf4j-impl:2.8.2' +} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/FileUtils.java 
b/file-utils/src/main/java/org/lucares/utils/file/FileUtils.java similarity index 91% rename from performanceDb/src/main/java/org/lucares/performance/db/FileUtils.java rename to file-utils/src/main/java/org/lucares/utils/file/FileUtils.java index dc93fa2..3cb17b9 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/FileUtils.java +++ b/file-utils/src/main/java/org/lucares/utils/file/FileUtils.java @@ -1,4 +1,4 @@ -package org.lucares.performance.db; +package org.lucares.utils.file; import java.io.IOException; import java.nio.file.FileVisitResult; @@ -15,7 +15,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class FileUtils { - private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class); private static final class RecursiveDeleter extends SimpleFileVisitor { @@ -58,15 +57,13 @@ public class FileUtils { } } - public static List listRecursively(final Path start) { + public static List listRecursively(final Path start) throws IOException { final int maxDepth = Integer.MAX_VALUE; final BiPredicate matcher = (path, attr) -> Files.isRegularFile(path); try (final Stream files = Files.find(start, maxDepth, matcher)) { return files.collect(Collectors.toList()); - } catch (final IOException e) { - throw new ReadException(e); } } } diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java index 381e07f..745874a 100644 --- a/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java +++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/Plotter.java @@ -27,8 +27,8 @@ import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Tags; import org.lucares.pdb.plot.api.Limit; import org.lucares.pdb.plot.api.PlotSettings; -import org.lucares.performance.db.FileUtils; import org.lucares.performance.db.PerformanceDb; +import org.lucares.utils.file.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git 
a/pdb-ui/.gitignore b/pdb-ui/.gitignore index 7d24f74..bf7d3fd 100644 --- a/pdb-ui/.gitignore +++ b/pdb-ui/.gitignore @@ -1,6 +1,7 @@ -/bin/ -/build/ /.settings/ -/test-output/ /.classpath /.project +/bin/ +/build/ +/target/ +/test-output/ diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/MySpringConfiguration.java b/pdb-ui/src/main/java/org/lucares/pdbui/MySpringConfiguration.java index f5a9b10..85cfc78 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/MySpringConfiguration.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/MySpringConfiguration.java @@ -1,5 +1,6 @@ package org.lucares.pdbui; +import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; @@ -20,7 +21,7 @@ public class MySpringConfiguration { private static final Logger LOGGER = LoggerFactory.getLogger(MySpringConfiguration.class); @Bean - PerformanceDb performanceDb(@Value("${db.base}") final String dbBaseDir) { + PerformanceDb performanceDb(@Value("${db.base}") final String dbBaseDir) throws IOException { final Path dataDirectory = Paths.get(dbBaseDir); LOGGER.info("using database in {}", dataDirectory.toAbsolutePath()); diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java b/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java index b64a0c9..e9eb317 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java @@ -9,22 +9,22 @@ import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.SortedSet; -import org.lucares.ludb.FieldNotExistsException; -import org.lucares.ludb.Proposal; import org.lucares.pdb.plot.api.PlotSettings; import org.lucares.pdbui.domain.AutocompleteProposal; import org.lucares.pdbui.domain.AutocompleteProposalByValue; import org.lucares.pdbui.domain.AutocompleteResponse; import org.lucares.pdbui.domain.PlotRequest; import org.lucares.pdbui.domain.PlotResponse; -import 
org.lucares.performance.db.CollectionUtils; import org.lucares.performance.db.PerformanceDb; +import org.lucares.performance.db.Proposal; import org.lucares.recommind.logs.DataSeries; import org.lucares.recommind.logs.InternalPlottingException; import org.lucares.recommind.logs.NoDataPointsException; import org.lucares.recommind.logs.PlotResult; import org.lucares.recommind.logs.Plotter; +import org.lucares.utils.CollectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; @@ -41,7 +41,7 @@ import org.springframework.web.servlet.ModelAndView; @Controller @EnableAutoConfiguration -public class PdbController implements HardcodedValues, CollectionUtils { +public class PdbController implements HardcodedValues { private static final Logger LOGGER = LoggerFactory.getLogger(PdbController.class); @@ -100,7 +100,7 @@ public class PdbController implements HardcodedValues, CollectionUtils { final int zeroBasedCaretIndex = caretIndex - 1; final List proposals = db.autocomplete(query, zeroBasedCaretIndex); - final List nonEmptyProposals = filter(proposals, p -> p.getResults() > 0); + final List nonEmptyProposals = CollectionUtils.filter(proposals, p -> p.getResults() > 0); final List autocompleteProposals = toAutocompleteProposals(nonEmptyProposals); Collections.sort(autocompleteProposals, new AutocompleteProposalByValue()); @@ -129,16 +129,12 @@ public class PdbController implements HardcodedValues, CollectionUtils { produces = MediaType.APPLICATION_JSON_UTF8_VALUE // ) @ResponseBody - List fields(@PathVariable(name = "fieldName") final String fieldName, + SortedSet fields(@PathVariable(name = "fieldName") final String fieldName, @RequestParam(name = "query") final String query) { - try { - final List fields = db.getFieldsValues(query, fieldName); + final SortedSet fields = db.getFieldsValues(query, fieldName); - return fields; - } catch (final FieldNotExistsException e) { - throw new 
NotFoundException(e); - } + return fields; } private List toAutocompleteProposals(final List proposals) { diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java b/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java index bc46b91..0a05dcd 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java @@ -159,7 +159,7 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean { } } - public TcpIngestor(final Path dataDirectory) { + public TcpIngestor(final Path dataDirectory) throws IOException { LOGGER.info("opening performance db: " + dataDirectory); db = new PerformanceDb(dataDirectory); LOGGER.debug("performance db open"); diff --git a/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java b/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java index bc6f26d..93a9bda 100644 --- a/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java +++ b/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java @@ -14,8 +14,8 @@ import java.util.Map; import org.lucares.pdb.api.Entry; import org.lucares.pdbui.TcpIngestor; -import org.lucares.performance.db.FileUtils; import org.lucares.performance.db.PerformanceDb; +import org.lucares.utils.file.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.testng.Assert; @@ -23,8 +23,6 @@ import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import liquibase.exception.LiquibaseException; - @Test public class TcpIngestorTest { @@ -42,7 +40,7 @@ public class TcpIngestorTest { FileUtils.delete(dataDirectory); } - public void testIngestDataViaTcpStream() throws LiquibaseException, Exception { + public void testIngestDataViaTcpStream() throws Exception { final OffsetDateTime dateA = OffsetDateTime.now(); final OffsetDateTime dateB = OffsetDateTime.now(); 
diff --git a/pdb-utils/.gitignore b/pdb-utils/.gitignore index 080ea93..bf7d3fd 100644 --- a/pdb-utils/.gitignore +++ b/pdb-utils/.gitignore @@ -1,5 +1,7 @@ -/bin/ -/build/ /.settings/ /.classpath /.project +/bin/ +/build/ +/target/ +/test-output/ diff --git a/pdb-utils/src/main/java/org/lucares/pdb/api/Entry.java b/pdb-utils/src/main/java/org/lucares/pdb/api/Entry.java deleted file mode 100644 index 28c5252..0000000 --- a/pdb-utils/src/main/java/org/lucares/pdb/api/Entry.java +++ /dev/null @@ -1,99 +0,0 @@ -package org.lucares.pdb.api; - -import java.time.Instant; -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; - -public class Entry { - - /** - * A special {@link Entry} that can be used as poison object for - * {@link BlockingQueueIterator}. - */ - public static final Entry POISON = new Entry(0, -1); - - public static final long MAX_VALUE = 0xFF_FF_FF_FFL; - - private final long epochMilli; - - private final long value; - - private final Tags tags; - - public Entry(final OffsetDateTime date, final long value, final Tags tags) { - this.tags = tags; - this.epochMilli = date.toInstant().toEpochMilli(); - this.value = value; - } - - public Entry(final long epochMilli, final long value, final Tags tags) { - if (value < 0 || value > MAX_VALUE) { - throw new IllegalArgumentException("value must be between 0 and " + MAX_VALUE + ", but was " + value); - } - - this.epochMilli = epochMilli; - this.value = value; - this.tags = tags; - } - - private Entry(final long epochMilli, final long value) { - this.epochMilli = epochMilli; - this.value = value; - this.tags = null; - } - - public OffsetDateTime getDate() { - final Instant instant = Instant.ofEpochMilli(epochMilli); - return OffsetDateTime.ofInstant(instant, ZoneOffset.UTC); - } - - public long getValue() { - return value; - } - - public long getEpochMilli() { - return epochMilli; - } - - public Tags getTags() { - return tags; - } - - @Override - public String 
toString() { - final OffsetDateTime date = getDate(); - return date.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + " = " + value + " (" + tags + ")"; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + (int) (epochMilli ^ (epochMilli >>> 32)); - result = prime * result + ((tags == null) ? 0 : tags.hashCode()); - result = prime * result + (int) (value ^ (value >>> 32)); - return result; - } - - @Override - public boolean equals(final Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - final Entry other = (Entry) obj; - if (epochMilli != other.epochMilli) - return false; - if (tags == null) { - if (other.tags != null) - return false; - } else if (!tags.equals(other.tags)) - return false; - if (value != other.value) - return false; - return true; - } -} diff --git a/pdb-utils/src/main/java/org/lucares/pdb/api/GroupResult.java b/pdb-utils/src/main/java/org/lucares/pdb/api/GroupResult.java deleted file mode 100644 index 04761d0..0000000 --- a/pdb-utils/src/main/java/org/lucares/pdb/api/GroupResult.java +++ /dev/null @@ -1,32 +0,0 @@ -package org.lucares.pdb.api; - -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -public class GroupResult { - - private final Tags groupedBy; - - private final Stream entries; - - public GroupResult(final Stream entries, final Tags groupedBy) { - this.entries = entries; - this.groupedBy = groupedBy; - } - - /** - * @return {@link Stream} unbound, unordered and non-parallel - */ - public Stream asStream() { - return entries; - } - - public List asList() { - return entries.collect(Collectors.toList()); - } - - public Tags getGroupedBy() { - return groupedBy; - } -} diff --git a/pdb-utils/src/main/java/org/lucares/pdb/api/Result.java b/pdb-utils/src/main/java/org/lucares/pdb/api/Result.java deleted file mode 100644 index 9c90054..0000000 --- 
a/pdb-utils/src/main/java/org/lucares/pdb/api/Result.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.lucares.pdb.api; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - -public class Result { - - private final List groupResults; - - public Result(final GroupResult... groupResults) { - this(Arrays.asList(groupResults)); - } - - public Result(final Collection groupResults) { - this.groupResults = new ArrayList<>(groupResults); - } - - public GroupResult singleGroup() { - if (groupResults.size() != 1) { - throw new IllegalStateException("the result does not contain exactly one group"); - } - return groupResults.get(0); - } - - public List getGroups() { - return new ArrayList<>(groupResults); - } -} diff --git a/pdb-utils/src/main/java/org/lucares/pdb/api/Tag.java b/pdb-utils/src/main/java/org/lucares/pdb/api/Tag.java deleted file mode 100644 index beb43a6..0000000 --- a/pdb-utils/src/main/java/org/lucares/pdb/api/Tag.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.lucares.pdb.api; - -public class Tag { - private final String key; - - private final String value; - - public Tag(final String key, final String value) { - this.key = key; - this.value = value; - } - - public String getKey() { - return key; - } - - public String getValue() { - return value; - } - - @Override - public String toString() { - return key + "=" + value; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((key == null) ? 0 : key.hashCode()); - result = prime * result + ((value == null) ? 
0 : value.hashCode()); - return result; - } - - @Override - public boolean equals(final Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - final Tag other = (Tag) obj; - if (key == null) { - if (other.key != null) - return false; - } else if (!key.equals(other.key)) - return false; - if (value == null) { - if (other.value != null) - return false; - } else if (!value.equals(other.value)) - return false; - return true; - } - -} diff --git a/pdb-utils/src/main/java/org/lucares/pdb/api/Tags.java b/pdb-utils/src/main/java/org/lucares/pdb/api/Tags.java deleted file mode 100644 index 00be70c..0000000 --- a/pdb-utils/src/main/java/org/lucares/pdb/api/Tags.java +++ /dev/null @@ -1,136 +0,0 @@ -package org.lucares.pdb.api; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.function.BiConsumer; - -public class Tags { - static final Tags EMPTY = new Tags(); - - private final Map tags; - - private Tags() { - super(); - tags = Collections.emptyMap(); - } - - private Tags(final Map tags) { - this.tags = tags; - } - - public static Tags create() { - return EMPTY; - } - - public static Tags create(final String key1, final String value1, final String key2, final String value2) { - final Map tags = new HashMap<>(2); - tags.put(key1, new Tag(key1, value1)); - tags.put(key2, new Tag(key2, value2)); - return new Tags(tags); - } - - public static Tags create(final String key, final String value) { - final Map tags = new HashMap<>(1); - tags.put(key, new Tag(key, value)); - return new Tags(tags); - } - - public Tags copyAdd(final String key, final String value) { - Objects.requireNonNull(key, "key must not be null"); - Objects.requireNonNull(value, "value must not be null"); - - final Map newTags = new HashMap<>(tags); - - newTags.put(key, new 
Tag(key, value)); - - return new Tags(newTags); - } - - public Tags copyAddIfNotNull(final String key, final String value) { - - final Tags result; - if (value != null) { - result = copyAdd(key, value); - } else { - result = this; - } - return result; - } - - public String getValue(final String key) { - final Tag tag = tags.get(key); - final String value = tag != null ? tag.getValue() : null; - return value; - } - - public Set getKeys() { - return new TreeSet<>(tags.keySet()); - } - - public void forEach(final BiConsumer keyValueConsumer) { - for (final Map.Entry e : tags.entrySet()) { - keyValueConsumer.accept(e.getKey(), e.getValue().getValue()); - } - } - - @Override - public String toString() { - return String.valueOf(tags); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((tags == null) ? 0 : tags.hashCode()); - return result; - } - - @Override - public boolean equals(final Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - final Tags other = (Tags) obj; - if (tags == null) { - if (other.tags != null) - return false; - } else if (!tags.equals(other.tags)) - return false; - return true; - } - - public String abbreviatedRepresentation() { - final StringBuilder result = new StringBuilder(); - final int maxLength = 200; - - final SortedSet keys = new TreeSet<>(tags.keySet()); - - final int cutAt = maxLength / (keys.size() * 2 + 2); - - for (final String key : keys) { - - final String value = tags.get(key).getValue(); - - result.append(substr(key, cutAt)); - result.append("-"); - result.append(substr(value, cutAt)); - result.append("_"); - } - - return substr(result.toString(), maxLength); - } - - private static String substr(final String s, final int maxLength) { - return s.substring(0, Math.min(maxLength, s.length())); - } - -} diff --git a/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java 
b/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java new file mode 100644 index 0000000..9e23efb --- /dev/null +++ b/pdb-utils/src/main/java/org/lucares/utils/CollectionUtils.java @@ -0,0 +1,52 @@ +package org.lucares.utils; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class CollectionUtils { + public static void mapInPlace(final List list, final Function mapper) { + for (int i = 0; i < list.size(); i++) { + final T value = list.get(i); + final T newValue = mapper.apply(value); + list.set(i, newValue); + } + } + + public static List map(final Collection list, final Function mapper) { + final List result = new ArrayList<>(list.size()); + + for (final T t : list) { + result.add(mapper.apply(t)); + } + + return result; + } + + public static List map(final T[] input, final Function mapper) { + return Stream.of(input).map(mapper).collect(Collectors.toList()); + } + + public static Map toMap(final Iterable iterable, final Function keyMapper) { + final Map result = new HashMap<>(); + + for (final V value : iterable) { + final T key = keyMapper.apply(value); + + result.put(key, value); + } + + return result; + } + + public static List filter(final Collection collection, final Predicate predicate) { + return collection.stream().filter(predicate).collect(Collectors.toList()); + } + +} diff --git a/performanceDb/build.gradle b/performanceDb/build.gradle index b88e6ac..b69bacc 100644 --- a/performanceDb/build.gradle +++ b/performanceDb/build.gradle @@ -1,9 +1,11 @@ dependencies { compile project(':pdb-api') - compile 'org.lucares:ludb:1.0.20170408081113' + compile project(':data-store') + compile project(':file-utils') + //compile 'org.lucares:ludb:1.0.20170408081113' compile 
'com.fasterxml.jackson.core:jackson-databind:2.8.8' - + compile 'org.apache.commons:commons-collections4:4.1' compile 'org.apache.logging.log4j:log4j-api:2.8.2' diff --git a/performanceDb/src/main/java/org/lucares/performance/db/CollectionUtils.java b/performanceDb/src/main/java/org/lucares/performance/db/CollectionUtils.java deleted file mode 100644 index c1461f5..0000000 --- a/performanceDb/src/main/java/org/lucares/performance/db/CollectionUtils.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.lucares.performance.db; - -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.stream.Collectors; - -public interface CollectionUtils { - - public default Map toMap(final Iterable iterable, final Function keyMapper) { - final Map result = new HashMap<>(); - - for (final V value : iterable) { - final T key = keyMapper.apply(value); - - result.put(key, value); - } - - return result; - } - - public default List filter(final Collection collection, final Predicate predicate) { - return collection.stream().filter(predicate).collect(Collectors.toList()); - } - - public default List map(final Collection collection, final Function mapper) { - return collection.stream().map(mapper).collect(Collectors.toList()); - } - - public default List sorted(final Collection collection, final Comparator comparator) { - return collection.stream().sorted(comparator).collect(Collectors.toList()); - } - - public default Optional findFirst(final Collection collection) { - return collection.stream().findFirst(); - } -} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileIterator.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileIterator.java index 46a47a6..182e556 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileIterator.java +++ 
b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileIterator.java @@ -8,7 +8,6 @@ import java.util.Queue; import java.util.function.Supplier; import org.lucares.pdb.api.Entry; -import org.lucares.pdb.api.Tags; public class PdbFileIterator implements Iterator, AutoCloseable { @@ -31,15 +30,14 @@ public class PdbFileIterator implements Iterator, AutoCloseable { if (reader == null) { return null; } - final Entry entry = reader.readNullableEntry(reader.getPdbFile().getTags()); + final Entry entry = reader.readNullableEntry(); if (entry == null) { nextFile(); if (reader == null) { return null; } else { - final Tags tags = reader.getPdbFile().getTags(); - return reader.readEntry(tags).orElse(null); + return reader.readEntry().orElse(null); } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileOffsetTime.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileOffsetTime.java deleted file mode 100644 index 3ed4a12..0000000 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileOffsetTime.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.lucares.performance.db; - -import java.time.OffsetDateTime; - -public class PdbFileOffsetTime { - private final PdbFile pdbFile; - - private final OffsetDateTime offsetTime; - - public PdbFileOffsetTime(final PdbFile pdbFile, final OffsetDateTime offsetTime) { - super(); - this.pdbFile = pdbFile; - this.offsetTime = offsetTime; - } - - public PdbFile getPdbFile() { - return pdbFile; - } - - public OffsetDateTime getOffsetTime() { - return offsetTime; - } - - @Override - public String toString() { - return "PdbFileOffsetTime [pdbFile=" + pdbFile + ", offsetTime=" + offsetTime + "]"; - } - -} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java index a7f6f8c..51e5abe 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java +++ 
b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java @@ -13,6 +13,7 @@ public class PdbFileViewer { final File file = new File(args[0]); final PdbFile pdbFile = new PdbFile(file.toPath(), TAGS); + long countMeasurements = 0; try (final PdbReader reader = new PdbReader(pdbFile, false)) { long value = 0; @@ -20,6 +21,7 @@ public class PdbFileViewer { while ((nextByte = reader.readNextByte()) >= 0) { final ByteType type = ByteType.getType(nextByte); + countMeasurements = countMeasurements + (type == ByteType.MEASUREMENT ? 1 : 0); final long bytesValue = type.getValue(nextByte); if (type == ByteType.CONTINUATION) { @@ -29,10 +31,36 @@ public class PdbFileViewer { value = bytesValue; } - System.out.printf("%s %3d %3d %-14s %14d\n", toBinary(nextByte), nextByte, bytesValue, type, value); + String additionalInfo = ""; + if (ByteType.MEASUREMENT == ByteType.getType(reader.peekNextByte())) { + additionalInfo = format(value); + } + + System.out.printf("%s %3d %3d %-14s %14d %s\n", toBinary(nextByte), nextByte, bytesValue, type, value, + additionalInfo); } } + System.out.println("Bytes: " + file.length()); + System.out.println("Measurements: " + countMeasurements); + System.out.println("Bytes/Measurements: " + (file.length() / (double) countMeasurements)); + } + + private static String format(final long millis) { + + final long years = millis / (1000L * 3600 * 24 * 365); + final long days = millis % (1000L * 3600 * 24 * 365) / (1000 * 3600 * 24); + final long hours = (millis % (1000 * 3600 * 24)) / (1000 * 3600); + final long minutes = (millis % (1000 * 3600)) / (1000 * 60); + final long seconds = (millis % (1000 * 60)) / 1000; + final long ms = millis % 1000; + + if (years > 0) { + return String.format("%d years %d days %02d:%02d:%02d,%03d", years, days, hours, minutes, seconds, ms); + } else if (days > 0) { + return String.format("%d days %02d:%02d:%02d,%03d", days, hours, minutes, seconds, ms); + } + return String.format("%02d:%02d:%02d,%03d", hours, 
minutes, seconds, ms); } private static String toBinary(final int b) { diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java index d7f48bf..b4ea080 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java @@ -12,7 +12,6 @@ import java.time.ZoneId; import java.util.Optional; import org.lucares.pdb.api.Entry; -import org.lucares.pdb.api.Tags; class PdbReader implements AutoCloseable { @@ -74,7 +73,7 @@ class PdbReader implements AutoCloseable { */ public void seekToLastValue() { - while (readEntry(Tags.EMPTY).isPresent()) { + while (readEntry().isPresent()) { // seek to the end // TODO @ahr add date offsets every x kb, so we don't have // to read the whole file @@ -90,7 +89,7 @@ class PdbReader implements AutoCloseable { } } - Entry readNullableEntry(final Tags tags) throws ReadRuntimeException { + Entry readNullableEntry() throws ReadRuntimeException { try { final long epochMilliIncrement = readValue(ByteType.DATE_INCREMENT); if (epochMilliIncrement < 0) { @@ -103,15 +102,16 @@ class PdbReader implements AutoCloseable { return null; } dateOffsetAtCurrentLocation = epochMilli; - return new Entry(epochMilli, value, tags); + + return new Entry(epochMilli, value, pdbFile.getTags()); } catch (final IOException e) { throw new ReadException(e); } } - public Optional readEntry(final Tags tags) throws ReadRuntimeException { + public Optional readEntry() throws ReadRuntimeException { - final Entry entry = readNullableEntry(tags); + final Entry entry = readNullableEntry(); return Optional.ofNullable(entry); } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java index d14e43a..659aeeb 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java 
+++ b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java @@ -1,13 +1,15 @@ package org.lucares.performance.db; -import java.io.File; +import java.io.IOException; import java.nio.file.Path; import java.time.OffsetDateTime; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Optional; +import java.util.SortedSet; import java.util.Spliterator; import java.util.Spliterators; import java.util.concurrent.BlockingQueue; @@ -16,33 +18,30 @@ import java.util.concurrent.TimeoutException; import java.util.stream.Stream; import java.util.stream.StreamSupport; -import org.lucares.ludb.Field; -import org.lucares.ludb.FieldNotExistsException; -import org.lucares.ludb.H2DB; -import org.lucares.ludb.Proposal; import org.lucares.pdb.api.Entry; import org.lucares.pdb.api.GroupResult; import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Tags; +import org.lucares.pdb.datastore.DataStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -public class PerformanceDb implements AutoCloseable, CollectionUtils { +public class PerformanceDb implements AutoCloseable { private final static Logger LOGGER = LoggerFactory.getLogger(PerformanceDb.class); private final static Logger METRICS_LOGGER = LoggerFactory.getLogger("org.lucares.metrics.ingestion.block"); private final TagsToFile tagsToFile; - private final H2DB db; + private final DataStore db; - public PerformanceDb(final Path dataDirectory) { + public PerformanceDb(final Path dataDirectory) throws IOException { - db = new H2DB(new File(dataDirectory.toFile(), "lu.db")); + db = new DataStore(dataDirectory); - tagsToFile = new TagsToFile(dataDirectory, db); + tagsToFile = new TagsToFile(db); } public void put(final Entry entry) throws WriteException { @@ -192,28 +191,24 @@ public class 
PerformanceDb implements AutoCloseable, CollectionUtils { @Override public void close() { - try { - db.close(); - } catch (final Exception e) { - // H2 doesn't actually do anything in close - throw new IllegalStateException(e); - } - tagsToFile.close(); } public List autocomplete(final String query, final int caretIndex) { - return db.proposeTagForQuery(query, caretIndex); + + // TODO implement proposals + // return db.proposeTagForQuery(query, caretIndex); + return Collections.emptyList(); } public List getFields() { - final List fields = db.getAvailableFields(); + final List fields = db.getAvailableFields(); - return map(fields, Field::getName); + return fields; } - public List getFieldsValues(final String query, final String fieldName) throws FieldNotExistsException { - return db.getAvailableValuesForField(query, fieldName); + public SortedSet getFieldsValues(final String query, final String fieldName) { + return db.getAvailableValuesForKey(query, fieldName); } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/Proposal.java b/performanceDb/src/main/java/org/lucares/performance/db/Proposal.java new file mode 100644 index 0000000..dfcb98e --- /dev/null +++ b/performanceDb/src/main/java/org/lucares/performance/db/Proposal.java @@ -0,0 +1,78 @@ +package org.lucares.performance.db; + +public class Proposal implements Comparable { + private final String proposedTag; + + private final String proposedQuery; + + private final long results; + + public Proposal(final String proposedTag, final String proposedQuery, final long results) { + super(); + this.proposedTag = proposedTag; + this.proposedQuery = proposedQuery; + this.results = results; + } + + public String getProposedTag() { + return proposedTag; + } + + public String getProposedQuery() { + return proposedQuery; + } + + public long getResults() { + return results; + } + + @Override + public String toString() { + return "Proposal [proposedTag=" + proposedTag + ", proposedQuery=" + proposedQuery + 
", results=" + results + + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((proposedQuery == null) ? 0 : proposedQuery.hashCode()); + result = prime * result + ((proposedTag == null) ? 0 : proposedTag.hashCode()); + result = prime * result + (int) (results ^ (results >>> 32)); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final Proposal other = (Proposal) obj; + if (proposedQuery == null) { + if (other.proposedQuery != null) + return false; + } else if (!proposedQuery.equals(other.proposedQuery)) + return false; + if (proposedTag == null) { + if (other.proposedTag != null) + return false; + } else if (!proposedTag.equals(other.proposedTag)) + return false; + if (results != other.results) + return false; + return true; + } + + @Override + public int compareTo(final Proposal o) { + + if (results != o.results) { + return results < o.results ? 
1 : -1; + } + + return proposedTag.compareToIgnoreCase(o.proposedTag); + } +} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/Query.java b/performanceDb/src/main/java/org/lucares/performance/db/Query.java index 67053e5..df0a57d 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/Query.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/Query.java @@ -1,20 +1,27 @@ package org.lucares.performance.db; +import java.util.ArrayList; +import java.util.List; + import org.lucares.pdb.api.Tags; final class Query { static String createQuery(final Tags tags) { - final StringBuilder result = new StringBuilder(); + + final List terms = new ArrayList<>(); for (final String key : tags.getKeys()) { final String value = tags.getValue(key); - result.append(key); - result.append("="); - result.append(value); - result.append(" "); + final StringBuilder term = new StringBuilder(); + term.append(key); + term.append("="); + term.append(value); + term.append(" "); + + terms.add(term.toString()); } - return result.toString().trim(); + return String.join(" and ", terms); } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/StorageUtils.java b/performanceDb/src/main/java/org/lucares/performance/db/StorageUtils.java deleted file mode 100644 index e33afd9..0000000 --- a/performanceDb/src/main/java/org/lucares/performance/db/StorageUtils.java +++ /dev/null @@ -1,32 +0,0 @@ -package org.lucares.performance.db; - -import java.nio.file.Path; -import java.util.UUID; - -import org.lucares.pdb.api.Tags; - -public class StorageUtils { - - public static Path createStorageFile(final Path tagSpecificStorageFolder) { - - final Path storageFile = tagSpecificStorageFolder.resolve(UUID.randomUUID().toString()); - - return storageFile; - } - - public static Path createTagSpecificStorageFolder(final Path dataDirectory, final Tags tags) { - - final String tagBaseDir = tags.abbreviatedRepresentation() + UUID.randomUUID().toString(); - - final 
Path dataBaseDir = dataDirectory.resolve("data"); - final Path tagSpecificFolder = dataBaseDir.resolve(tagBaseDir); - - return tagSpecificFolder; - } - - public static Path getTagSpecificStorageFolder(final Path storageFilePath) { - - return storageFilePath // - .getParent(); // tag specific - } -} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java b/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java index 3caac5f..3d586c2 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java @@ -1,7 +1,6 @@ package org.lucares.performance.db; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.time.OffsetDateTime; import java.util.ArrayList; @@ -17,14 +16,15 @@ import java.util.Set; import java.util.function.Consumer; import java.util.stream.Collectors; -import org.lucares.ludb.Document; -import org.lucares.ludb.H2DB; -import org.lucares.ludb.internal.FieldNotExistsInternalException; import org.lucares.pdb.api.Tags; +import org.lucares.pdb.datastore.DataStore; +import org.lucares.pdb.datastore.Doc; +import org.lucares.utils.CollectionUtils; +import org.lucares.utils.file.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class TagsToFile implements CollectionUtils, AutoCloseable { +public class TagsToFile implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(TagsToFile.class); @@ -62,31 +62,20 @@ public class TagsToFile implements CollectionUtils, AutoCloseable { public Optional writer(final PdbFile pdbFile) { return writers.stream().filter(w -> Objects.equals(w.getPdbFile(), pdbFile)).findAny(); } - - public Optional tagSpecificBaseDir() { - - if (writers.size() > 0) { - return Optional.of(writers.get(0).getPdbFile().getPath().getParent()); - } - return Optional.empty(); - } - } - private final H2DB db; - private final 
Path dataDirectory; + private final DataStore db; private final Map cachedWriters = new HashMap<>(); - public TagsToFile(final Path dataDirectory, final H2DB db) { - this.dataDirectory = dataDirectory; + public TagsToFile(final DataStore db) { this.db = db; } private List getFilesMatchingTagsExactly(final Tags tags) { final List files = getFilesMatchingTags(tags); - return filter(files, f -> f.getTags().equals(tags)); + return CollectionUtils.filter(files, f -> f.getTags().equals(tags)); } private List getFilesMatchingTags(final Tags tags) { @@ -116,7 +105,7 @@ public class TagsToFile implements CollectionUtils, AutoCloseable { final Tags fileSpecificTags = tagSpecific.getTags(); - final List storageFiles = FileUtils.listRecursively(tagSpecific.getPath()); + final List storageFiles = listFiles(tagSpecific); for (final Path storageFile : storageFiles) { final PdbFile pdbFile = new PdbFile(storageFile, fileSpecificTags); @@ -128,36 +117,30 @@ public class TagsToFile implements CollectionUtils, AutoCloseable { return result; } + private List listFiles(final TagSpecificBaseDir tagSpecific) { + try { + return FileUtils.listRecursively(tagSpecific.getPath()); + } catch (final IOException e) { + throw new ReadException(e); + } + } + private List getTagSpecificFolders(final String query) { final List result = new ArrayList<>(); - try { - final List searchResult = db.search(query); - for (final Document document : searchResult) { + final List searchResult = db.search(query); - final Path path = document.getFile().toPath(); - final Tags tags = toTags(document); + for (final Doc document : searchResult) { - result.add(new TagSpecificBaseDir(path, tags)); - } - } catch (final FieldNotExistsInternalException e) { - // happens if there is not yet a tag specific base dir + final Path path = document.getPath(); + final Tags tags = document.getTags(); + + result.add(new TagSpecificBaseDir(path, tags)); } return result; } - private Tags toTags(final Document document) { - Tags 
tagsOfFile = Tags.create(); - - for (final String key : document.getProperties().keySet()) { - - final String value = document.getPropertyString(key); - tagsOfFile = tagsOfFile.copyAdd(key, value); - } - return tagsOfFile; - } - public PdbWriter getWriter(final OffsetDateTime date, final Tags tags) throws ReadException, WriteException { final PdbWriter result; final WriterCache writersForTags = getOrInit(tags); @@ -173,9 +156,10 @@ public class TagsToFile implements CollectionUtils, AutoCloseable { assertAllFilesHaveSameFolder(pdbFiles); pdbFiles.removeIf(f -> !f.exists()); - final List> optionalWriters = map(pdbFiles, writersForTags::writer); - final List> existingWriters = filter(optionalWriters, Optional::isPresent); - final List writers = map(existingWriters, Optional::get); + final List> optionalWriters = CollectionUtils.map(pdbFiles, writersForTags::writer); + final List> existingWriters = CollectionUtils.filter(optionalWriters, + Optional::isPresent); + final List writers = CollectionUtils.map(existingWriters, Optional::get); final Optional optionalFirst = chooseBestMatchingWriter(writers, date); @@ -233,12 +217,8 @@ public class TagsToFile implements CollectionUtils, AutoCloseable { private PdbWriter newPdbWriter(final Tags tags) { try { - PdbWriter result; - final Path tagSpecificStorageFolder = getOrInit(tags).tagSpecificBaseDir() - .orElse(StorageUtils.createTagSpecificStorageFolder(dataDirectory, tags)); - - final PdbFile pdbFile = createNewPdbFile(tags, tagSpecificStorageFolder); - result = new PdbWriter(pdbFile); + final PdbFile pdbFile = createNewPdbFile(tags); + final PdbWriter result = new PdbWriter(pdbFile); getOrInit(tags).addWriter(result); return result; @@ -259,38 +239,15 @@ public class TagsToFile implements CollectionUtils, AutoCloseable { } } - private PdbFile createNewPdbFile(final Tags tags, final Path tagSpecificStorageFolder) throws IOException { - final Path storageFile; - PdbFile result; - storageFile = 
createNewFile(tagSpecificStorageFolder); + private PdbFile createNewPdbFile(final Tags tags) throws IOException { - final Document document = db.getDocument(tagSpecificStorageFolder.toFile()); - if (document == null) { - db.addDocument(tagSpecificStorageFolder.toFile()); + final Path storageFile = db.createNewFile(tags); - tags.forEach((fieldName, value) -> { - TagsUtils.setProperty(db, tagSpecificStorageFolder.toFile(), fieldName, value); - }); - } - - result = new PdbFile(storageFile, tags); + final PdbFile result = new PdbFile(storageFile, tags); PdbWriter.init(result); return result; } - private Path createNewFile(final Path tagSpecificStorageFolder) { - - final Path result = StorageUtils.createStorageFile(tagSpecificStorageFolder); - try { - Files.createDirectories(result.getParent()); - Files.createFile(result); - } catch (final IOException e) { - throw new IllegalStateException(e); // very unlikely - } - - return result; - } - private void forEachWriter(final Consumer consumer) { for (final Entry readersWriters : cachedWriters.entrySet()) { diff --git a/performanceDb/src/main/java/org/lucares/performance/db/TagsUtils.java b/performanceDb/src/main/java/org/lucares/performance/db/TagsUtils.java deleted file mode 100644 index 044e4e0..0000000 --- a/performanceDb/src/main/java/org/lucares/performance/db/TagsUtils.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.lucares.performance.db; - -import java.io.File; - -import org.lucares.ludb.Field; -import org.lucares.ludb.FieldNotExistsException; -import org.lucares.ludb.FieldType; -import org.lucares.ludb.H2DB; - -class TagsUtils { - - static void setProperty(final H2DB db, final File file, final String fieldName, final String value) { - try { - db.setProperty(file, fieldName, value); - } catch (final FieldNotExistsException e) { - db.createField(new Field(fieldName, FieldType.STRING)); - try { - db.setProperty(file, fieldName, value); - } catch (final FieldNotExistsException e1) { - throw new 
IllegalStateException(e1); - } - } - } -} diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java b/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java index 6e41e1f..e531172 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java @@ -34,7 +34,7 @@ public class PdbReaderWriterTest { @AfterMethod public void afterMethod() throws IOException { - FileUtils.delete(dataDirectory); + org.lucares.utils.file.FileUtils.delete(dataDirectory); } @DataProvider(name = "providerWriteRead") @@ -84,7 +84,7 @@ public class PdbReaderWriterTest { for (final Entry entry : entries) { - final Entry actual = reader.readEntry(TAGS).orElseThrow(() -> new AssertionError()); + final Entry actual = reader.readEntry().orElseThrow(() -> new AssertionError()); Assert.assertEquals(actual, entry); } diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java index 60f90f7..b659845 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java @@ -1,6 +1,5 @@ package org.lucares.performance.db; -import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -9,13 +8,14 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.ThreadLocalRandom; -import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; import org.lucares.pdb.api.Entry; import org.lucares.pdb.api.GroupResult; import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Tags; +import org.lucares.pdb.datastore.DataStore; +import org.lucares.utils.file.FileUtils; import org.testng.Assert; import 
org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -33,7 +33,7 @@ public class PerformanceDbTest { @AfterMethod public void afterMethod() throws IOException { - FileUtils.delete(dataDirectory); + org.lucares.utils.file.FileUtils.delete(dataDirectory); } public void testInsertRead() throws Exception { @@ -111,15 +111,19 @@ public class PerformanceDbTest { final List actualEntries = db.get(Query.createQuery(tags)).singleGroup().asList(); Assert.assertEquals(actualEntries, entries); - final List foldersInStorage = Files.list(dataDirectory.resolve("data")).filter(Files::isDirectory) - .collect(Collectors.toList()); - Assert.assertEquals(foldersInStorage.size(), 1); + final List filesInStorage = FileUtils.listRecursively(DataStore.storageDirectory(dataDirectory)); - final Path tagSpecificFolder = foldersInStorage.get(0); + Assert.assertEquals(filesInStorage.size(), 1); - final File[] filesInStorage = tagSpecificFolder.toFile().listFiles(); - Assert.assertEquals(filesInStorage.length, 1, - "one file in storage, but was: " + Arrays.asList(filesInStorage)); + final Path tagSpecificFile = filesInStorage.get(0); + + final PdbFile pdbFile = new PdbFile(tagSpecificFile, tags); + + try (PdbReader pdbReader = new PdbReader(pdbFile)) { + Assert.assertEquals(pdbReader.readEntry().get(), entries.get(0)); + Assert.assertEquals(pdbReader.readEntry().get(), entries.get(1)); + Assert.assertEquals(pdbReader.readEntry().isPresent(), false); + } } } diff --git a/performanceDb/src/test/java/org/lucares/performance/db/StorageUtilsTest.java b/performanceDb/src/test/java/org/lucares/performance/db/StorageUtilsTest.java deleted file mode 100644 index a6cf664..0000000 --- a/performanceDb/src/test/java/org/lucares/performance/db/StorageUtilsTest.java +++ /dev/null @@ -1,25 +0,0 @@ -package org.lucares.performance.db; - -import java.nio.file.Path; -import java.nio.file.Paths; - -import org.lucares.pdb.api.Tags; -import org.testng.Assert; -import 
org.testng.annotations.Test; - -@Test -public class StorageUtilsTest { - - public void testGetTagSpecificStorageFolder() { - final Path dataDirectory = Paths.get("/tmp"); - final Tags tags = Tags.create("key", "value"); - - final Path tagSpecifiStorageFolder = StorageUtils.createTagSpecificStorageFolder(dataDirectory, tags); - - final Path storageFile = StorageUtils.createStorageFile(tagSpecifiStorageFolder); - - final Path extractedTagSpecifiStorageFolder = StorageUtils.getTagSpecificStorageFolder(storageFile); - - Assert.assertEquals(extractedTagSpecifiStorageFolder, extractedTagSpecifiStorageFolder); - } -} diff --git a/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java b/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java index 717e244..bf004c0 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java @@ -1,15 +1,14 @@ package org.lucares.performance.db; -import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.time.OffsetDateTime; import java.time.ZoneOffset; -import org.lucares.ludb.H2DB; import org.lucares.pdb.api.Entry; import org.lucares.pdb.api.Tags; +import org.lucares.pdb.datastore.DataStore; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -27,13 +26,13 @@ public class TagsToFilesTest { @AfterMethod public void afterMethod() throws IOException { - org.lucares.performance.db.FileUtils.delete(dataDirectory); + org.lucares.utils.file.FileUtils.delete(dataDirectory); } public void test() throws Exception { - try (H2DB db = new H2DB(new File(dataDirectory.toFile(), "lu.db")); - final TagsToFile tagsToFile = new TagsToFile(dataDirectory, db);) { + final DataStore db = new DataStore(dataDirectory); + try (final TagsToFile tagsToFile = new TagsToFile(db)) { final OffsetDateTime 
date = OffsetDateTime.now(ZoneOffset.UTC); final Tags tags = Tags.create("myKey", "myValue"); @@ -47,9 +46,8 @@ public class TagsToFilesTest { } public void testAppendingToSameFileIfNewDateIsAfter() throws Exception { - - try (H2DB db = new H2DB(new File(dataDirectory.toFile(), "lu.db")); - final TagsToFile tagsToFile = new TagsToFile(dataDirectory, db);) { + final DataStore db = new DataStore(dataDirectory); + try (final TagsToFile tagsToFile = new TagsToFile(db);) { final OffsetDateTime day1 = DateUtils.getDate(2016, 1, 1, 1, 1, 1); final OffsetDateTime day2 = DateUtils.getDate(2016, 1, 2, 1, 1, 1); @@ -68,8 +66,8 @@ public class TagsToFilesTest { @Test(invocationCount = 1) public void testNewFileIfDateIsTooOld() throws Exception { - try (H2DB db = new H2DB(new File(dataDirectory.toFile(), "lu.db")); - final TagsToFile tagsToFile = new TagsToFile(dataDirectory, db);) { + final DataStore db = new DataStore(dataDirectory); + try (final TagsToFile tagsToFile = new TagsToFile(db);) { final OffsetDateTime afternoon = DateUtils.getDate(2016, 1, 1, 13, 1, 1); final OffsetDateTime morning = DateUtils.getDate(2016, 1, 1, 12, 1, 1); @@ -106,8 +104,8 @@ public class TagsToFilesTest { public void testIdenticalDatesGoIntoSameFile() throws Exception { - try (H2DB db = new H2DB(new File(dataDirectory.toFile(), "lu.db")); - final TagsToFile tagsToFile = new TagsToFile(dataDirectory, db);) { + final DataStore db = new DataStore(dataDirectory); + try (final TagsToFile tagsToFile = new TagsToFile(db)) { final OffsetDateTime timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1);