diff --git a/block-storage/build.gradle b/block-storage/build.gradle index 4a8022d..1bf9b18 100644 --- a/block-storage/build.gradle +++ b/block-storage/build.gradle @@ -1,6 +1,7 @@ apply plugin: 'antlr' dependencies { + compile project(':byte-utils') compile project(':file-utils') compile project(':pdb-utils') diff --git a/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFile.java b/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFile.java index 8994423..b090f16 100644 --- a/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFile.java +++ b/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFile.java @@ -11,9 +11,9 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; import org.lucares.collections.LongList; -import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; import org.lucares.pdb.diskstorage.DiskBlock; import org.lucares.pdb.diskstorage.DiskStorage; +import org.lucares.utils.byteencoder.VariableByteEncoder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFileDiskBlock.java b/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFileDiskBlock.java index c33830c..1563d3f 100644 --- a/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFileDiskBlock.java +++ b/block-storage/src/main/java/org/lucares/pdb/blockstorage/BSFileDiskBlock.java @@ -3,8 +3,8 @@ package org.lucares.pdb.blockstorage; import java.nio.MappedByteBuffer; import org.lucares.collections.LongList; -import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; import org.lucares.pdb.diskstorage.DiskBlock; +import org.lucares.utils.byteencoder.VariableByteEncoder; public class BSFileDiskBlock { diff --git a/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java b/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java index 81becb5..cf1fbe9 100644 --- a/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java +++ b/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java @@ -7,7 +7,7 @@ import java.util.List; import java.util.function.Function; import java.util.function.Predicate; -import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; +import org.lucares.utils.byteencoder.VariableByteEncoder; class NodeEntry { enum ValueType { @@ -87,7 +87,13 @@ class NodeEntry { public String toString(final Function keyDecoder, final Function valueDecoder) { final String valueAsString = isInnerNode() ? String.valueOf(VariableByteEncoder.decodeFirstValue(value)) : valueDecoder.apply(value); - final String keyAsString = keyDecoder.apply(key); + + final String keyAsString; + if (Arrays.equals(key, PersistentMap.MAX_KEY)) { + keyAsString = "<<>>"; + } else { + keyAsString = keyDecoder.apply(key); + } return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]"; } diff --git a/byte-utils/.gitignore b/byte-utils/.gitignore new file mode 100644 index 0000000..691dc42 --- /dev/null +++ b/byte-utils/.gitignore @@ -0,0 +1,7 @@ +/.settings/ +/.classpath +/.project +/bin/ +/build/ +/target/ +/test-output/ \ No newline at end of file diff --git a/byte-utils/build.gradle b/byte-utils/build.gradle new file mode 100644 index 0000000..a0821b1 --- /dev/null +++ b/byte-utils/build.gradle @@ -0,0 +1,7 @@ +dependencies { + + + compile 'org.apache.logging.log4j:log4j-core:2.10.0' + compile 'org.apache.logging.log4j:log4j-slf4j-impl:2.10.0' + compile 'org.lucares:primitiveCollections:0.1.20180908084945' +} diff --git a/block-storage/src/main/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoder.java b/byte-utils/src/main/java/org/lucares/utils/byteencoder/VariableByteEncoder.java similarity index 99% rename from block-storage/src/main/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoder.java rename to byte-utils/src/main/java/org/lucares/utils/byteencoder/VariableByteEncoder.java index 17591f6..b11cf88 100644 --- a/block-storage/src/main/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoder.java +++ b/byte-utils/src/main/java/org/lucares/utils/byteencoder/VariableByteEncoder.java @@ -1,4 +1,4 @@ -package org.lucares.pdb.blockstorage.intsequence; +package org.lucares.utils.byteencoder; import java.util.Arrays; diff --git a/block-storage/src/test/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoderTest.java b/byte-utils/src/test/java/org/lucares/utils/byteencoder/VariableByteEncoderTest.java similarity index 97% rename from block-storage/src/test/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoderTest.java rename to byte-utils/src/test/java/org/lucares/utils/byteencoder/VariableByteEncoderTest.java index a95543b..b975cfc 100644 --- a/block-storage/src/test/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoderTest.java +++ b/byte-utils/src/test/java/org/lucares/utils/byteencoder/VariableByteEncoderTest.java @@ -1,4 +1,4 @@ -package org.lucares.pdb.blockstorage.intsequence; +package org.lucares.utils.byteencoder; import static org.testng.Assert.assertEquals; @@ -6,6 +6,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; import org.lucares.collections.LongList; +import org.lucares.utils.byteencoder.VariableByteEncoder; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index f89dc3e..1fe5c9a 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -19,7 +19,6 @@ import org.lucares.pdb.api.StringCompressor; import org.lucares.pdb.api.Tag; import org.lucares.pdb.api.Tags; import org.lucares.pdb.blockstorage.BSFile; -import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; import org.lucares.pdb.datastore.Doc; import org.lucares.pdb.datastore.Proposal; import org.lucares.pdb.datastore.lang.Expression; @@ -29,6 +28,7 @@ import org.lucares.pdb.diskstorage.DiskStorage; import org.lucares.pdb.map.PersistentMap; import org.lucares.pdb.map.PersistentMap.EncoderDecoder; import org.lucares.utils.Preconditions; +import org.lucares.utils.byteencoder.VariableByteEncoder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,13 +52,13 @@ public class DataStore implements AutoCloseable { private static final EncoderDecoder ENCODER_TAGS = new EncoderDecoder<>() { @Override - public byte[] encode(final Tags object) { - return object.getFilenameBytes(); + public byte[] encode(final Tags tags) { + return tags.toBytes(); } @Override public Tags decode(final byte[] bytes) { - return new Tags(bytes); + return Tags.fromBytes(bytes); } }; @@ -68,7 +68,7 @@ public class DataStore implements AutoCloseable { public byte[] encode(final Doc doc) { final byte[] rootBlockNumber = VariableByteEncoder.encode(doc.getRootBlockNumber()); - final byte[] tags = doc.getTags().getFilenameBytes(); + final byte[] tags = doc.getTags().toBytes(); final byte[] result = new byte[rootBlockNumber.length + tags.length]; @@ -83,7 +83,7 @@ public class DataStore implements AutoCloseable { final long rootBlockNumber = VariableByteEncoder.decodeFirstValue(bytes); final int bytesRootBlockNumber = VariableByteEncoder.neededBytes(rootBlockNumber); - final Tags tags = new Tags(Arrays.copyOfRange(bytes, bytesRootBlockNumber, bytes.length)); + final Tags tags = Tags.fromBytes(Arrays.copyOfRange(bytes, bytesRootBlockNumber, bytes.length)); return new Doc(tags, rootBlockNumber); } }; diff --git a/pdb-api/build.gradle b/pdb-api/build.gradle index 16236f2..6e0851f 100644 --- a/pdb-api/build.gradle +++ b/pdb-api/build.gradle @@ -1,5 +1,6 @@ dependencies { + compile project(':byte-utils') compile project(':pdb-utils') compile project(':file-utils') compile 'org.lucares:primitiveCollections:0.1.20180908084945' diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java b/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java index c7f9b1e..76b44df 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/StringCompressor.java @@ -18,7 +18,7 @@ public class StringCompressor { return new StringCompressor(mapsi); } - public Integer put(final String string) { + public int put(final String string) { return usip.computeIfAbsent(string, s -> usip.getHighestInteger() + 1); } diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/TagByKeyAndValueComparator.java b/pdb-api/src/main/java/org/lucares/pdb/api/TagByKeyAndValueComparator.java new file mode 100644 index 0000000..b86cf83 --- /dev/null +++ b/pdb-api/src/main/java/org/lucares/pdb/api/TagByKeyAndValueComparator.java @@ -0,0 +1,8 @@ +package org.lucares.pdb.api; + +import java.util.Comparator; + +public class TagByKeyAndValueComparator { + + public static final Comparator INSTANCE = Comparator.comparing(Tag::getKey).thenComparing(Tag::getValue); +} diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/TagByKeyComparator.java b/pdb-api/src/main/java/org/lucares/pdb/api/TagByKeyComparator.java deleted file mode 100644 index 6bf2e9b..0000000 --- a/pdb-api/src/main/java/org/lucares/pdb/api/TagByKeyComparator.java +++ /dev/null @@ -1,15 +0,0 @@ -package org.lucares.pdb.api; - -import java.io.Serializable; -import java.util.Comparator; - -public class TagByKeyComparator implements Comparator, Serializable { - - private static final long serialVersionUID = -6683582291996307323L; - public static final TagByKeyComparator INSTANCE = new TagByKeyComparator(); - - @Override - public int compare(final Tag a, final Tag b) { - return a.getKey().compareToIgnoreCase(b.getKey()); - } -} diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java index 1971da2..3bcbf50 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java @@ -1,9 +1,8 @@ package org.lucares.pdb.api; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Set; @@ -11,57 +10,30 @@ import java.util.SortedSet; import java.util.TreeSet; import java.util.function.BiConsumer; import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; + +import org.lucares.collections.LongList; +import org.lucares.utils.byteencoder.VariableByteEncoder; public class Tags { public static StringCompressor STRING_COMPRESSOR = null; - public static final byte[] EMPTY_BYTES = new byte[0]; + private static final byte[] EMPTY_BYTES = new byte[0]; public static final Tags EMPTY = new Tags(); - public static final String KEY_VALUE_SEPARATOR = "-"; - public static final String KEY_VALUE_PAIR_SEPARATOR = "_"; - public static final String KEY_VALUE_END_SEPARATOR = "$"; - - private static final String REGEX_KEY_VALUE = "[a-zA-Z0-9]+" + Pattern.quote(KEY_VALUE_SEPARATOR) + "[a-zA-Z0-9]+"; - - private static final String REGEX_KEY_VALUE_PAIRS = REGEX_KEY_VALUE + "(" + Pattern.quote(KEY_VALUE_PAIR_SEPARATOR) - + REGEX_KEY_VALUE + ")*";; - - private static final String REGEX_STORAGE_FILE = String.format("(%1$s)", REGEX_KEY_VALUE_PAIRS); - - private static final Pattern EXTRACT_TAGS_PATTERN = Pattern.compile(REGEX_STORAGE_FILE); - - private final byte[] filenameBytes; + private final SortedSet tags; public Tags() { - filenameBytes = EMPTY_BYTES; + tags = new TreeSet<>(TagByKeyAndValueComparator.INSTANCE); } - public Tags(final byte[] filenameBytes) { - this(new String(filenameBytes, StandardCharsets.UTF_8)); - } - - public Tags(final String serializedTags) { - // serialized tags look like this: 0-1_2-1M_H-28_4-5$1.pdb - // there can be several files for the same set of tags, in which case the number - // after the $ is incremented - // We only take the part until the $. - final int end = serializedTags.indexOf(KEY_VALUE_END_SEPARATOR); - final String normalizedFilename; - if (end >= 0) { - normalizedFilename = serializedTags.substring(0, end); - } else { - normalizedFilename = serializedTags; - } - this.filenameBytes = normalizedFilename.getBytes(StandardCharsets.UTF_8); + public Tags(final Collection tags) { + this.tags = new TreeSet<>(TagByKeyAndValueComparator.INSTANCE); + this.tags.addAll(tags); } public static Tags create(final Collection tags) { - final String newFilename = toFilename(tags); - return new Tags(newFilename); + return new Tags(tags); } public static Tags create() { @@ -85,12 +57,42 @@ public class Tags { return result; } - public String serialize() { - return new String(this.filenameBytes, StandardCharsets.UTF_8); + public static Tags fromBytes(final byte[] bytes) { + final SortedSet result = new TreeSet<>(TagByKeyAndValueComparator.INSTANCE); + + final LongList keyValuesAsLongs = VariableByteEncoder.decode(bytes); + + for (int i = 0; i < keyValuesAsLongs.size(); i += 2) { + + final long keyAsLong = keyValuesAsLongs.get(i); + final long valueAsLong = keyValuesAsLongs.get(i + 1); + + final String key = STRING_COMPRESSOR.get((int) keyAsLong); + final String value = STRING_COMPRESSOR.get((int) valueAsLong); + result.add(new Tag(key, value)); + } + + return new Tags(result); } - public byte[] getFilenameBytes() { - return filenameBytes; + public byte[] toBytes() { + final byte[] result; + + if (tags.size() > 0) { + final LongList keyValuesAsLongs = new LongList(tags.size() * 2); + for (final Tag tag : tags) { + final long keyAsLong = STRING_COMPRESSOR.put(tag.getKey()); + final long valueAsLong = STRING_COMPRESSOR.put(tag.getValue()); + + keyValuesAsLongs.add(keyAsLong); + keyValuesAsLongs.add(valueAsLong); + } + + result = VariableByteEncoder.encode(keyValuesAsLongs); + } else { + result = EMPTY_BYTES; + } + return result; } public String getValue(final String key) { @@ -105,54 +107,7 @@ public class Tags { } public SortedSet toTags() { - final SortedSet result = new TreeSet<>(TagByKeyComparator.INSTANCE); - final String filename = new String(this.filenameBytes, StandardCharsets.UTF_8); - final Matcher matcher = EXTRACT_TAGS_PATTERN.matcher(filename); - - if (matcher.find()) { - final String serializedTags = matcher.group(1); - - final String[] serializedKeyValuePairs = serializedTags.split(Pattern.quote(KEY_VALUE_PAIR_SEPARATOR)); - - for (int i = 0; i < serializedKeyValuePairs.length; i++) { - final String[] keyValuePair = serializedKeyValuePairs[i].split(Pattern.quote(KEY_VALUE_SEPARATOR)); - - if (keyValuePair.length == 2) { - - final String key = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[0])); - final String value = STRING_COMPRESSOR.get(RadixConverter.fromString(keyValuePair[1])); - - result.add(new Tag(key, value)); - } - } - } - return result; - } - - private static String toFilename(final Collection tags) { - final StringBuilder path = new StringBuilder(); - - final Tag[] tagsAsArray = tags.toArray(new Tag[tags.size()]); - Arrays.sort(tagsAsArray, TagByKeyComparator.INSTANCE); - - for (final Tag tag : tagsAsArray) { - final String key = tag.getKey(); - final String value = tag.getValue(); - - final int compressedKey = STRING_COMPRESSOR.put(key); - final int compressedValue = STRING_COMPRESSOR.put(value); - - if (path.length() > 0) { - path.append(Tags.KEY_VALUE_PAIR_SEPARATOR); - } - - path.append(RadixConverter.toString(compressedKey)); - path.append(Tags.KEY_VALUE_SEPARATOR); - path.append(RadixConverter.toString(compressedValue)); - } - path.append(Tags.KEY_VALUE_END_SEPARATOR); - - return path.toString(); + return Collections.unmodifiableSortedSet(tags); } public Set getKeys() { @@ -183,14 +138,14 @@ public class Tags { @Override public String toString() { - return "Tags [filename=" + serialize() + ", tags=" + toTags() + "]"; + return "Tags [tags=" + toTags() + "]"; } @Override public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + Arrays.hashCode(filenameBytes); + result = prime * result + ((tags == null) ? 0 : tags.hashCode()); return result; } @@ -203,7 +158,10 @@ public class Tags { if (getClass() != obj.getClass()) return false; final Tags other = (Tags) obj; - if (!Arrays.equals(filenameBytes, other.filenameBytes)) + if (tags == null) { + if (other.tags != null) + return false; + } else if (!tags.equals(other.tags)) return false; return true; } @@ -224,11 +182,7 @@ public class Tags { } public boolean isEmpty() { - return filenameBytes == null || filenameBytes.length == 0; - } - - public static Tags create(final String filename) { - return new Tags(filename); + return tags.isEmpty(); } /** @@ -237,7 +191,6 @@ public class Tags { public String asString() { final StringBuilder result = new StringBuilder(); - final SortedSet tags = toTags(); for (final Tag tag : tags) { if (result.length() > 0) {