diff --git a/block-storage/src/main/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoder.java b/block-storage/src/main/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoder.java index 11efade..c5583f4 100644 --- a/block-storage/src/main/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoder.java +++ b/block-storage/src/main/java/org/lucares/pdb/blockstorage/intsequence/VariableByteEncoder.java @@ -25,6 +25,9 @@ import org.lucares.collections.LongList; */ public class VariableByteEncoder { + public static final long MIN_VALUE = Long.MIN_VALUE / 2 + 1; + public static final long MAX_VALUE = Long.MAX_VALUE / 2; + private static final int MAX_BYTES_PER_VALUE = 10; private static final int CONTINUATION_BYTE_FLAG = 1 << 7; // 10000000 @@ -83,8 +86,8 @@ public class VariableByteEncoder { int offset = offsetInBuffer; - assert value >= Long.MIN_VALUE / 2 + 1 : "min encodable value is -2^62+1"; - assert value <= Long.MAX_VALUE / 2 : "max encodable value is 2^62"; + assert value >= MIN_VALUE : "min encodable value is -2^62+1 = " + MIN_VALUE; + assert value <= MAX_VALUE : "max encodable value is 2^62 = " + MAX_VALUE; long normVal = encodeIntoPositiveValue(value); diff --git a/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java b/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java index d58973c..3816957 100644 --- a/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java +++ b/block-storage/src/main/java/org/lucares/pdb/map/NodeEntry.java @@ -1,15 +1,13 @@ package org.lucares.pdb.map; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.List; +import java.util.function.Function; import java.util.function.Predicate; -import org.lucares.collections.LongList; import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; -import org.lucares.utils.Preconditions; class NodeEntry { enum ValueType { @@ -29,6 +27,10 @@ class 
NodeEntry { } throw new IllegalStateException("Cannot map byte " + b + " to a value type."); } + + public byte asByte() { + return b; + } } static final class KeyMatches implements Predicate { @@ -82,6 +84,14 @@ class NodeEntry { + valueAsString + "]"; } + public String toString(final Function keyDecoder, final Function valueDecoder) { + final String valueAsString = isInnerNode() ? String.valueOf(VariableByteEncoder.decodeFirstValue(value)) + : valueDecoder.apply(value); + final String keyAsString = keyDecoder.apply(key); + + return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]"; + } + @Override public int hashCode() { final int prime = 31; @@ -110,93 +120,10 @@ class NodeEntry { return true; } - public static List deserialize(final byte[] buffer) { - final List entries = new ArrayList<>(); - final LongList keyLengths = VariableByteEncoder.decode(buffer); - - if (keyLengths.isEmpty() || keyLengths.get(0) == 0) { - // node is empty -> should only happen for the root node - } else { - final int numEntries = (int) keyLengths.get(0); - - int offset = PersistentMap.BLOCK_SIZE; - for (int i = 0; i < numEntries; i++) { - final int keyLength = (int) keyLengths.get(i * 2 + 1); - final int valueLength = (int) keyLengths.get(i * 2 + 2); - - final int valueOffset = offset - valueLength; - final int keyOffset = valueOffset - keyLength; - final int typeOffset = keyOffset - 1; - - final byte typeByte = buffer[typeOffset]; - final byte[] key = Arrays.copyOfRange(buffer, keyOffset, keyOffset + keyLength); - final byte[] value = Arrays.copyOfRange(buffer, valueOffset, valueOffset + valueLength); - - final NodeEntry entry = new NodeEntry(ValueType.fromByte(typeByte), key, value); - - entries.add(entry); - - offset = typeOffset; - } - } - return entries; - } - public static int neededBytes(final List entries) { return entries.stream().mapToInt(NodeEntry::size).sum(); } - public static int neededBytesTotal(final List entries) { - final byte[] 
buffer = new byte[PersistentMap.BLOCK_SIZE]; - - final int usedBytes = serializeKeyLengths(entries, buffer); - - return usedBytes + NodeEntry.neededBytes(entries); - } - - public static byte[] serialize(final List entries) { - final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE]; - - final int usedBytes = serializeKeyLengths(entries, buffer); - - Preconditions.checkGreater(PersistentMap.BLOCK_SIZE, usedBytes + NodeEntry.neededBytes(entries), ""); - - NodeEntry.serializeIntoFromTail(entries, buffer); - return buffer; - } - - private static int serializeKeyLengths(final List entries, final byte[] buffer) { - final var keyLengths = new LongList(); - keyLengths.add(entries.size()); - for (final NodeEntry nodeEntry : entries) { - keyLengths.add(nodeEntry.getKey().length); - keyLengths.add(nodeEntry.getValue().length); - } - - final int usedBytes = VariableByteEncoder.encodeInto(keyLengths, buffer, 0); - return usedBytes; - } - - private static void serializeIntoFromTail(final List entries, final byte[] buffer) { - - int offset = buffer.length; - - for (final var entry : entries) { - final byte[] valueBytes = entry.getValue(); - final byte[] keyBytes = entry.getKey(); - - final int offsetValue = offset - valueBytes.length; - final int offsetKey = offsetValue - keyBytes.length; - final int offsetType = offsetKey - 1; - - System.arraycopy(valueBytes, 0, buffer, offsetValue, valueBytes.length); - System.arraycopy(keyBytes, 0, buffer, offsetKey, keyBytes.length); - buffer[offsetType] = entry.getType().b; - - offset = offsetType; - } - } - public int compare(final byte[] otherKey) { int i = 0; diff --git a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java index 68b722a..9b2af99 100644 --- a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java +++ b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java @@ -1,37 +1,59 @@ package org.lucares.pdb.map; import 
java.io.IOException; +import java.io.PrintStream; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.Collections; import java.util.Stack; +import java.util.function.Function; import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; import org.lucares.pdb.diskstorage.DiskBlock; import org.lucares.pdb.diskstorage.DiskStorage; import org.lucares.utils.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class PersistentMap { + private static final Logger LOGGER = LoggerFactory.getLogger(PersistentMap.class); + + // the maximum key + private static final byte[] MAX_KEY = new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, + Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, + Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, + Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, + Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE }; + interface VisitorCallback { - void visit(NodeEntry nodeEntry, int depth); + void visit(PersistentMapDiskNode node, PersistentMapDiskNode parentNode, NodeEntry nodeEntry, int depth); } interface NodeVisitorCallback { void visit(PersistentMapDiskNode node, int depth); } + public static final Function STRING_DECODER = t -> new String(t, StandardCharsets.UTF_8); + public static final Function LONG_DECODER = t -> String + .valueOf(VariableByteEncoder.decodeFirstValue(t)); + private static final Charset UTF8 = StandardCharsets.UTF_8; static final int BLOCK_SIZE = 4096; static final long NODE_OFFSET_TO_ROOT_NODE = 8; private final DiskStorage diskStore; + private int maxEntriesInNode = Integer.MAX_VALUE; + public PersistentMap(final DiskStorage diskStore) throws IOException { this.diskStore = diskStore; initIfNew(); } + public void setMaxEntriesInNode(final int maxEntriesInNode) { + this.maxEntriesInNode = 
maxEntriesInNode; + } + private void initIfNew() throws IOException { if (diskStore.size() < BLOCK_SIZE) { final long nodeOffsetToRootNode = diskStore.allocateBlock(diskStore.minAllocationSize()); @@ -39,30 +61,51 @@ public class PersistentMap { "The offset of the pointer to the root node must be at a well known location. " + "Otherwise we would not be able to find it in an already existing file."); - // 2. make sure that new blocks are aligned to the block size (for faster disk - // IO) + // 2. make sure new blocks are aligned to the block size (for faster disk IO) diskStore.ensureAlignmentForNewBlocks(BLOCK_SIZE); // 3. initialize an empty root node final long blockOffset = diskStore.allocateBlock(BLOCK_SIZE); + final var rootNode = PersistentMapDiskNode.emptyRootNode(blockOffset); + writeNode(rootNode); - // 4. upate pointer to root node + // 4. update pointer to root node writeNodeOffsetOfRootNode(blockOffset); + + // 5. insert a dummy entry with a 'maximum' key + putValue(MAX_KEY, new byte[] { 0 }); } } - public Long put(final String key, final long value) throws IOException { - final byte[] oldValue = put(key.getBytes(UTF8), VariableByteEncoder.encode(value)); + public Long putValue(final String key, final long value) throws IOException { + final byte[] oldValue = putValue(key.getBytes(UTF8), VariableByteEncoder.encode(value)); return oldValue == null ? null : VariableByteEncoder.decodeFirstValue(oldValue); } - public long getAsLong(final String key) throws IOException { + public String putValue(final long key, final String value) throws IOException { + final byte[] oldValue = putValue(VariableByteEncoder.encode(key), value.getBytes(UTF8)); + return oldValue == null ? null : new String(oldValue, UTF8); + } + + public Long putValue(final long key, final long value) throws IOException { + final byte[] oldValue = putValue(VariableByteEncoder.encode(key), VariableByteEncoder.encode(value)); + return oldValue == null ? 
null : VariableByteEncoder.decodeFirstValue(oldValue); + } + + public Long getAsLong(final String key) throws IOException { final byte[] buffer = get(key.getBytes(UTF8)); return buffer == null ? null : VariableByteEncoder.decodeFirstValue(buffer); } - public String put(final String key, final String value) throws IOException { - final byte[] oldValue = put(key.getBytes(UTF8), value.getBytes(UTF8)); + public Long getAsLong(final long key) throws IOException { + final byte[] buffer = get(VariableByteEncoder.encode(key)); + return buffer == null ? null : VariableByteEncoder.decodeFirstValue(buffer); + } + + public String putValue(final String key, final String value) throws IOException { + final byte[] keyBytes = key.getBytes(UTF8); + final byte[] valueBytes = value.getBytes(UTF8); + final byte[] oldValue = putValue(keyBytes, valueBytes); return oldValue == null ? null : new String(oldValue, UTF8); } @@ -72,7 +115,7 @@ public class PersistentMap { return value == null ? null : new String(value, UTF8); } - public byte[] put(final byte[] key, final byte[] value) throws IOException { + public byte[] putValue(final byte[] key, final byte[] value) throws IOException { final long rootNodeOffset = readNodeOffsetOfRootNode(); final Stack parents = new Stack<>(); return insert(parents, rootNodeOffset, key, value); @@ -105,7 +148,7 @@ public class PersistentMap { } } - if (node.canAdd(key, value)) { + if (node.canAdd(key, value, maxEntriesInNode)) { // insert in existing node node.addKeyValue(key, value); writeNode(node); @@ -117,7 +160,7 @@ public class PersistentMap { // 2. 
insert the value // start from the root, because we might have added a new root node - return put(key, value); + return putValue(key, value); } } else { final long childNodeOffset = toNodeOffset(entry); @@ -126,37 +169,60 @@ public class PersistentMap { } } - private void splitNode(final Stack parents, final PersistentMapDiskNode node) - throws IOException { + private PersistentMapDiskNode splitNode(final Stack parents, + final PersistentMapDiskNode node) throws IOException { + + // System.out.println("\n\npre split node: " + node + "\n"); final long newBlockOffset = diskStore.allocateBlock(BLOCK_SIZE); final PersistentMapDiskNode newNode = node.split(newBlockOffset); final PersistentMapDiskNode parent = parents.isEmpty() ? null : parents.pop(); + if (parent != null) { final byte[] newNodeKey = newNode.getTopNodeEntry().getKey(); - parent.addKeyNodePointer(newNodeKey, newBlockOffset); + if (parent.canAdd(newNodeKey, newBlockOffset, maxEntriesInNode)) { + parent.addKeyNodePointer(newNodeKey, newBlockOffset); + writeNode(parent); + writeNode(newNode); + writeNode(node); + return parent; + } else { + final PersistentMapDiskNode grandParentNode = splitNode(parents, parent); + + final NodeEntry pointerToParentAfterSplit = grandParentNode.getNodeEntryTo(newNodeKey); + + Preconditions.checkEqual(pointerToParentAfterSplit.isInnerNode(), true, "{0} is pointer to inner node", + pointerToParentAfterSplit); + final long parentNodeOffset = toNodeOffset(pointerToParentAfterSplit); // the parent we have to add the + // newNode to + final PersistentMapDiskNode parentNode = getNode(parentNodeOffset); + parentNode.addKeyNodePointer(newNodeKey, newBlockOffset); + writeNode(parentNode); + writeNode(newNode); + writeNode(node); + return parentNode; + } - final byte[] oldNodeKey = node.getTopNodeEntry().getKey(); - parent.addKeyNodePointer(oldNodeKey, node.getNodeOffset()); - writeNode(parent); } else { // has no parent -> create a new parent (the new parent will also be the new // 
root) - final long newRootOffset = diskStore.allocateBlock(BLOCK_SIZE); - final PersistentMapDiskNode rootNode = new PersistentMapDiskNode(newRootOffset, Collections.emptyList()); + final long newRootNodeOffset = diskStore.allocateBlock(BLOCK_SIZE); + final PersistentMapDiskNode rootNode = PersistentMapDiskNode.emptyRootNode(newRootNodeOffset); final byte[] newNodeKey = newNode.getTopNodeEntry().getKey(); rootNode.addKeyNodePointer(newNodeKey, newBlockOffset); final byte[] oldNodeKey = node.getTopNodeEntry().getKey(); rootNode.addKeyNodePointer(oldNodeKey, node.getNodeOffset()); writeNode(rootNode); - writeNodeOffsetOfRootNode(newRootOffset); - } - writeNode(newNode); - writeNode(node); + writeNode(newNode); + writeNode(node); + + writeNodeOffsetOfRootNode(newRootNodeOffset); + return rootNode; + } } private NodeEntry findNodeEntry(final long nodeOffest, final byte[] key) throws IOException { @@ -190,6 +256,7 @@ public class PersistentMap { } private void writeNode(final PersistentMapDiskNode node) throws IOException { + LOGGER.info("writing node {}", node); final long nodeOffest = node.getNodeOffset(); final DiskBlock diskBlock = diskStore.getDiskBlock(nodeOffest, BLOCK_SIZE); final byte[] buffer = diskBlock.getBuffer(); @@ -199,27 +266,35 @@ public class PersistentMap { diskBlock.force(); } - public void print() throws IOException { + public void print(final Function keyDecoder, final Function valueDecoder) + throws IOException { - visitNodeEntriesPreOrder((nodeEntry, depth) -> System.out.println(" ".repeat(depth) + nodeEntry)); + visitNodeEntriesPreOrder((node, parentNode, nodeEntry, depth) -> { + final PrintStream writer = System.out; + + final String children = "#" + node.getEntries().size(); + + writer.println(" ".repeat(depth) + "@" + node.getNodeOffset() + " " + children + " " + + nodeEntry.toString(keyDecoder, valueDecoder)); + }); } public void visitNodeEntriesPreOrder(final VisitorCallback visitor) throws IOException { final long rootNodeOffset = 
readNodeOffsetOfRootNode(); - visitNodeEntriesPreOrderRecursively(rootNodeOffset, visitor, 0); + visitNodeEntriesPreOrderRecursively(rootNodeOffset, null, visitor, 0); } - private void visitNodeEntriesPreOrderRecursively(final long nodeOffset, final VisitorCallback visitor, - final int depth) throws IOException { + private void visitNodeEntriesPreOrderRecursively(final long nodeOffset, final PersistentMapDiskNode parentNode, + final VisitorCallback visitor, final int depth) throws IOException { final PersistentMapDiskNode node = getNode(nodeOffset); for (final NodeEntry child : node.getEntries()) { - visitor.visit(child, depth); + visitor.visit(node, parentNode, child, depth); if (child.isInnerNode()) { final long childNodeOffset = VariableByteEncoder.decodeFirstValue(child.getValue()); - visitNodeEntriesPreOrderRecursively(childNodeOffset, visitor, depth + 1); + visitNodeEntriesPreOrderRecursively(childNodeOffset, node, visitor, depth + 1); } } } diff --git a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java index 2becb4f..8c1d38c 100644 --- a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java +++ b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java @@ -1,30 +1,40 @@ package org.lucares.pdb.map; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; +import org.lucares.collections.LongList; import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; import org.lucares.pdb.map.NodeEntry.ValueType; +import org.lucares.utils.Preconditions; /** *
  * Node layout:
- * ┏━━━┳━━━━━┳━━━━━┳━━━━━┳╸╺╸╺╸╺╸╺┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
- * ┃ 6 ┃ 5,6 ┃ 3,6 ┃ 3,2 ┃        ┃"ba"->"147"┃"foobar"->"467"┃"foobaz"->"value"┃
- * ┗━━━┻━━━━━┻━━━━━┻━━━━━┻╸╺╸╺╸╺╸╺┻━━━━━━━━━━━┻━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━┛
- *   │   │ │   │ │   │ └▶ size of the third last key ("ba" in this example)
- *   │   │ │   │ │   └▶ size of the third last value ("147" in this example)
- *   │   │ │   │ └▶ size of the second last key ("foobar" in this example)
- *   │   │ │   └▶ size of the second last value ("467" in this example)
- *   │   │ └▶ size of the last key ("foobaz" in this example)
- *   │   └▶ size of the last value (the string "value" in this example)
- *   └▶ number of entries * 2
+ *
+ *  ◀──────────   Prefix ──────────▶         ◀───────────────── Suffix ──────────────────▶
+ * ┏━━━━━┳━━━┳━━━━━┳━━━━━┳━━━━━┳━━━┳╸╺╸╺╸╺╸╺┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
+ * ┃ 456 ┃ 3 ┃ 6,5 ┃ 6,3 ┃ 2,3 ┃ ∅ ┃        ┃"ba"->"147"┃"foobar"->"467"┃"foobaz"->"value"┃
+ * ┗━━━━━┻━━━┻━━━━━┻━━━━━┻━━━━━┻━━━┻╸╺╸╺╸╺╸╺┻━━━━━━━━━━━┻━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━┛
+ *    │    │   │ │   │ │   │ │   └▶ null byte that terminates the prefix.
+ *    │    │   │ │   │ │   │ └▶ size of the third-to-last value ("147" in this example)
+ *    │    │   │ │   │ │   └▶ size of the third-to-last key ("ba" in this example)
+ *    │    │   │ │   │ └▶ size of the second-to-last value ("467" in this example)
+ *    │    │   │ │   └▶ size of the second-to-last key ("foobar" in this example)
+ *    │    │   │ └▶ size of the last value (the string "value" in this example)
+ *    │    │   └▶ size of the last key ("foobaz" in this example)
+ *    │    └▶ number of entries
+ *    └▶ node offset of the parent node (-1 if there is no parent node). NOTE(review): serializePrefix() does not write this field; the encoded prefix starts with the number of entries - confirm.
+ *
  * 
*/ public class PersistentMapDiskNode { + public static final long NO_NODE_OFFSET = -1; + private final List entries; private final long nodeOffset; @@ -33,19 +43,58 @@ public class PersistentMapDiskNode { this.entries = new ArrayList<>(entries); } + public static PersistentMapDiskNode emptyRootNode(final long nodeOffset) { + return new PersistentMapDiskNode(nodeOffset, Collections.emptyList()); + } + public static PersistentMapDiskNode parse(final long nodeOffset, final byte[] data) { if (data.length != PersistentMap.BLOCK_SIZE) { throw new IllegalStateException( "block size must be " + PersistentMap.BLOCK_SIZE + " but was " + data.length); } + final LongList longs = VariableByteEncoder.decode(data); + if (longs.size() == 0) { + System.out.println(); + } - final List entries = NodeEntry.deserialize(data); + final List entries = deserialize(longs, data); return new PersistentMapDiskNode(nodeOffset, entries); } + public static List deserialize(final LongList keyLengths, final byte[] buffer) { + final List entries = new ArrayList<>(); + + if (keyLengths.isEmpty() || keyLengths.get(0) == 0) { + // node is empty -> should only happen for the root node + } else { + final int numEntries = (int) keyLengths.get(0); + + int offset = PersistentMap.BLOCK_SIZE; + for (int i = 0; i < numEntries; i++) { + final int keyLength = (int) keyLengths.get(i * 2 + 1); + final int valueLength = (int) keyLengths.get(i * 2 + 2); + + final int valueOffset = offset - valueLength; + final int keyOffset = valueOffset - keyLength; + final int typeOffset = keyOffset - 1; + + final byte typeByte = buffer[typeOffset]; + final byte[] key = Arrays.copyOfRange(buffer, keyOffset, keyOffset + keyLength); + final byte[] value = Arrays.copyOfRange(buffer, valueOffset, valueOffset + valueLength); + + final NodeEntry entry = new NodeEntry(ValueType.fromByte(typeByte), key, value); + + entries.add(entry); + + offset = typeOffset; + } + } + return entries; + } + public byte[] serialize() { - return 
NodeEntry.serialize(entries); + return serialize(entries); } public long getNodeOffset() { @@ -56,7 +105,7 @@ public class PersistentMapDiskNode { final NodeEntry result = null; for (final NodeEntry entry : entries) { - // if (entry.compare(key) <= 0) { + if (entry.compare(key) >= 0) { return entry; } else { @@ -84,12 +133,23 @@ public class PersistentMapDiskNode { Collections.sort(entries, NodeEntry.SORT_BY_KEY); } - public boolean canAdd(final byte[] key, final byte[] value) { - final NodeEntry entry = new NodeEntry(ValueType.VALUE_INLINE, key, value); - final List tmp = new ArrayList<>(entries.size() + 1); - tmp.addAll(entries); - tmp.add(entry); - return NodeEntry.neededBytesTotal(tmp) <= PersistentMap.BLOCK_SIZE; + public boolean canAdd(final byte[] key, final long nodeOffset, final int maxEntriesInNode) { + return canAdd(key, VariableByteEncoder.encode(nodeOffset), maxEntriesInNode); + } + + public boolean canAdd(final byte[] key, final byte[] value, final int maxEntriesInNode) { + + if (entries.size() > maxEntriesInNode) { + return false; + } else { + final NodeEntry entry = new NodeEntry(ValueType.VALUE_INLINE, key, value); + final List tmp = new ArrayList<>(entries.size() + 1); + tmp.addAll(entries); + tmp.add(entry); + + // the +1 is for the null-byte terminator of the prefix + return neededBytesTotal(tmp) + 1 <= PersistentMap.BLOCK_SIZE; + } } public void removeKey(final byte[] key) { @@ -106,7 +166,8 @@ public class PersistentMapDiskNode { @Override public String toString() { - return String.join("\n", entries.stream().map(NodeEntry::toString).collect(Collectors.toList())); + return "@" + nodeOffset + ": " + + String.join("\n", entries.stream().map(NodeEntry::toString).collect(Collectors.toList())); } public NodeEntry getTopNodeEntry() { @@ -119,9 +180,68 @@ public class PersistentMapDiskNode { final var rightEntries = new ArrayList<>(entries.subList(entries.size() / 2, entries.size())); entries.clear(); - entries.addAll(leftEntries); + 
entries.addAll(rightEntries); - return new PersistentMapDiskNode(newBlockOffset, rightEntries); + return new PersistentMapDiskNode(newBlockOffset, leftEntries); + } + + public static int neededBytesTotal(final List entries) { + final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE]; + + final int usedBytes = serializePrefix(entries, buffer); + + return usedBytes + NodeEntry.neededBytes(entries); + } + + private static byte[] serialize(final List entries) { + final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE]; + + final int usedBytes = serializePrefix(entries, buffer); + + // the +1 is for the null-byte terminator of the prefix + Preconditions.checkGreaterOrEqual(PersistentMap.BLOCK_SIZE, usedBytes + 1 + NodeEntry.neededBytes(entries), + "The node is too big. It cannot be encoded into " + PersistentMap.BLOCK_SIZE + " bytes."); + + serializeIntoFromTail(entries, buffer); + return buffer; + } + + private static int serializePrefix(final List entries, final byte[] buffer) { + final LongList longs = serializeKeyLengths(entries); + + final int usedBytes = VariableByteEncoder.encodeInto(longs, buffer, 0); + return usedBytes; + } + + private static LongList serializeKeyLengths(final List entries) { + final var keyLengths = new LongList(); + keyLengths.add(entries.size()); + for (final NodeEntry nodeEntry : entries) { + keyLengths.add(nodeEntry.getKey().length); + keyLengths.add(nodeEntry.getValue().length); + } + + return keyLengths; + } + + private static void serializeIntoFromTail(final List entries, final byte[] buffer) { + + int offset = buffer.length; + + for (final var entry : entries) { + final byte[] valueBytes = entry.getValue(); + final byte[] keyBytes = entry.getKey(); + + final int offsetValue = offset - valueBytes.length; + final int offsetKey = offsetValue - keyBytes.length; + final int offsetType = offsetKey - 1; + + System.arraycopy(valueBytes, 0, buffer, offsetValue, valueBytes.length); + System.arraycopy(keyBytes, 0, buffer, offsetKey, 
keyBytes.length); + buffer[offsetType] = entry.getType().asByte(); + + offset = offsetType; + } } } diff --git a/block-storage/src/test/java/org/lucares/pdb/map/NodeEntryTest.java b/block-storage/src/test/java/org/lucares/pdb/map/NodeEntryTest.java index f6a4e59..f66005d 100644 --- a/block-storage/src/test/java/org/lucares/pdb/map/NodeEntryTest.java +++ b/block-storage/src/test/java/org/lucares/pdb/map/NodeEntryTest.java @@ -1,32 +1,7 @@ package org.lucares.pdb.map; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; - -import org.lucares.pdb.map.NodeEntry.ValueType; -import org.testng.Assert; import org.testng.annotations.Test; @Test public class NodeEntryTest { - public void serializeDeserialize() throws Exception { - - final List entries = new ArrayList<>(); - entries.add(newNode(ValueType.NODE_POINTER, "key1", "value1")); - entries.add(newNode(ValueType.VALUE_INLINE, "key2_", "value2--")); - entries.add(newNode(ValueType.NODE_POINTER, "key3__", "value3---")); - entries.add(newNode(ValueType.VALUE_INLINE, "key4___", "value4----")); - - final byte[] buffer = NodeEntry.serialize(entries); - - final List actualEntries = NodeEntry.deserialize(buffer); - - Assert.assertEquals(actualEntries, entries); - } - - private static NodeEntry newNode(final ValueType type, final String key, final String value) { - return new NodeEntry(ValueType.VALUE_INLINE, key.getBytes(StandardCharsets.UTF_8), - value.getBytes(StandardCharsets.UTF_8)); - } } diff --git a/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapDiskNodeTest.java b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapDiskNodeTest.java new file mode 100644 index 0000000..612ade0 --- /dev/null +++ b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapDiskNodeTest.java @@ -0,0 +1,38 @@ +package org.lucares.pdb.map; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import 
java.util.concurrent.ThreadLocalRandom; + +import org.lucares.pdb.map.NodeEntry.ValueType; +import org.testng.Assert; +import org.testng.annotations.Test; + +@Test +public class PersistentMapDiskNodeTest { + + public void serializeDeserialize() throws Exception { + + final List entries = new ArrayList<>(); + entries.add(newNode(ValueType.NODE_POINTER, "key1", "value1")); + entries.add(newNode(ValueType.VALUE_INLINE, "key2_", "value2--")); + entries.add(newNode(ValueType.NODE_POINTER, "key3__", "value3---")); + entries.add(newNode(ValueType.VALUE_INLINE, "key4___", "value4----")); + + final long nodeOffset = ThreadLocalRandom.current().nextInt(); + final PersistentMapDiskNode node = new PersistentMapDiskNode(nodeOffset, entries); + + final byte[] buffer = node.serialize(); + + final PersistentMapDiskNode actualNode = PersistentMapDiskNode.parse(nodeOffset, buffer); + + Assert.assertEquals(actualNode.getEntries(), entries); + } + + private static NodeEntry newNode(final ValueType type, final String key, final String value) { + return new NodeEntry(ValueType.VALUE_INLINE, key.getBytes(StandardCharsets.UTF_8), + value.getBytes(StandardCharsets.UTF_8)); + } + +} diff --git a/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java index 3b5d993..ec40348 100644 --- a/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java +++ b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java @@ -1,10 +1,15 @@ package org.lucares.pdb.map; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.security.SecureRandom; +import java.util.Arrays; import java.util.HashMap; +import java.util.LinkedList; +import java.util.Objects; +import java.util.Queue; +import java.util.Random; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; @@ -40,7 +45,7 @@ public class 
PersistentMapTest { Assert.assertNull(map.getAsString(key)); - Assert.assertNull(map.put(key, value)); + Assert.assertNull(map.putValue(key, value)); Assert.assertEquals(map.getAsString(key), value); } @@ -55,21 +60,153 @@ public class PersistentMapTest { final Path file = dataDirectory.resolve("map.db"); final var insertedValues = new HashMap(); + final Random rnd = new Random(1); + try (final DiskStorage ds = new DiskStorage(file)) { final PersistentMap map = new PersistentMap(ds); + map.setMaxEntriesInNode(2); - for (int i = 0; i < 200; i++) { - - final String key = UUID.randomUUID().toString() + "__" + i; - final String value = "long value to waste some bytes " + i; + for (int i = 0; i < 100; i++) { + // System.out.println("\n\ninserting: " + i); + final UUID nextUUID = new UUID(rnd.nextLong(), rnd.nextLong()); + final String key = nextUUID.toString() + "__" + i; + final String value = "long value to waste some bytes " + i + "__" + + UUID.randomUUID().toString().repeat(1); Assert.assertNull(map.getAsString(key)); - Assert.assertNull(map.put(key, value)); + Assert.assertNull(map.putValue(key, value)); insertedValues.put(key, value); + // map.print(PersistentMap.STRING_DECODER, PersistentMap.STRING_DECODER); + + final boolean failEarly = false; + if (failEarly) { + for (final var entry : insertedValues.entrySet()) { + final String actualValue = map.getAsString(entry.getKey()); + + if (!Objects.equals(actualValue, entry.getValue())) { + map.print(PersistentMap.STRING_DECODER, PersistentMap.STRING_DECODER); + } + + Assert.assertEquals(actualValue, entry.getValue(), + "value for key " + entry.getKey() + " in the " + i + "th iteration"); + } + } + } + } + + try (final DiskStorage ds = new DiskStorage(file)) { + final PersistentMap map = new PersistentMap(ds); + // map.print(PersistentMap.STRING_DECODER, PersistentMap.STRING_DECODER); + final AtomicInteger maxDepth = new AtomicInteger(); + map.visitNodeEntriesPreOrder( + (node, parentNode, nodeEntry, depth) -> 
maxDepth.set(Math.max(depth, maxDepth.get()))); + + Assert.assertTrue(maxDepth.get() >= 4, + "The tree's depth. This test must have at least depth 4, " + + "so that we can be sure that splitting parent nodes works recursively, but was " + + maxDepth.get()); + + for (final var entry : insertedValues.entrySet()) { + final String actualValue = map.getAsString(entry.getKey()); + Assert.assertEquals(actualValue, entry.getValue(), + "value for key " + entry.getKey() + " after all iterations"); + } + + } + } + + @Test + public void testManySmallValues() throws Exception { + final Path file = dataDirectory.resolve("map.db"); + final var insertedValues = new HashMap(); + + final SecureRandom rnd = new SecureRandom(); + rnd.setSeed(1); + + try (final DiskStorage ds = new DiskStorage(file)) { + final PersistentMap map = new PersistentMap(ds); + + for (int i = 0; i < 1000; i++) { + // System.out.println("\n\ninserting: " + i); + + final Long key = (long) (rnd.nextGaussian() * Integer.MAX_VALUE); + final Long value = (long) (rnd.nextGaussian() * Integer.MAX_VALUE); + Assert.assertNull(map.getAsLong(key)); + + Assert.assertNull(map.putValue(key, value)); + + insertedValues.put(key, value); + + // map.print(); + + final boolean failEarly = false; + if (failEarly) { + for (final var entry : insertedValues.entrySet()) { + final Long actualValue = map.getAsLong(entry.getKey()); + + if (!Objects.equals(actualValue, entry.getValue())) { + map.print(PersistentMap.LONG_DECODER, PersistentMap.LONG_DECODER); + } + + Assert.assertEquals(actualValue, entry.getValue(), + "value for key " + entry.getKey() + " in the " + i + "th iteration"); + } + } + } + } + + try (final DiskStorage ds = new DiskStorage(file)) { + final PersistentMap map = new PersistentMap(ds); + // map.print(PersistentMap.LONG_DECODER, PersistentMap.LONG_DECODER); + final AtomicInteger counter = new AtomicInteger(); + map.visitNodeEntriesPreOrder( + (node, parentNode, nodeEntry, depth) -> 
counter.addAndGet(nodeEntry.isInnerNode() ? 1 : 0)); + + Assert.assertEquals(counter.get(), 4, + "number of nodes should be small. Any number larger than 4 indicates, " + + "that new inner nodes are created even though the existing inner " + + "nodes could hold the values"); + + for (final var entry : insertedValues.entrySet()) { + final Long actualValue = map.getAsLong(entry.getKey()); + Assert.assertEquals(actualValue, entry.getValue(), + "value for key " + entry.getKey() + " after all iterations"); + } + + } + } + + @Test(invocationCount = 1) + public void testEasyValues() throws Exception { + final Path file = dataDirectory.resolve("map.db"); + final var insertedValues = new HashMap(); + + final Queue numbers = new LinkedList<>(Arrays.asList(1, 15, 11, 4, 16, 3, 13)); + + try (final DiskStorage ds = new DiskStorage(file)) { + final PersistentMap map = new PersistentMap(ds); + + final int numbersSize = numbers.size(); + for (int i = 0; i < numbersSize; i++) { + + final Integer keyNumber = numbers.poll(); + // System.out.println("\n\ninserting: " + keyNumber); + + final String key = "" + keyNumber; + final String value = "value"; + Assert.assertNull(map.getAsString(key)); + + Assert.assertNull(map.putValue(key, value)); + + insertedValues.put(key, value); + + // map.print(PersistentMap.STRING_DECODER, PersistentMap.STRING_DECODER); + for (final var entry : insertedValues.entrySet()) { final String actualValue = map.getAsString(entry.getKey()); + Assert.assertEquals(actualValue, entry.getValue(), "value for key " + entry.getKey() + " in the " + i + "th iteration"); } @@ -78,28 +215,11 @@ public class PersistentMapTest { try (final DiskStorage ds = new DiskStorage(file)) { final PersistentMap map = new PersistentMap(ds); + // map.print(PersistentMap.STRING_DECODER, PersistentMap.STRING_DECODER); - map.visitNodeEntriesPreOrder((nodeEntry, depth) -> { - if (nodeEntry.isInnerNode()) { - System.out.println(" ".repeat(depth) + nodeEntry); - } else { - 
System.out.println(" ".repeat(depth) + nodeEntry); - } - }); final AtomicInteger counter = new AtomicInteger(); - map.visitNodeEntriesPreOrder((nodeEntry, depth) -> counter.addAndGet(nodeEntry.isInnerNode() ? 1 : 0)); - - System.out.println(" -------------"); - map.visitNodesPreOrder((node, depth) -> { - final String key = new String(node.getTopNodeEntry().getKey(), StandardCharsets.UTF_8); - System.out.println(" ".repeat(depth) + node.getNodeOffset() + " " + key + " (children: " - + node.getEntries().size() + ")"); - }); - -// Assert.assertEquals(counter.get(), 3, -// "number of nodes should be small. Any number larger than 3 indicates, " -// + "that new inner nodes are created even though the existing inner " -// + "nodes could hold the values"); + map.visitNodeEntriesPreOrder( + (node, parentNode, nodeEntry, depth) -> counter.addAndGet(nodeEntry.isInnerNode() ? 1 : 0)); for (final var entry : insertedValues.entrySet()) { final String actualValue = map.getAsString(entry.getKey()); diff --git a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java index 80a36fe..f8a7559 100644 --- a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java +++ b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java @@ -24,12 +24,23 @@ public class Preconditions { } } - public static void checkEqual(final Object actual, final Object expected) { - if (!Objects.equals(actual, expected)) { - throw new IllegalStateException(); + /** + * + * @param a + * @param b + * @param message formatted with {@link MessageFormat} + * @param args + */ + public static void checkGreaterOrEqual(final long a, final long b, final String message, final Object... 
args) { + if (a < b) { + throw new IllegalStateException(MessageFormat.format(message, args) + " Expected: " + a + " >= " + b); } } + public static void checkEqual(final Object actual, final Object expected) { + checkEqual(actual, expected, "expected {0} is equal to {1}", actual, expected); + } + /** * Check that the given values are equal. The check is done with * {@link Objects#equals(Object, Object)} @@ -46,4 +57,5 @@ public class Preconditions { MessageFormat.format(message, args) + " Expected: " + actual + " equals " + expected); } } + }