insertion of many values into the persistent map
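For context, a minimal usage sketch of the API this change touches (the putValue/getAsLong overloads and setMaxEntriesInNode visible in the diff below). How a DiskStorage is opened is not part of this diff, so the openDiskStorage() helper here is a hypothetical placeholder, not the real API.

// Hedged sketch: exercises the putValue/getAsLong overloads touched by this commit.
// openDiskStorage() is a placeholder assumption; the diff does not show how DiskStorage is created.
import java.io.IOException;

import org.lucares.pdb.diskstorage.DiskStorage;
import org.lucares.pdb.map.PersistentMap;

public class PersistentMapUsageSketch {
    public static void main(final String[] args) throws IOException {
        final DiskStorage diskStore = openDiskStorage(); // placeholder, see note above
        final PersistentMap map = new PersistentMap(diskStore);

        // a small node capacity forces frequent node splits, which is what this commit exercises
        map.setMaxEntriesInNode(4);

        for (long i = 0; i < 10_000; i++) {
            map.putValue("key-" + i, i);
        }

        System.out.println(map.getAsLong("key-42")); // expected: 42
    }

    private static DiskStorage openDiskStorage() {
        throw new UnsupportedOperationException("not shown in this diff");
    }
}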
@@ -25,6 +25,9 @@ import org.lucares.collections.LongList;
 */
public class VariableByteEncoder {

    public static final long MIN_VALUE = Long.MIN_VALUE / 2 + 1;
    public static final long MAX_VALUE = Long.MAX_VALUE / 2;

    private static final int MAX_BYTES_PER_VALUE = 10;

    private static final int CONTINUATION_BYTE_FLAG = 1 << 7; // 10000000
@@ -83,8 +86,8 @@ public class VariableByteEncoder {

        int offset = offsetInBuffer;

        assert value >= Long.MIN_VALUE / 2 + 1 : "min encodable value is -2^62+1";
        assert value <= Long.MAX_VALUE / 2 : "max encodable value is 2^62";
        assert value >= MIN_VALUE : "min encodable value is -2^62+1 = " + MIN_VALUE;
        assert value <= MAX_VALUE : "max encodable value is 2^62 = " + MAX_VALUE;

        long normVal = encodeIntoPositiveValue(value);

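For reference, a generic sketch of the variable-byte scheme these constants imply: seven payload bits per byte, with the high bit (cf. CONTINUATION_BYTE_FLAG) marking that more bytes follow, so a full 64-bit value needs at most 10 bytes (cf. MAX_BYTES_PER_VALUE). This illustrates the general technique only; the diff does not show encodeIntoPositiveValue or the exact byte layout used by VariableByteEncoder.

// Generic illustration only -- not VariableByteEncoder's actual implementation.
// Encodes a non-negative long as 7-bit groups, least significant group first,
// with the high bit of each byte set when more bytes follow.
final class VarByteSketch {
    static int encodeInto(long positiveValue, final byte[] buffer, final int offsetInBuffer) {
        int offset = offsetInBuffer;
        do {
            byte b = (byte) (positiveValue & 0x7F);
            positiveValue >>>= 7;
            if (positiveValue != 0) {
                b |= (byte) 0x80; // continuation bit, cf. CONTINUATION_BYTE_FLAG
            }
            buffer[offset++] = b;
        } while (positiveValue != 0);
        return offset - offsetInBuffer; // at most 10 bytes for a 64-bit value
    }

    static long decodeFirst(final byte[] buffer, final int offsetInBuffer) {
        long result = 0;
        int shift = 0;
        int offset = offsetInBuffer;
        while (true) {
            final byte b = buffer[offset++];
            result |= (long) (b & 0x7F) << shift;
            if ((b & 0x80) == 0) {
                return result;
            }
            shift += 7;
        }
    }

    public static void main(final String[] args) {
        final byte[] buffer = new byte[10];
        final int written = encodeInto(300, buffer, 0);
        System.out.println(written + " bytes, decoded: " + decodeFirst(buffer, 0)); // 2 bytes, decoded: 300
    }
}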
@@ -1,15 +1,13 @@
package org.lucares.pdb.map;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;

import org.lucares.collections.LongList;
import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
import org.lucares.utils.Preconditions;

class NodeEntry {
    enum ValueType {
@@ -29,6 +27,10 @@ class NodeEntry {
            }
            throw new IllegalStateException("Cannot map byte " + b + " to a value type.");
        }

        public byte asByte() {
            return b;
        }
    }

    static final class KeyMatches implements Predicate<NodeEntry> {
@@ -82,6 +84,14 @@ class NodeEntry {
                + valueAsString + "]";
    }

    public String toString(final Function<byte[], String> keyDecoder, final Function<byte[], String> valueDecoder) {
        final String valueAsString = isInnerNode() ? String.valueOf(VariableByteEncoder.decodeFirstValue(value))
                : valueDecoder.apply(value);
        final String keyAsString = keyDecoder.apply(key);

        return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]";
    }

    @Override
    public int hashCode() {
        final int prime = 31;
@@ -110,93 +120,10 @@ class NodeEntry {
        return true;
    }

    public static List<NodeEntry> deserialize(final byte[] buffer) {
        final List<NodeEntry> entries = new ArrayList<>();
        final LongList keyLengths = VariableByteEncoder.decode(buffer);

        if (keyLengths.isEmpty() || keyLengths.get(0) == 0) {
            // node is empty -> should only happen for the root node
        } else {
            final int numEntries = (int) keyLengths.get(0);

            int offset = PersistentMap.BLOCK_SIZE;
            for (int i = 0; i < numEntries; i++) {
                final int keyLength = (int) keyLengths.get(i * 2 + 1);
                final int valueLength = (int) keyLengths.get(i * 2 + 2);

                final int valueOffset = offset - valueLength;
                final int keyOffset = valueOffset - keyLength;
                final int typeOffset = keyOffset - 1;

                final byte typeByte = buffer[typeOffset];
                final byte[] key = Arrays.copyOfRange(buffer, keyOffset, keyOffset + keyLength);
                final byte[] value = Arrays.copyOfRange(buffer, valueOffset, valueOffset + valueLength);

                final NodeEntry entry = new NodeEntry(ValueType.fromByte(typeByte), key, value);

                entries.add(entry);

                offset = typeOffset;
            }
        }
        return entries;
    }

    public static int neededBytes(final List<NodeEntry> entries) {
        return entries.stream().mapToInt(NodeEntry::size).sum();
    }

    public static int neededBytesTotal(final List<NodeEntry> entries) {
        final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE];

        final int usedBytes = serializeKeyLengths(entries, buffer);

        return usedBytes + NodeEntry.neededBytes(entries);
    }

    public static byte[] serialize(final List<NodeEntry> entries) {
        final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE];

        final int usedBytes = serializeKeyLengths(entries, buffer);

        Preconditions.checkGreater(PersistentMap.BLOCK_SIZE, usedBytes + NodeEntry.neededBytes(entries), "");

        NodeEntry.serializeIntoFromTail(entries, buffer);
        return buffer;
    }

    private static int serializeKeyLengths(final List<NodeEntry> entries, final byte[] buffer) {
        final var keyLengths = new LongList();
        keyLengths.add(entries.size());
        for (final NodeEntry nodeEntry : entries) {
            keyLengths.add(nodeEntry.getKey().length);
            keyLengths.add(nodeEntry.getValue().length);
        }

        final int usedBytes = VariableByteEncoder.encodeInto(keyLengths, buffer, 0);
        return usedBytes;
    }

    private static void serializeIntoFromTail(final List<NodeEntry> entries, final byte[] buffer) {

        int offset = buffer.length;

        for (final var entry : entries) {
            final byte[] valueBytes = entry.getValue();
            final byte[] keyBytes = entry.getKey();

            final int offsetValue = offset - valueBytes.length;
            final int offsetKey = offsetValue - keyBytes.length;
            final int offsetType = offsetKey - 1;

            System.arraycopy(valueBytes, 0, buffer, offsetValue, valueBytes.length);
            System.arraycopy(keyBytes, 0, buffer, offsetKey, keyBytes.length);
            buffer[offsetType] = entry.getType().b;

            offset = offsetType;
        }
    }

    public int compare(final byte[] otherKey) {

        int i = 0;

@@ -1,37 +1,59 @@
package org.lucares.pdb.map;

import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Stack;
import java.util.function.Function;

import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
import org.lucares.pdb.diskstorage.DiskBlock;
import org.lucares.pdb.diskstorage.DiskStorage;
import org.lucares.utils.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PersistentMap {

    private static final Logger LOGGER = LoggerFactory.getLogger(PersistentMap.class);

    // the maximum key
    private static final byte[] MAX_KEY = new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE,
            Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE,
            Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE,
            Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE,
            Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE };

    interface VisitorCallback {
        void visit(NodeEntry nodeEntry, int depth);
        void visit(PersistentMapDiskNode node, PersistentMapDiskNode parentNode, NodeEntry nodeEntry, int depth);
    }

    interface NodeVisitorCallback {
        void visit(PersistentMapDiskNode node, int depth);
    }

    public static final Function<byte[], String> STRING_DECODER = t -> new String(t, StandardCharsets.UTF_8);
    public static final Function<byte[], String> LONG_DECODER = t -> String
            .valueOf(VariableByteEncoder.decodeFirstValue(t));

    private static final Charset UTF8 = StandardCharsets.UTF_8;
    static final int BLOCK_SIZE = 4096;
    static final long NODE_OFFSET_TO_ROOT_NODE = 8;

    private final DiskStorage diskStore;

    private int maxEntriesInNode = Integer.MAX_VALUE;

    public PersistentMap(final DiskStorage diskStore) throws IOException {
        this.diskStore = diskStore;
        initIfNew();
    }

    public void setMaxEntriesInNode(final int maxEntriesInNode) {
        this.maxEntriesInNode = maxEntriesInNode;
    }

    private void initIfNew() throws IOException {
        if (diskStore.size() < BLOCK_SIZE) {
            final long nodeOffsetToRootNode = diskStore.allocateBlock(diskStore.minAllocationSize());
@@ -39,30 +61,51 @@ public class PersistentMap {
                    "The offset of the pointer to the root node must be at a well known location. "
                            + "Otherwise we would not be able to find it in an already existing file.");

            // 2. make sure that new blocks are aligned to the block size (for faster disk
            // IO)
            // 2. make sure new blocks are aligned to the block size (for faster disk IO)
            diskStore.ensureAlignmentForNewBlocks(BLOCK_SIZE);

            // 3. initialize an empty root node
            final long blockOffset = diskStore.allocateBlock(BLOCK_SIZE);
            final var rootNode = PersistentMapDiskNode.emptyRootNode(blockOffset);
            writeNode(rootNode);

            // 4. upate pointer to root node
            // 4. update pointer to root node
            writeNodeOffsetOfRootNode(blockOffset);

            // 5. insert a dummy entry with a 'maximum' key
            putValue(MAX_KEY, new byte[] { 0 });
        }
    }

    public Long put(final String key, final long value) throws IOException {
        final byte[] oldValue = put(key.getBytes(UTF8), VariableByteEncoder.encode(value));
    public Long putValue(final String key, final long value) throws IOException {
        final byte[] oldValue = putValue(key.getBytes(UTF8), VariableByteEncoder.encode(value));
        return oldValue == null ? null : VariableByteEncoder.decodeFirstValue(oldValue);
    }

    public long getAsLong(final String key) throws IOException {
    public String putValue(final long key, final String value) throws IOException {
        final byte[] oldValue = putValue(VariableByteEncoder.encode(key), value.getBytes(UTF8));
        return oldValue == null ? null : new String(oldValue, UTF8);
    }

    public Long putValue(final long key, final long value) throws IOException {
        final byte[] oldValue = putValue(VariableByteEncoder.encode(key), VariableByteEncoder.encode(value));
        return oldValue == null ? null : VariableByteEncoder.decodeFirstValue(oldValue);
    }

    public Long getAsLong(final String key) throws IOException {
        final byte[] buffer = get(key.getBytes(UTF8));
        return buffer == null ? null : VariableByteEncoder.decodeFirstValue(buffer);
    }

    public String put(final String key, final String value) throws IOException {
        final byte[] oldValue = put(key.getBytes(UTF8), value.getBytes(UTF8));
    public Long getAsLong(final long key) throws IOException {
        final byte[] buffer = get(VariableByteEncoder.encode(key));
        return buffer == null ? null : VariableByteEncoder.decodeFirstValue(buffer);
    }

    public String putValue(final String key, final String value) throws IOException {
        final byte[] keyBytes = key.getBytes(UTF8);
        final byte[] valueBytes = value.getBytes(UTF8);
        final byte[] oldValue = putValue(keyBytes, valueBytes);
        return oldValue == null ? null : new String(oldValue, UTF8);
    }

@@ -72,7 +115,7 @@ public class PersistentMap {
        return value == null ? null : new String(value, UTF8);
    }

    public byte[] put(final byte[] key, final byte[] value) throws IOException {
    public byte[] putValue(final byte[] key, final byte[] value) throws IOException {
        final long rootNodeOffset = readNodeOffsetOfRootNode();
        final Stack<PersistentMapDiskNode> parents = new Stack<>();
        return insert(parents, rootNodeOffset, key, value);
@@ -105,7 +148,7 @@ public class PersistentMap {
            }
        }

        if (node.canAdd(key, value)) {
        if (node.canAdd(key, value, maxEntriesInNode)) {
            // insert in existing node
            node.addKeyValue(key, value);
            writeNode(node);
@@ -117,7 +160,7 @@ public class PersistentMap {

            // 2. insert the value
            // start from the root, because we might have added a new root node
            return put(key, value);
            return putValue(key, value);
        }
        } else {
            final long childNodeOffset = toNodeOffset(entry);
@@ -126,37 +169,60 @@ public class PersistentMap {
        }
    }

    private void splitNode(final Stack<PersistentMapDiskNode> parents, final PersistentMapDiskNode node)
            throws IOException {
    private PersistentMapDiskNode splitNode(final Stack<PersistentMapDiskNode> parents,
            final PersistentMapDiskNode node) throws IOException {

        // System.out.println("\n\npre split node: " + node + "\n");

        final long newBlockOffset = diskStore.allocateBlock(BLOCK_SIZE);

        final PersistentMapDiskNode newNode = node.split(newBlockOffset);

        final PersistentMapDiskNode parent = parents.isEmpty() ? null : parents.pop();

        if (parent != null) {
            final byte[] newNodeKey = newNode.getTopNodeEntry().getKey();
            parent.addKeyNodePointer(newNodeKey, newBlockOffset);
            if (parent.canAdd(newNodeKey, newBlockOffset, maxEntriesInNode)) {
                parent.addKeyNodePointer(newNodeKey, newBlockOffset);
                writeNode(parent);
                writeNode(newNode);
                writeNode(node);
                return parent;
            } else {
                final PersistentMapDiskNode grandParentNode = splitNode(parents, parent);

                final NodeEntry pointerToParentAfterSplit = grandParentNode.getNodeEntryTo(newNodeKey);

                Preconditions.checkEqual(pointerToParentAfterSplit.isInnerNode(), true, "{0} is pointer to inner node",
                        pointerToParentAfterSplit);
                final long parentNodeOffset = toNodeOffset(pointerToParentAfterSplit); // the parent we have to add the
                                                                                       // newNode to
                final PersistentMapDiskNode parentNode = getNode(parentNodeOffset);
                parentNode.addKeyNodePointer(newNodeKey, newBlockOffset);
                writeNode(parentNode);
                writeNode(newNode);
                writeNode(node);
                return parentNode;
            }

            final byte[] oldNodeKey = node.getTopNodeEntry().getKey();
            parent.addKeyNodePointer(oldNodeKey, node.getNodeOffset());
            writeNode(parent);
        } else {
            // has no parent -> create a new parent (the new parent will also be the new
            // root)
            final long newRootOffset = diskStore.allocateBlock(BLOCK_SIZE);
            final PersistentMapDiskNode rootNode = new PersistentMapDiskNode(newRootOffset, Collections.emptyList());
            final long newRootNodeOffset = diskStore.allocateBlock(BLOCK_SIZE);
            final PersistentMapDiskNode rootNode = PersistentMapDiskNode.emptyRootNode(newRootNodeOffset);
            final byte[] newNodeKey = newNode.getTopNodeEntry().getKey();
            rootNode.addKeyNodePointer(newNodeKey, newBlockOffset);

            final byte[] oldNodeKey = node.getTopNodeEntry().getKey();
            rootNode.addKeyNodePointer(oldNodeKey, node.getNodeOffset());
            writeNode(rootNode);
            writeNodeOffsetOfRootNode(newRootOffset);
        }

        writeNode(newNode);
        writeNode(node);
            writeNode(newNode);
            writeNode(node);

            writeNodeOffsetOfRootNode(newRootNodeOffset);
            return rootNode;
        }
    }

    private NodeEntry findNodeEntry(final long nodeOffest, final byte[] key) throws IOException {
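In short, the reworked splitNode now returns the node that ended up holding the pointer to the newly split-off block: if the parent has room, the pointer is added there; if the parent is itself full, the parent is split recursively and the correct post-split parent is looked up via getNodeEntryTo before the pointer is added; if there is no parent, a fresh root is allocated and both halves are registered in it. The caller then retries the insert from the root (see `return putValue(key, value)` above). This summary restates what the diff shows and adds no behavior of its own.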
@@ -190,6 +256,7 @@ public class PersistentMap {
    }

    private void writeNode(final PersistentMapDiskNode node) throws IOException {
        LOGGER.info("writing node {}", node);
        final long nodeOffest = node.getNodeOffset();
        final DiskBlock diskBlock = diskStore.getDiskBlock(nodeOffest, BLOCK_SIZE);
        final byte[] buffer = diskBlock.getBuffer();
@@ -199,27 +266,35 @@ public class PersistentMap {
        diskBlock.force();
    }

    public void print() throws IOException {
    public void print(final Function<byte[], String> keyDecoder, final Function<byte[], String> valueDecoder)
            throws IOException {

        visitNodeEntriesPreOrder((nodeEntry, depth) -> System.out.println(" ".repeat(depth) + nodeEntry));
        visitNodeEntriesPreOrder((node, parentNode, nodeEntry, depth) -> {

            final PrintStream writer = System.out;

            final String children = "#" + node.getEntries().size();

            writer.println(" ".repeat(depth) + "@" + node.getNodeOffset() + " " + children + " "
                    + nodeEntry.toString(keyDecoder, valueDecoder));
        });
    }

    public void visitNodeEntriesPreOrder(final VisitorCallback visitor) throws IOException {
        final long rootNodeOffset = readNodeOffsetOfRootNode();
        visitNodeEntriesPreOrderRecursively(rootNodeOffset, visitor, 0);
        visitNodeEntriesPreOrderRecursively(rootNodeOffset, null, visitor, 0);
    }

    private void visitNodeEntriesPreOrderRecursively(final long nodeOffset, final VisitorCallback visitor,
            final int depth) throws IOException {
    private void visitNodeEntriesPreOrderRecursively(final long nodeOffset, final PersistentMapDiskNode parentNode,
            final VisitorCallback visitor, final int depth) throws IOException {
        final PersistentMapDiskNode node = getNode(nodeOffset);

        for (final NodeEntry child : node.getEntries()) {

            visitor.visit(child, depth);
            visitor.visit(node, parentNode, child, depth);
            if (child.isInnerNode()) {
                final long childNodeOffset = VariableByteEncoder.decodeFirstValue(child.getValue());
                visitNodeEntriesPreOrderRecursively(childNodeOffset, visitor, depth + 1);
                visitNodeEntriesPreOrderRecursively(childNodeOffset, node, visitor, depth + 1);
            }
        }
    }

@@ -1,30 +1,40 @@
package org.lucares.pdb.map;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import org.lucares.collections.LongList;
import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
import org.lucares.pdb.map.NodeEntry.ValueType;
import org.lucares.utils.Preconditions;

/**
 * <pre>
 * Node layout:
 * ┏━━━┳━━━━━┳━━━━━┳━━━━━┳╸╺╸╺╸╺╸╺┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
 * ┃ 6 ┃ 5,6 ┃ 3,6 ┃ 3,2 ┃        ┃"ba"->"147"┃"foobar"->"467"┃"foobaz"->"value"┃
 * ┗━━━┻━━━━━┻━━━━━┻━━━━━┻╸╺╸╺╸╺╸╺┻━━━━━━━━━━━┻━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━┛
 * │ │ │ │ │ │ └▶ size of the third last key ("ba" in this example)
 * │ │ │ │ │ └▶ size of the third last value ("147" in this example)
 * │ │ │ │ └▶ size of the second last key ("foobar" in this example)
 * │ │ │ └▶ size of the second last value ("467" in this example)
 * │ │ └▶ size of the last key ("foobaz" in this example)
 * │ └▶ size of the last value (the string "value" in this example)
 * └▶ number of entries * 2
 *
 * ◀────────── Prefix ──────────▶ ◀───────────────── Suffix ──────────────────▶
 * ┏━━━━━┳━━━┳━━━━━┳━━━━━┳━━━━━┳━━━┳╸╺╸╺╸╺╸╺┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
 * ┃ 456 ┃ 6 ┃ 5,6 ┃ 3,6 ┃ 3,2 ┃ ∅ ┃        ┃"ba"->"147"┃"foobar"->"467"┃"foobaz"->"value"┃
 * ┗━━━━━┻━━━┻━━━━━┻━━━━━┻━━━━━┻━━━┻╸╺╸╺╸╺╸╺┻━━━━━━━━━━━┻━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━┛
 * │ │ │ │ │ │ │ │ └▶ null byte that serves as a separator for the prefix.
 * │ │ │ │ │ │ │ └▶ size of the third last key ("ba" in this example)
 * │ │ │ │ │ │ └▶ size of the third last value ("147" in this example)
 * │ │ │ │ │ └▶ size of the second last key ("foobar" in this example)
 * │ │ │ │ └▶ size of the second last value ("467" in this example)
 * │ │ │ └▶ size of the last key ("foobaz" in this example)
 * │ │ └▶ size of the last value (the string "value" in this example)
 * │ └▶ number of entries * 2
 * └▶ node offset of the parent node (-1 if there is no parent node)
 *
 * </pre>
 */
public class PersistentMapDiskNode {

    public static final long NO_NODE_OFFSET = -1;

    private final List<NodeEntry> entries;
    private final long nodeOffset;

@@ -33,19 +43,58 @@ public class PersistentMapDiskNode {
        this.entries = new ArrayList<>(entries);
    }

    public static PersistentMapDiskNode emptyRootNode(final long nodeOffset) {
        return new PersistentMapDiskNode(nodeOffset, Collections.emptyList());
    }

    public static PersistentMapDiskNode parse(final long nodeOffset, final byte[] data) {
        if (data.length != PersistentMap.BLOCK_SIZE) {
            throw new IllegalStateException(
                    "block size must be " + PersistentMap.BLOCK_SIZE + " but was " + data.length);
        }
        final LongList longs = VariableByteEncoder.decode(data);
        if (longs.size() == 0) {
            System.out.println();
        }

        final List<NodeEntry> entries = NodeEntry.deserialize(data);
        final List<NodeEntry> entries = deserialize(longs, data);
        return new PersistentMapDiskNode(nodeOffset, entries);
    }

    public static List<NodeEntry> deserialize(final LongList keyLengths, final byte[] buffer) {
        final List<NodeEntry> entries = new ArrayList<>();

        if (keyLengths.isEmpty() || keyLengths.get(0) == 0) {
            // node is empty -> should only happen for the root node
        } else {
            final int numEntries = (int) keyLengths.get(0);

            int offset = PersistentMap.BLOCK_SIZE;
            for (int i = 0; i < numEntries; i++) {
                final int keyLength = (int) keyLengths.get(i * 2 + 1);
                final int valueLength = (int) keyLengths.get(i * 2 + 2);

                final int valueOffset = offset - valueLength;
                final int keyOffset = valueOffset - keyLength;
                final int typeOffset = keyOffset - 1;

                final byte typeByte = buffer[typeOffset];
                final byte[] key = Arrays.copyOfRange(buffer, keyOffset, keyOffset + keyLength);
                final byte[] value = Arrays.copyOfRange(buffer, valueOffset, valueOffset + valueLength);

                final NodeEntry entry = new NodeEntry(ValueType.fromByte(typeByte), key, value);

                entries.add(entry);

                offset = typeOffset;
            }
        }
        return entries;
    }

    public byte[] serialize() {

        return NodeEntry.serialize(entries);
        return serialize(entries);
    }

    public long getNodeOffset() {
@@ -56,7 +105,7 @@ public class PersistentMapDiskNode {

        final NodeEntry result = null;
        for (final NodeEntry entry : entries) {
            // if (entry.compare(key) <= 0) {

            if (entry.compare(key) >= 0) {
                return entry;
            } else {
@@ -84,12 +133,23 @@ public class PersistentMapDiskNode {
        Collections.sort(entries, NodeEntry.SORT_BY_KEY);
    }

    public boolean canAdd(final byte[] key, final byte[] value) {
        final NodeEntry entry = new NodeEntry(ValueType.VALUE_INLINE, key, value);
        final List<NodeEntry> tmp = new ArrayList<>(entries.size() + 1);
        tmp.addAll(entries);
        tmp.add(entry);
        return NodeEntry.neededBytesTotal(tmp) <= PersistentMap.BLOCK_SIZE;
    public boolean canAdd(final byte[] key, final long nodeOffset, final int maxEntriesInNode) {
        return canAdd(key, VariableByteEncoder.encode(nodeOffset), maxEntriesInNode);
    }

    public boolean canAdd(final byte[] key, final byte[] value, final int maxEntriesInNode) {

        if (entries.size() > maxEntriesInNode) {
            return false;
        } else {
            final NodeEntry entry = new NodeEntry(ValueType.VALUE_INLINE, key, value);
            final List<NodeEntry> tmp = new ArrayList<>(entries.size() + 1);
            tmp.addAll(entries);
            tmp.add(entry);

            // the +1 is for the null-byte terminator of the prefix
            return neededBytesTotal(tmp) + 1 <= PersistentMap.BLOCK_SIZE;
        }
    }

    public void removeKey(final byte[] key) {
@@ -106,7 +166,8 @@ public class PersistentMapDiskNode {

    @Override
    public String toString() {
        return String.join("\n", entries.stream().map(NodeEntry::toString).collect(Collectors.toList()));
        return "@" + nodeOffset + ": "
                + String.join("\n", entries.stream().map(NodeEntry::toString).collect(Collectors.toList()));
    }

    public NodeEntry getTopNodeEntry() {
@@ -119,9 +180,68 @@ public class PersistentMapDiskNode {
        final var rightEntries = new ArrayList<>(entries.subList(entries.size() / 2, entries.size()));

        entries.clear();
        entries.addAll(leftEntries);
        entries.addAll(rightEntries);

        return new PersistentMapDiskNode(newBlockOffset, rightEntries);
        return new PersistentMapDiskNode(newBlockOffset, leftEntries);
    }

    public static int neededBytesTotal(final List<NodeEntry> entries) {
        final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE];

        final int usedBytes = serializePrefix(entries, buffer);

        return usedBytes + NodeEntry.neededBytes(entries);
    }

    private static byte[] serialize(final List<NodeEntry> entries) {
        final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE];

        final int usedBytes = serializePrefix(entries, buffer);

        // the +1 is for the null-byte terminator of the prefix
        Preconditions.checkGreaterOrEqual(PersistentMap.BLOCK_SIZE, usedBytes + 1 + NodeEntry.neededBytes(entries),
                "The node is too big. It cannot be encoded into " + PersistentMap.BLOCK_SIZE + " bytes.");

        serializeIntoFromTail(entries, buffer);
        return buffer;
    }

    private static int serializePrefix(final List<NodeEntry> entries, final byte[] buffer) {
        final LongList longs = serializeKeyLengths(entries);

        final int usedBytes = VariableByteEncoder.encodeInto(longs, buffer, 0);
        return usedBytes;
    }

    private static LongList serializeKeyLengths(final List<NodeEntry> entries) {
        final var keyLengths = new LongList();
        keyLengths.add(entries.size());
        for (final NodeEntry nodeEntry : entries) {
            keyLengths.add(nodeEntry.getKey().length);
            keyLengths.add(nodeEntry.getValue().length);
        }

        return keyLengths;
    }

    private static void serializeIntoFromTail(final List<NodeEntry> entries, final byte[] buffer) {

        int offset = buffer.length;

        for (final var entry : entries) {
            final byte[] valueBytes = entry.getValue();
            final byte[] keyBytes = entry.getKey();

            final int offsetValue = offset - valueBytes.length;
            final int offsetKey = offsetValue - keyBytes.length;
            final int offsetType = offsetKey - 1;

            System.arraycopy(valueBytes, 0, buffer, offsetValue, valueBytes.length);
            System.arraycopy(keyBytes, 0, buffer, offsetKey, keyBytes.length);
            buffer[offsetType] = entry.getType().asByte();

            offset = offsetType;
        }
    }

}
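To make the tail-packing arithmetic in serializeIntoFromTail and deserialize concrete, a small standalone sketch follows. The 32-byte buffer and the "ba" -> "147" entry are toy assumptions for illustration; the real code works on the 4096-byte block and NodeEntry objects.

// Standalone illustration of the tail packing used by serializeIntoFromTail / deserialize:
// each entry occupies [type byte][key bytes][value bytes], written back-to-front from the
// end of the block. The 32-byte buffer and the single toy entry are assumptions.
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

final class TailPackingSketch {
    public static void main(final String[] args) {
        final byte[] buffer = new byte[32]; // stands in for the 4096-byte block
        final byte[] key = "ba".getBytes(StandardCharsets.UTF_8);
        final byte[] value = "147".getBytes(StandardCharsets.UTF_8);

        // pack one key/value pair from the tail, mirroring serializeIntoFromTail
        final int offset = buffer.length;               // 32
        final int offsetValue = offset - value.length;  // 29
        final int offsetKey = offsetValue - key.length; // 27
        final int offsetType = offsetKey - 1;           // 26, one byte for the ValueType
        System.arraycopy(value, 0, buffer, offsetValue, value.length);
        System.arraycopy(key, 0, buffer, offsetKey, key.length);
        buffer[offsetType] = 1;                         // placeholder type byte

        // read it back the way deserialize does, given the lengths from the prefix;
        // the next entry would continue from offsetType
        final byte[] keyBack = Arrays.copyOfRange(buffer, offsetKey, offsetKey + key.length);
        final byte[] valueBack = Arrays.copyOfRange(buffer, offsetValue, offsetValue + value.length);
        System.out.println(new String(keyBack, StandardCharsets.UTF_8) + " -> "
                + new String(valueBack, StandardCharsets.UTF_8)); // prints: ba -> 147
    }
}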