Add block size to the header of a PersistentMap and optimize storage usage for monotonically incrementing keys.
This commit is contained in:
2020-10-17 10:13:46 +02:00
parent 277dba4c04
commit 46070a31b9
7 changed files with 133 additions and 38 deletions

View File

@@ -49,6 +49,10 @@ public final class ByteArrayKey implements Comparable<ByteArrayKey> {
return compare(key, otherKey) == 0;
}
/**
 * Checks whether this key compares equal to the given raw key bytes.
 *
 * @param otherKey the raw key bytes to compare this key's bytes against
 * @return {@code true} if {@code compare} reports 0 for the two byte arrays
 */
public boolean equal(final byte[] otherKey) {
    final int comparison = compare(this.bytes, otherKey);
    return 0 == comparison;
}
@Override
public String toString() {
return Arrays.toString(bytes);
@@ -75,5 +79,4 @@ public final class ByteArrayKey implements Comparable<ByteArrayKey> {
return false;
return true;
}
}

View File

@@ -85,14 +85,18 @@ class NodeEntry {
final String valueAsString = isInnerNode() ? String.valueOf(VariableByteEncoder.decodeFirstValue(value))
: String.valueOf(valueDecoder.apply(value));
final String keyAsString = keyToString(keyDecoder);
return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]";
}
/**
 * Renders the key of this entry as text.
 * <p>
 * A key equal to the {@code MAX_KEY} sentinel is rendered as the fixed marker
 * {@code <<<MAX_KEY>>>}; every other key is handed to {@code keyDecoder} and
 * the decoded object is converted with {@link String#valueOf(Object)}.
 *
 * @param <K>        type produced by the decoder
 * @param keyDecoder function applied to the raw key bytes
 * @return the textual representation of the key
 */
public <K> String keyToString(final Function<byte[], K> keyDecoder) {
final String keyAsString;
if (Arrays.equals(key, PersistentMap.MAX_KEY)) {
if (Arrays.equals(key, PersistentMapDiskNode.MAX_KEY)) {
keyAsString = "<<<MAX_KEY>>>";
} else {
keyAsString = String.valueOf(keyDecoder.apply(key));
}
return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]";
return keyAsString;
}
@Override

View File

@@ -32,15 +32,28 @@ public class PersistentMap<K, V> implements AutoCloseable {
private static final Logger LOGGER = LoggerFactory.getLogger(PersistentMap.class);
/**
 * Current version of the layout described below.
 * <p>
 * Byte layout per version:
 * <ul>
 * <li>Version 0:
 * <ul>
 * <li>bytes 0-7 are used by the free list root pointer</li>
 * <li>bytes 8-15 are used by the pointer to the root node</li>
 * </ul>
 * </li>
 * <li>Version 1:
 * <ul>
 * <li>bytes 0-7 are used by the free list root pointer</li>
 * <li>bytes 8-15 are used by the pointer to the root node</li>
 * <li>bytes 16-23 are used for the version number. In version 0 these bytes
 * were guaranteed to be 0, because blocks of at least 32 bytes were always
 * allocated.</li>
 * </ul>
 * </li>
 * </ul>
 */
private static final long CURRENT_VERSION = 1;
// Sentinel used as the 'maximum' key: 20 bytes, each set to Byte.MAX_VALUE.
static final byte[] MAX_KEY = new byte[20];
static {
    Arrays.fill(MAX_KEY, Byte.MAX_VALUE);
}
/**
 * Callback invoked with a single node entry and its surrounding context.
 * <p>
 * The interface declares exactly one abstract method, so it can be implemented
 * with a lambda; {@code @FunctionalInterface} makes the compiler enforce that.
 */
@FunctionalInterface
interface VisitorCallback {
    /**
     * Called for one node entry.
     *
     * @param node       the node that contains {@code nodeEntry}
     * @param parentNode the parent of {@code node}
     *                   (NOTE(review): presumably {@code null} for the root — confirm)
     * @param nodeEntry  the entry being visited
     * @param depth      depth value supplied by the traversal
     */
    void visit(PersistentMapDiskNode node, PersistentMapDiskNode parentNode, NodeEntry nodeEntry, int depth);
}
@@ -153,7 +166,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
public static final EncoderDecoder<String> STRING_CODER = new StringCoder();
public static final EncoderDecoder<Empty> EMPTY_ENCODER = new EmptyCoder();
static final int BLOCK_SIZE = 4096;
public static final int BLOCK_SIZE = 4096;
static final long OFFSET_META_DATA = 8;
private DiskStorage diskStore;
@@ -215,7 +228,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
initMetaDataSection(blockOffset);
// 5. insert a dummy entry with a 'maximum' key
putValue(MAX_KEY, valueEncoder.getEmptyValue());
putValue(PersistentMapDiskNode.MAX_KEY, valueEncoder.getEmptyValue());
}
}
@@ -363,7 +376,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
final PersistentMapDiskNode parent = parents.isEmpty() ? null : parents.pop();
if (parent != null) {
final byte[] newNodeKey = newNode.getTopNodeEntry().getKey();
final byte[] newNodeKey = newNode.getLastNodeEntry().getKey();
if (parent.canAdd(newNodeKey, newBlockOffset, maxEntriesInNode)) {
parent.addKeyNodePointer(newNodeKey, newBlockOffset);
writeNode(parent);
@@ -392,10 +405,10 @@ public class PersistentMap<K, V> implements AutoCloseable {
// root)
final long newRootNodeOffset = diskStore.allocateBlock(BLOCK_SIZE);
final PersistentMapDiskNode rootNode = PersistentMapDiskNode.emptyRootNode(newRootNodeOffset);
final byte[] newNodeKey = newNode.getTopNodeEntry().getKey();
final byte[] newNodeKey = newNode.getLastNodeEntry().getKey();
rootNode.addKeyNodePointer(newNodeKey, newBlockOffset);
final byte[] oldNodeKey = node.getTopNodeEntry().getKey();
final byte[] oldNodeKey = node.getLastNodeEntry().getKey();
rootNode.addKeyNodePointer(oldNodeKey, node.getNodeOffset());
writeNode(rootNode);
@@ -461,9 +474,30 @@ public class PersistentMap<K, V> implements AutoCloseable {
// diskBlock.force(); // makes writing nodes slower by factor 800 (sic!)
}
/**
 * Debug helper: writes a heading and then one line per node to
 * {@code System.out}, using {@code visitNodesPreOrder} to walk the nodes.
 * <p>
 * Each line contains the node offset, the number of entries, the first and
 * last key decoded with {@code keyEncoder}, and the same two keys as raw byte
 * arrays. The line is prefixed by {@code " ".repeat(depth)}.
 */
public synchronized void printNodes() {
    System.out.println("printing nodes:");
    visitNodesPreOrder((node, depth) -> {
        final String entryCount = "#" + node.getEntries().size();
        final NodeEntry first = node.getFirstNodeEntry();
        final NodeEntry last = node.getLastNodeEntry();

        // Assemble the line piece by piece, in the same order as it is printed.
        final StringBuilder line = new StringBuilder();
        line.append(" ".repeat(depth));
        line.append("@").append(node.getNodeOffset());
        line.append(" ").append(entryCount);
        line.append(" keys: ").append(first.keyToString(keyEncoder::decode));
        line.append(" to ").append(last.keyToString(keyEncoder::decode));
        line.append(" as bytes: ").append(Arrays.toString(first.getKey()));
        line.append(" to ").append(Arrays.toString(last.getKey()));
        System.out.println(line.toString());
    });
}
public synchronized void print(final boolean printValues) {
System.out.println("printing nodes:");
System.out.println("printing node entries:");
visitNodeEntriesPreOrder((node, parentNode, nodeEntry, depth) -> {
@@ -490,7 +524,8 @@ public class PersistentMap<K, V> implements AutoCloseable {
for (final NodeEntry child : node.getEntries()) {
if (Arrays.compare(child.getKey(), MAX_KEY) != 0) {
// if (Arrays.compare(child.getKey(), PersistentMapDiskNode.MAX_KEY) != 0)
{
visitor.visit(node, parentNode, child, depth);
}
if (child.isInnerNode()) {
@@ -539,7 +574,8 @@ public class PersistentMap<K, V> implements AutoCloseable {
public synchronized void reindex() throws IOException {
final long start = System.nanoTime();
final AtomicLong countValues = new AtomicLong();
LOGGER.info("start reindexing file: {}, version: {}, stats before:\n{}", path, version, stats());
final PersistentMapStats previousStats = stats();
LOGGER.info("start reindexing file: {}, version: {}, stats before:\n{}", path, version, previousStats);
final Path newFile = path.getParent().resolve(path.getFileName() + ".tmp");
try (PersistentMap<K, V> newMap = new PersistentMap<>(newFile, null, keyEncoder, valueEncoder)) {
@@ -550,6 +586,13 @@ public class PersistentMap<K, V> implements AutoCloseable {
LOGGER.info("written {} values", count);
}
});
final PersistentMapStats newStats = newMap.stats();
LOGGER.info("stats after reindex:\n{} ", newStats);
if (previousStats.getValues() != newStats.getValues()) {
throw new IllegalStateException("reindex of " + path + " failed");
}
}
diskStore.close();
@@ -609,7 +652,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
final int prefixCompareResult = entry.compareKeyPrefix(keyPrefix);
if (prefixCompareResult == 0) {
if (Arrays.equals(entry.getKey(), MAX_KEY)) {
if (Arrays.equals(entry.getKey(), PersistentMapDiskNode.MAX_KEY)) {
continue;
}
final K key = keyEncoder.decode(entry.getKey());

View File

@@ -42,6 +42,13 @@ import org.lucares.utils.byteencoder.VariableByteEncoder;
*/
public class PersistentMapDiskNode {
// Sentinel used as the maximum key: 20 bytes, each set to Byte.MAX_VALUE.
static final byte[] MAX_KEY = new byte[20];
static {
    Arrays.fill(MAX_KEY, Byte.MAX_VALUE);
}
private final TreeMap<ByteArrayKey, NodeEntry> entries;
private final long nodeOffset;
private final DiskBlock diskBlock;
@@ -216,24 +223,39 @@ public class PersistentMapDiskNode {
return result.toString();
}
public NodeEntry getTopNodeEntry() {
/**
 * Returns the entry stored under the first (lowest) key of the sorted
 * {@code entries} map.
 *
 * @return the value of the map's first entry
 */
public NodeEntry getFirstNodeEntry() {
    final var lowest = entries.firstEntry();
    return lowest.getValue();
}
/**
 * Returns the entry stored under the last (highest) key of the sorted
 * {@code entries} map.
 *
 * @return the value of the map's last entry
 */
public NodeEntry getLastNodeEntry() {
    final var highest = entries.lastEntry();
    return highest.getValue();
}
/**
 * Splits this node in two and returns the newly created part.
 * <p>
 * The entries are taken in the iteration order of the {@code entries} map. The
 * leading ("left") entries are put into a new node created for
 * {@code newBlockOffset}; this node is cleared and refilled with the trailing
 * ("right") entries.
 *
 * @param newBlockOffset offset handed to the constructor of the new node
 * @return the new node containing the left entries
 */
public PersistentMapDiskNode split(final long newBlockOffset) {
final List<NodeEntry> entriesAsCollection = new ArrayList<>(entries.values());
final List<NodeEntry> leftEntries;
final List<NodeEntry> rightEntries;
final var leftEntries = new ArrayList<>(entriesAsCollection.subList(0, entriesAsCollection.size() / 2));
final var rightEntries = new ArrayList<>(
entriesAsCollection.subList(entriesAsCollection.size() / 2, entriesAsCollection.size()));
if (isMaxKey(entries.lastKey())) {
// The last entry is the MAX_KEY sentinel: only that entry stays in this
// node, everything before it moves to the new node.
// NOTE(review): presumably this is the storage optimization for
// monotonically incrementing keys — confirm.
leftEntries = new ArrayList<>(entriesAsCollection.subList(0, entriesAsCollection.size() - 1));
rightEntries = new ArrayList<>(
entriesAsCollection.subList(entriesAsCollection.size() - 1, entriesAsCollection.size()));
} else {
// Otherwise split in the middle; with an odd count the right part gets
// the extra entry because of the integer division.
leftEntries = new ArrayList<>(entriesAsCollection.subList(0, entriesAsCollection.size() / 2));
rightEntries = new ArrayList<>(
entriesAsCollection.subList(entriesAsCollection.size() / 2, entriesAsCollection.size()));
}
// this node keeps the right part ...
entries.clear();
entries.putAll(toMap(rightEntries));
// ... and the left part becomes the content of the new node
return new PersistentMapDiskNode(newBlockOffset, leftEntries, null);
}
/**
 * Tells whether the given key equals the {@code MAX_KEY} sentinel.
 *
 * @param lastKey the key to check
 * @return the result of {@code lastKey.equal(MAX_KEY)}
 */
private boolean isMaxKey(final ByteArrayKey lastKey) {
    final boolean matchesSentinel = lastKey.equal(MAX_KEY);
    return matchesSentinel;
}
public static int neededBytesTotal(final List<NodeEntry> entries) {
final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE];

View File

@@ -20,6 +20,26 @@ public class PersistentMapStats {
super();
}
/**
 * @return the current value of the {@code values} counter
 */
public long getValues() {
    return this.values;
}
/**
 * @return the current value of the {@code nodes} field
 */
public long getNodes() {
    return this.nodes;
}
/**
 * @return the current value of the {@code innerNodes} field
 */
public long getInnerNodes() {
    return this.innerNodes;
}
/**
 * @return the current value of the {@code averageFill} field
 */
public double getAverageFill() {
    return this.averageFill;
}
/**
 * @return the current value of the {@code maxDepth} field
 */
public long getMaxDepth() {
    return this.maxDepth;
}
/**
 * Adds one to the {@code values} counter.
 */
public void incrementValues() {
    this.values += 1;
}