add block size to the header of a PersistentMap and optimize storage
usage for monotonically incrementing keys.
@@ -49,6 +49,10 @@ public final class ByteArrayKey implements Comparable<ByteArrayKey> {
         return compare(key, otherKey) == 0;
     }
 
+    public boolean equal(final byte[] otherKey) {
+        return compare(this.bytes, otherKey) == 0;
+    }
+
     @Override
     public String toString() {
         return Arrays.toString(bytes);
@@ -75,5 +79,4 @@ public final class ByteArrayKey implements Comparable<ByteArrayKey> {
             return false;
         return true;
     }
-
 }
@@ -85,14 +85,18 @@ class NodeEntry {
         final String valueAsString = isInnerNode() ? String.valueOf(VariableByteEncoder.decodeFirstValue(value))
                 : String.valueOf(valueDecoder.apply(value));
 
+        final String keyAsString = keyToString(keyDecoder);
+        return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]";
+    }
+
+    public <K> String keyToString(final Function<byte[], K> keyDecoder) {
         final String keyAsString;
-        if (Arrays.equals(key, PersistentMap.MAX_KEY)) {
+        if (Arrays.equals(key, PersistentMapDiskNode.MAX_KEY)) {
             keyAsString = "<<<MAX_KEY>>>";
         } else {
             keyAsString = String.valueOf(keyDecoder.apply(key));
         }
 
-        return "NodeEntry [type=" + type + ", key=" + keyAsString + ", value=" + valueAsString + "]";
+        return keyAsString;
     }
 
     @Override
@@ -32,15 +32,28 @@ public class PersistentMap<K, V> implements AutoCloseable {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(PersistentMap.class);
 
+    /**
+     * <ul>
+     * <li>Version 0:
+     * <ul>
+     * <li>bytes 0-7 are used by the free list root pointer</li>
+     * <li>bytes 8-15 are used by the pointer to the root node</li>
+     * </ul>
+     * </li>
+     * <li>Version 1:
+     * <ul>
+     * <li>bytes 0-7 are used by the free list root pointer</li>
+     * <li>bytes 8-15 are used by the pointer to the root node</li>
+     * <li>bytes 16-23 are used for the version number. In the previous version they
+     * were guaranteed to be 0, because we always allocated blocks of at least 32
+     * bytes.</li>
+     * </ul>
+     * </li>
+     * </ul>
+     */
+    private static final long CURRENT_VERSION = 1;
+
-    // the maximum key
-    static final byte[] MAX_KEY;
-    static {
-        MAX_KEY = new byte[20];
-        Arrays.fill(MAX_KEY, Byte.MAX_VALUE);
-    }
-
     interface VisitorCallback {
         void visit(PersistentMapDiskNode node, PersistentMapDiskNode parentNode, NodeEntry nodeEntry, int depth);
     }
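
The Javadoc above documents the on-disk header layout that the new CURRENT_VERSION constant refers to. A minimal sketch of reading that header, assuming a plain ByteBuffer view of the first block; the class name, field names and the ByteBuffer-based access are illustrative and not part of this commit:

    // Illustrative sketch only: decodes the 24-byte header documented above.
    // All names here are assumptions, not the actual PersistentMap code.
    import java.nio.ByteBuffer;

    final class PersistentMapHeaderSketch {
        long freeListRoot;   // bytes 0-7: free list root pointer
        long rootNode;       // bytes 8-15: pointer to the root node
        long version;        // bytes 16-23: reads as 0 for files written before this commit

        static PersistentMapHeaderSketch read(final ByteBuffer firstBlock) {
            final PersistentMapHeaderSketch header = new PersistentMapHeaderSketch();
            header.freeListRoot = firstBlock.getLong(0);
            header.rootNode = firstBlock.getLong(8);
            // Version 0 files never wrote these bytes, but they were guaranteed to be 0,
            // so reading them on an old file yields version 0.
            header.version = firstBlock.getLong(16);
            return header;
        }
    }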
@@ -153,7 +166,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
     public static final EncoderDecoder<String> STRING_CODER = new StringCoder();
     public static final EncoderDecoder<Empty> EMPTY_ENCODER = new EmptyCoder();
 
-    static final int BLOCK_SIZE = 4096;
+    public static final int BLOCK_SIZE = 4096;
     static final long OFFSET_META_DATA = 8;
 
     private DiskStorage diskStore;
@@ -215,7 +228,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
             initMetaDataSection(blockOffset);
 
             // 5. insert a dummy entry with a 'maximum' key
-            putValue(MAX_KEY, valueEncoder.getEmptyValue());
+            putValue(PersistentMapDiskNode.MAX_KEY, valueEncoder.getEmptyValue());
         }
     }
 
@@ -363,7 +376,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
         final PersistentMapDiskNode parent = parents.isEmpty() ? null : parents.pop();
 
         if (parent != null) {
-            final byte[] newNodeKey = newNode.getTopNodeEntry().getKey();
+            final byte[] newNodeKey = newNode.getLastNodeEntry().getKey();
             if (parent.canAdd(newNodeKey, newBlockOffset, maxEntriesInNode)) {
                 parent.addKeyNodePointer(newNodeKey, newBlockOffset);
                 writeNode(parent);
@@ -392,10 +405,10 @@ public class PersistentMap<K, V> implements AutoCloseable {
             // root)
             final long newRootNodeOffset = diskStore.allocateBlock(BLOCK_SIZE);
             final PersistentMapDiskNode rootNode = PersistentMapDiskNode.emptyRootNode(newRootNodeOffset);
-            final byte[] newNodeKey = newNode.getTopNodeEntry().getKey();
+            final byte[] newNodeKey = newNode.getLastNodeEntry().getKey();
             rootNode.addKeyNodePointer(newNodeKey, newBlockOffset);
 
-            final byte[] oldNodeKey = node.getTopNodeEntry().getKey();
+            final byte[] oldNodeKey = node.getLastNodeEntry().getKey();
             rootNode.addKeyNodePointer(oldNodeKey, node.getNodeOffset());
             writeNode(rootNode);
 
@@ -461,9 +474,30 @@ public class PersistentMap<K, V> implements AutoCloseable {
         // diskBlock.force(); // makes writing nodes slower by factor 800 (sic!)
     }
 
+    public synchronized void printNodes() {
+        System.out.println("printing nodes:");
+        visitNodesPreOrder((node, depth) -> {
+
+            final String children = "#" + node.getEntries().size();
+            final NodeEntry firstNodeEntry = node.getFirstNodeEntry();
+            final NodeEntry lastNodeEntry = node.getLastNodeEntry();
+
+            System.out.println(" ".repeat(depth) + "@" + node.getNodeOffset() + " " + children //
+                    + " keys: " //
+                    + firstNodeEntry.keyToString(keyEncoder::decode)//
+                    + " to "//
+                    + lastNodeEntry.keyToString(keyEncoder::decode)//
+                    + " as bytes: "//
+                    + Arrays.toString(firstNodeEntry.getKey())//
+                    + " to "//
+                    + Arrays.toString(lastNodeEntry.getKey())//
+            );
+        });
+    }
+
     public synchronized void print(final boolean printValues) {
 
-        System.out.println("printing nodes:");
+        System.out.println("printing node entries:");
 
         visitNodeEntriesPreOrder((node, parentNode, nodeEntry, depth) -> {
 
@@ -490,7 +524,8 @@ public class PersistentMap<K, V> implements AutoCloseable {
 
         for (final NodeEntry child : node.getEntries()) {
 
-            if (Arrays.compare(child.getKey(), MAX_KEY) != 0) {
+            // if (Arrays.compare(child.getKey(), PersistentMapDiskNode.MAX_KEY) != 0)
+            {
                 visitor.visit(node, parentNode, child, depth);
             }
             if (child.isInnerNode()) {
@@ -539,7 +574,8 @@ public class PersistentMap<K, V> implements AutoCloseable {
     public synchronized void reindex() throws IOException {
         final long start = System.nanoTime();
         final AtomicLong countValues = new AtomicLong();
-        LOGGER.info("start reindexing file: {}, version: {}, stats before:\n{}", path, version, stats());
+        final PersistentMapStats previousStats = stats();
+        LOGGER.info("start reindexing file: {}, version: {}, stats before:\n{}", path, version, previousStats);
        final Path newFile = path.getParent().resolve(path.getFileName() + ".tmp");
 
         try (PersistentMap<K, V> newMap = new PersistentMap<>(newFile, null, keyEncoder, valueEncoder)) {
@@ -550,6 +586,13 @@ public class PersistentMap<K, V> implements AutoCloseable {
                     LOGGER.info("written {} values", count);
                 }
             });
+
+            final PersistentMapStats newStats = newMap.stats();
+            LOGGER.info("stats after reindex:\n{} ", newStats);
+
+            if (previousStats.getValues() != newStats.getValues()) {
+                throw new IllegalStateException("reindex of " + path + " failed");
+            }
         }
 
         diskStore.close();
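
The added check guards the rebuild: if the freshly written map does not hold the same number of values as the original, the reindex is aborted before the old file is touched. A generic sketch of this rebuild-verify-swap pattern; the swap step and all names are assumptions, the commit itself only adds the verification:

    // Illustrative sketch: verify a cheap invariant after rebuilding into a temporary
    // file, and only then let the rebuilt file replace the original.
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.StandardCopyOption;

    final class ReindexSwapSketch {
        static void verifyAndSwap(final long valuesBefore, final long valuesAfter,
                final Path original, final Path rebuilt) throws IOException {
            if (valuesBefore != valuesAfter) {
                // abort: the rebuilt file is incomplete, keep the original untouched
                throw new IllegalStateException("reindex of " + original + " failed");
            }
            // replace the original file with the verified rebuild
            Files.move(rebuilt, original, StandardCopyOption.REPLACE_EXISTING);
        }
    }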
@@ -609,7 +652,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
             final int prefixCompareResult = entry.compareKeyPrefix(keyPrefix);
             if (prefixCompareResult == 0) {
 
-                if (Arrays.equals(entry.getKey(), MAX_KEY)) {
+                if (Arrays.equals(entry.getKey(), PersistentMapDiskNode.MAX_KEY)) {
                     continue;
                 }
                 final K key = keyEncoder.decode(entry.getKey());
@@ -42,6 +42,13 @@ import org.lucares.utils.byteencoder.VariableByteEncoder;
  */
 public class PersistentMapDiskNode {
 
+    // the maximum key
+    static final byte[] MAX_KEY;
+    static {
+        MAX_KEY = new byte[20];
+        Arrays.fill(MAX_KEY, Byte.MAX_VALUE);
+    }
+
     private final TreeMap<ByteArrayKey, NodeEntry> entries;
     private final long nodeOffset;
     private final DiskBlock diskBlock;
@@ -216,24 +223,39 @@ public class PersistentMapDiskNode {
         return result.toString();
     }
 
-    public NodeEntry getTopNodeEntry() {
+    public NodeEntry getFirstNodeEntry() {
         return entries.firstEntry().getValue();
     }
 
+    public NodeEntry getLastNodeEntry() {
+        return entries.lastEntry().getValue();
+    }
+
     public PersistentMapDiskNode split(final long newBlockOffset) {
 
         final List<NodeEntry> entriesAsCollection = new ArrayList<>(entries.values());
+        final List<NodeEntry> leftEntries;
+        final List<NodeEntry> rightEntries;
 
-        final var leftEntries = new ArrayList<>(entriesAsCollection.subList(0, entriesAsCollection.size() / 2));
-        final var rightEntries = new ArrayList<>(
-                entriesAsCollection.subList(entriesAsCollection.size() / 2, entriesAsCollection.size()));
+        if (isMaxKey(entries.lastKey())) {
+            leftEntries = new ArrayList<>(entriesAsCollection.subList(0, entriesAsCollection.size() - 1));
+            rightEntries = new ArrayList<>(
+                    entriesAsCollection.subList(entriesAsCollection.size() - 1, entriesAsCollection.size()));
+        } else {
+            leftEntries = new ArrayList<>(entriesAsCollection.subList(0, entriesAsCollection.size() / 2));
+            rightEntries = new ArrayList<>(
+                    entriesAsCollection.subList(entriesAsCollection.size() / 2, entriesAsCollection.size()));
+        }
         entries.clear();
         entries.putAll(toMap(rightEntries));
 
         return new PersistentMapDiskNode(newBlockOffset, leftEntries, null);
     }
 
+    private boolean isMaxKey(final ByteArrayKey lastKey) {
+        return lastKey.equal(MAX_KEY);
+    }
+
     public static int neededBytesTotal(final List<NodeEntry> entries) {
         final byte[] buffer = new byte[PersistentMap.BLOCK_SIZE];
 
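
The changed split() is where the commit message's "optimize storage usage for monotonically incrementing keys" comes in: when keys only ever grow, every insert lands just before the MAX_KEY sentinel in the right-most node, so a 50/50 split keeps leaving behind half-full nodes that are never filled again. The special case moves all real entries out as one nearly full node and keeps only the sentinel in the node that continues to receive appends. A simplified sketch of that decision, using plain sorted keys instead of NodeEntry objects (all names are illustrative):

    // Illustrative only: picks the index at which a full node is split.
    // Mirrors the isMaxKey() special case above, but on a plain sorted key list.
    import java.util.List;

    final class SplitPointSketch {
        static <K> int splitPoint(final List<K> sortedKeys, final K maxKeySentinel) {
            final boolean endsWithSentinel = sortedKeys.get(sortedKeys.size() - 1).equals(maxKeySentinel);
            // sentinel at the end => keys were appended monotonically: split off everything
            // except the sentinel as one packed node; otherwise fall back to a 50/50 split
            return endsWithSentinel ? sortedKeys.size() - 1 : sortedKeys.size() / 2;
        }
    }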
@@ -20,6 +20,26 @@ public class PersistentMapStats {
         super();
     }
 
+    public long getValues() {
+        return values;
+    }
+
+    public long getNodes() {
+        return nodes;
+    }
+
+    public long getInnerNodes() {
+        return innerNodes;
+    }
+
+    public double getAverageFill() {
+        return averageFill;
+    }
+
+    public long getMaxDepth() {
+        return maxDepth;
+    }
+
     public void incrementValues() {
         values++;
     }
@@ -166,8 +166,9 @@ public class PersistentMapTest {
             map.visitNodeEntriesPreOrder(
                     (node, parentNode, nodeEntry, depth) -> counter.addAndGet(nodeEntry.isInnerNode() ? 1 : 0));
 
-            Assertions.assertEquals(3, counter.get(),
-                    "number of nodes should be small. Any number larger than 3 indicates, "
+            Assertions.assertTrue(5 >= counter.get(),
+                    "found " + counter.get()
+                            + " nodes. The number of nodes should be small. Any number larger than 3 indicates, "
                             + "that new inner nodes are created even though the existing inner "
                             + "nodes could hold the values");
 
@@ -194,16 +195,17 @@ public class PersistentMapTest {
             for (int i = 0; i < 1500; i++) {
                 // System.out.println("\n\ninserting: " + i);
 
-                final Long key = (long) (rnd.nextGaussian() * Integer.MAX_VALUE);
+                final Long key = (long) (rnd.nextGaussian() * 50_000);
                 final Empty value = Empty.INSTANCE;
+                if (map.getValue(key) != null) {
+                    continue;
+                }
-                Assertions.assertNull(map.getValue(key));
-
                 Assertions.assertNull(map.putValue(key, value));
 
                 insertedValues.put(key, value);
 
                 // map.print(false);
 
                 final boolean failEarly = false;
                 if (failEarly) {
                     for (final var entry : insertedValues.entrySet()) {
@@ -222,13 +224,15 @@ public class PersistentMapTest {
 
         try (final PersistentMap<Long, Empty> map = new PersistentMap<>(file, dataDirectory, PersistentMap.LONG_CODER,
                 PersistentMap.EMPTY_ENCODER)) {
             // map.print(false);
+            // map.printNodes();
 
             final AtomicInteger counter = new AtomicInteger();
             map.visitNodeEntriesPreOrder(
                     (node, parentNode, nodeEntry, depth) -> counter.addAndGet(nodeEntry.isInnerNode() ? 1 : 0));
 
-            Assertions.assertEquals(3, counter.get(),
-                    "number of nodes should be small. Any number larger than 3 indicates, "
+            Assertions.assertTrue(5 >= counter.get(),
+                    "found " + counter.get()
+                            + " nodes. The number of nodes should be small. Any number larger than 5 indicates, "
                             + "that new inner nodes are created even though the existing inner "
                             + "nodes could hold the values");
 
@@ -238,6 +242,8 @@ public class PersistentMapTest {
                         "value for key " + entry.getKey() + " after all iterations");
             }
 
+            map.reindex();
+            // map.printNodes();
         }
     }
 
@@ -278,7 +284,7 @@ public class PersistentMapTest {
 
         try (final PersistentMap<String, String> map = new PersistentMap<>(file, dataDirectory,
                 PersistentMap.STRING_CODER, PersistentMap.STRING_CODER)) {
-            // map.print(PersistentMap.STRING_DECODER, PersistentMap.STRING_DECODER);
+            // map.printNodes();
 
             final AtomicInteger counter = new AtomicInteger();
             map.visitNodeEntriesPreOrder(
@@ -377,14 +383,11 @@ public class PersistentMapTest {
                 maxDepth.set(Math.max(maxDepth.get(), depth));
             });
 
-            final long start = System.nanoTime();
             for (final var entry : insertedValues.entrySet()) {
                 final Long actualValue = map.getValue(entry.getKey());
                 Assertions.assertEquals(entry.getValue(), actualValue,
                         "value for key " + entry.getKey() + " after all iterations");
             }
-            System.out.println("nodes=" + counter.get() + ", depth=" + maxDepth.get() + ": "
-                    + (System.nanoTime() - start) / 1_000_000.0 + "ms");
         }
     }
 
     private Map<Long, Long> fillMap(final int numberOfValues, final boolean failEarly,