method to print some stats for a tree

This commit is contained in:
2020-10-07 18:50:19 +02:00
parent 4749dca73a
commit f9ed27f03b
4 changed files with 161 additions and 11 deletions

View File

@@ -20,6 +20,7 @@ import java.util.function.Function;
import org.lucares.collections.LongList;
import org.lucares.pdb.diskstorage.DiskBlock;
import org.lucares.pdb.diskstorage.DiskStorage;
import org.lucares.utils.ArrayUtils;
import org.lucares.utils.Preconditions;
import org.lucares.utils.byteencoder.VariableByteEncoder;
import org.lucares.utils.cache.LRUCache;
@@ -41,6 +42,10 @@ public class PersistentMap<K, V> implements AutoCloseable {
void visit(PersistentMapDiskNode node, PersistentMapDiskNode parentNode, NodeEntry nodeEntry, int depth);
}
/**
 * Callback that receives whole nodes (rather than single node entries) while
 * the tree is traversed.
 * <p>
 * The interface has exactly one abstract method so that it can be implemented
 * with a lambda expression.
 */
@FunctionalInterface
interface NodeVisitorCallback {
    /**
     * Called once for each visited node.
     *
     * @param node  the node that is being visited
     * @param depth the level of the node as counted by the traversal; the
     *              pre-order traversal in this class passes 0 for the root
     *              node and adds 1 for every level below it
     */
    void visit(PersistentMapDiskNode node, int depth);
}
public interface EncoderDecoder<O> {
public byte[] encode(O object);
@@ -453,7 +458,9 @@ public class PersistentMap<K, V> implements AutoCloseable {
for (final NodeEntry child : node.getEntries()) {
visitor.visit(node, parentNode, child, depth);
if (Arrays.compare(child.getKey(), MAX_KEY) != 0) {
visitor.visit(node, parentNode, child, depth);
}
if (child.isInnerNode()) {
final long childNodeOffset = VariableByteEncoder.decodeFirstValue(child.getValue());
visitNodeEntriesPreOrderRecursively(childNodeOffset, node, visitor, depth + 1);
@@ -461,10 +468,35 @@ public class PersistentMap<K, V> implements AutoCloseable {
}
}
/**
 * Visits every node of the tree in pre-order, beginning at the root node,
 * which is reported to the visitor with depth 0.
 *
 * @param visitor callback that is invoked for each node
 */
private synchronized void visitNodesPreOrder(final NodeVisitorCallback visitor) {
    final int depthOfRoot = 0;
    visitNodesPreOrderRecursively(readNodeOffsetOfRootNode(), visitor, depthOfRoot);
}
/**
 * Loads the node at the given offset, hands it to the visitor and then
 * descends into every entry that refers to an inner node.
 *
 * @param nodeOffset offset of the node to visit
 * @param visitor    callback that is invoked for the node and all nodes below it
 * @param depth      depth reported for the node at {@code nodeOffset}; nodes
 *                   one level below are reported with {@code depth + 1}
 */
private void visitNodesPreOrderRecursively(final long nodeOffset, final NodeVisitorCallback visitor,
        final int depth) {
    final PersistentMapDiskNode current = getNode(nodeOffset);

    // pre-order: the node itself is reported before any of its children
    visitor.visit(current, depth);

    final int depthOfChildren = depth + 1;
    for (final NodeEntry entry : current.getEntries()) {
        if (!entry.isInnerNode()) {
            continue;
        }
        // the value of an inner-node entry starts with the offset of the child node
        final long offsetOfChild = VariableByteEncoder.decodeFirstValue(entry.getValue());
        visitNodesPreOrderRecursively(offsetOfChild, visitor, depthOfChildren);
    }
}
/**
 * Mode selector for visiting entries by key prefix.
 * <p>
 * NOTE(review): the code that consumes this enum is not visible here.
 * Presumably {@code FIND} looks up matching entries and {@code ITERATE} walks
 * over them - confirm against the callers before relying on this.
 */
enum VisitByPrefixMode {
FIND, ITERATE
}
/**
 * Passes the entries of the map to the given visitor by iterating from the
 * root node with an empty key prefix.
 *
 * @param visitor callback that receives the decoded key and value of each
 *                visited entry
 */
public synchronized void forAll(final Visitor<K, V> visitor) {
    final long offsetOfRoot = readNodeOffsetOfRootNode();
    // an empty prefix places no restriction on the keys
    final byte[] noPrefix = new byte[0];
    iterateNodeEntryByPrefix(offsetOfRoot, noPrefix, visitor);
}
public synchronized void visitValues(final K keyPrefix, final Visitor<K, V> visitor) {
final byte[] encodedKeyPrefix = keyEncoder.encode(keyPrefix);
@@ -479,9 +511,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
final Path newFile = path.getParent().resolve(path.getFileName() + ".tmp");
try (PersistentMap<K, V> newMap = new PersistentMap<>(newFile, null, keyEncoder, valueEncoder)) {
final long rootNodeOffset = readNodeOffsetOfRootNode();
final byte[] encodedKeyPrefix = new byte[0];
iterateNodeEntryByPrefix(rootNodeOffset, encodedKeyPrefix, (k, v) -> {
forAll((k, v) -> {
newMap.putValue(k, v);
final long count = countValues.incrementAndGet();
if (count % 100000 == 0) {
@@ -494,12 +524,9 @@ public class PersistentMap<K, V> implements AutoCloseable {
valueCache.clear();
nodeCache.clear();
final Path backupFile = path.getParent().resolve(path.getFileName() + "."
+ DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss").format(OffsetDateTime.now()) + ".backup");
Files.move(path, backupFile);
Files.move(newFile, path);
swapFiles(newFile);
this.diskStore = new DiskStorage(path, null);
diskStore = new DiskStorage(path, null);
readOffsetOfRootNode();
final double durationInMs = (System.nanoTime() - start) / 1_000_000.0;
final double valuesPerSecond = countValues.get() / (durationInMs / 1000);
@@ -507,6 +534,36 @@ public class PersistentMap<K, V> implements AutoCloseable {
countValues.get(), valuesPerSecond);
}
/**
 * Collects statistics about the tree by visiting every node once in
 * pre-order.
 * <p>
 * For each node the depth, the node kind (inner node or not), the number of
 * entries and the fill ratio are recorded. Every entry that is a data node is
 * additionally counted as one value.
 *
 * @return the statistics gathered for the current state of the tree
 */
public synchronized PersistentMapStats stats() {
    final PersistentMapStats stats = new PersistentMapStats();

    visitNodesPreOrder((node, depth) -> {
        stats.addDepth(depth);
        stats.incrementNodes(node.isInnerNode());
        stats.addValuesInNode(node.getEntries().size());

        // The unused part of a block is measured as the run of zero bytes that
        // starts at the first zero byte of the serialized node.
        final byte[] serialized = node.serialize();
        final int firstZero = ArrayUtils.findFirst(serialized, (byte) 0);
        // findFirst returns -1 if the node contains no zero byte at all. Using
        // -1 as offset would read serialized[-1] and fail with an
        // ArrayIndexOutOfBoundsException, so such a node is treated as full.
        final int unusedBytes = firstZero < 0 ? 0
                : ArrayUtils.countConsecutive(serialized, (byte) 0, firstZero);
        final int usedBytes = BLOCK_SIZE - unusedBytes;
        stats.addFill(usedBytes / (double) BLOCK_SIZE);

        for (final NodeEntry entry : node.getEntries()) {
            if (entry.isDataNode()) {
                stats.incrementValues();
            }
        }
    });

    return stats;
}
/**
 * Replaces the file of this map with the given new file. The current file is
 * first moved to a sibling named
 * {@code <file name>.<yyyyMMdd-HHmmss>.backup}, then the new file is moved to
 * the path of this map.
 *
 * @param newFile the file that takes the place of the current file
 * @throws IOException if one of the two move operations fails
 */
private void swapFiles(final Path newFile) throws IOException {
    final String timestamp = DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss").format(OffsetDateTime.now());
    final String backupFileName = path.getFileName() + "." + timestamp + ".backup";
    final Path backupFile = path.getParent().resolve(backupFileName);

    Files.move(path, backupFile);
    Files.move(newFile, path);
}
private void iterateNodeEntryByPrefix(final long nodeOffest, final byte[] keyPrefix, final Visitor<K, V> visitor) {
final PersistentMapDiskNode node = getNode(nodeOffest);
@@ -525,8 +582,6 @@ public class PersistentMap<K, V> implements AutoCloseable {
final K key = keyEncoder.decode(entry.getKey());
final V value = valueEncoder.decode(entry.getValue());
visitor.visit(key, value);
// System.out.println("--> " + key + "=" + value);
} else if (prefixCompareResult > 0) {
break;
}

View File

@@ -294,4 +294,9 @@ public class PersistentMapDiskNode {
}
}
/**
 * Tells whether this node is an inner node. The decision is based on the
 * first entry only; a node without entries is not an inner node.
 *
 * @return {@code true} if the node has entries and its first entry is an
 *         inner node entry, {@code false} otherwise
 */
public boolean isInnerNode() {
    if (entries.size() == 0) {
        return false;
    }
    return entries.firstEntry().getValue().isInnerNode();
}
}

View File

@@ -0,0 +1,66 @@
package org.lucares.pdb.map;
/**
 * Mutable accumulator for statistics about the nodes and values of a
 * persistent map.
 * <p>
 * Callers feed observations through the {@code increment...} and
 * {@code add...} methods and read the result either via the getters or via
 * {@link #toString()}. Averages are derived from sums when they are read, so
 * the depth and values-per-node averages do not accumulate rounding errors.
 * <p>
 * Instances are not synchronized.
 */
public class PersistentMapStats {

    private long values = 0;

    private long nodes = 0;

    private long innerNodes = 0;

    private long maxDepth = 0;

    private long countDepth = 0;

    private long sumDepth = 0;

    private long countFill = 0;

    private double sumFill = 0;

    // long like all other counters, so all counters share the same range
    private long countValuesInNode = 0;

    private long sumValuesInNode = 0;

    /** Creates statistics in which all counters and averages are zero. */
    public PersistentMapStats() {
        super();
    }

    /** Counts one more value. */
    public void incrementValues() {
        values++;
    }

    /**
     * Counts one more node.
     *
     * @param innerNode {@code true} if the node should also be counted as an
     *                  inner node
     */
    public void incrementNodes(final boolean innerNode) {
        nodes++;
        if (innerNode) {
            innerNodes++;
        }
    }

    /**
     * Records the depth of one node; updates the maximum and the average depth.
     *
     * @param depth the depth of the node
     */
    public void addDepth(final long depth) {
        maxDepth = Math.max(maxDepth, depth);
        countDepth++;
        sumDepth += depth;
    }

    /**
     * Records the fill ratio of one node.
     *
     * @param fill the fill ratio of the node
     */
    public void addFill(final double fill) {
        countFill++;
        sumFill += fill;
    }

    /**
     * Records the number of entries of one node.
     *
     * @param valuesInNode the number of entries in the node
     */
    public void addValuesInNode(final int valuesInNode) {
        countValuesInNode++;
        sumValuesInNode += valuesInNode;
    }

    /** @return the number of counted values */
    public long getValues() {
        return values;
    }

    /** @return the number of counted nodes */
    public long getNodes() {
        return nodes;
    }

    /** @return the number of counted nodes that were flagged as inner nodes */
    public long getInnerNodes() {
        return innerNodes;
    }

    /** @return the largest recorded depth, 0 if no depth was recorded */
    public long getMaxDepth() {
        return maxDepth;
    }

    /** @return the mean of all recorded depths, 0 if no depth was recorded */
    public double getAverageDepth() {
        return countDepth == 0 ? 0.0 : sumDepth / (double) countDepth;
    }

    /** @return the mean of all recorded fill ratios, 0 if none was recorded */
    public double getAverageFill() {
        return countFill == 0 ? 0.0 : sumFill / countFill;
    }

    /** @return the mean number of entries per node, 0 if none was recorded */
    public double getAverageValuesInNode() {
        return countValuesInNode == 0 ? 0.0 : sumValuesInNode / (double) countValuesInNode;
    }

    /**
     * Renders all statistics as a multi-line text with one statistic per line.
     * Averages are printed with two decimal places.
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append("values= ").append(values);
        builder.append("\nnodes= ").append(nodes);
        builder.append("\ninnerNodes= ").append(innerNodes);
        builder.append("\nmaxDepth= ").append(maxDepth);
        builder.append(String.format("\navg. depth= %.2f", getAverageDepth()));
        builder.append(String.format("\navg. fill= %.2f", getAverageFill()));
        builder.append(String.format("\nvalues/node=%.2f\n", getAverageValuesInNode()));
        return builder.toString();
    }
}

View File

@@ -0,0 +1,24 @@
package org.lucares.utils;
/**
 * Static helper methods for byte arrays.
 */
public final class ArrayUtils {

    private ArrayUtils() {
        // utility class, not meant to be instantiated
    }

    /**
     * Finds the first occurrence of a byte.
     *
     * @param bytes the array to search
     * @param key   the byte to look for
     * @return the index of the first element equal to {@code key}, or -1 if
     *         the array does not contain {@code key}
     */
    public static int findFirst(final byte[] bytes, final byte key) {
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] == key) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Counts how many consecutive elements, starting at {@code offset}, are
     * equal to {@code key}.
     *
     * @param bytes  the array to inspect
     * @param key    the byte to count
     * @param offset index of the first element to inspect. A negative offset,
     *               for example the -1 that {@link #findFirst(byte[], byte)}
     *               returns when the key is absent, and an offset at or beyond
     *               the end of the array both yield 0.
     * @return the length of the run of {@code key} that starts at
     *         {@code offset}
     */
    public static int countConsecutive(final byte[] bytes, final byte key, final int offset) {
        if (offset < 0) {
            // there is no element at a negative index, hence no run to count
            return 0;
        }
        int count = 0;
        for (int i = offset; i < bytes.length && bytes[i] == key; i++) {
            count++;
        }
        return count;
    }
}