From 4749dca73a5b8d0112999e426171b744178a68be Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Wed, 7 Oct 2020 12:30:33 +0200
Subject: [PATCH] add reindex method to PersistentMap

---
 .../org/lucares/pdb/map/PersistentMap.java    |  84 ++++++++++-
 .../lucares/pdb/map/PersistentMapTest.java    | 131 +++++++++++-------
 .../org/lucares/utils/cache/LRUCache.java     |   7 +
 3 files changed, 168 insertions(+), 54 deletions(-)

diff --git a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java
index 9ca4b64..451dd70 100644
--- a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java
+++ b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMap.java
@@ -1,8 +1,12 @@
 package org.lucares.pdb.map;
 
+import java.io.IOException;
 import java.io.PrintStream;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.nio.file.Path;
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeFormatter;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
@@ -10,6 +14,7 @@ import java.util.Map.Entry;
 import java.util.Objects;
 import java.util.Stack;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Function;
 
 import org.lucares.collections.LongList;
@@ -52,7 +57,7 @@ public class PersistentMap implements AutoCloseable {
         public byte[] getEmptyValue();
     }
 
-    private static final class StringCoder implements EncoderDecoder {
+    public static final class StringCoder implements EncoderDecoder {
 
         @Override
         public byte[] encode(final String object) {
@@ -70,7 +75,7 @@ public class PersistentMap implements AutoCloseable {
         }
     }
 
-    private static final class LongCoder implements EncoderDecoder {
+    public static final class LongCoder implements EncoderDecoder {
 
         @Override
         public byte[] encode(final Long object) {
@@ -88,7 +93,7 @@ public class PersistentMap implements AutoCloseable {
         }
     }
 
-    private static final class UUIDCoder implements EncoderDecoder {
+    public static final class UUIDCoder implements EncoderDecoder {
 
         @Override
         public byte[] encode(final UUID uuid) {
@@ -143,7 +148,7 @@ public class PersistentMap implements AutoCloseable {
     static final int BLOCK_SIZE = 4096;
     static final long NODE_OFFSET_TO_ROOT_NODE = 8;
 
-    private final DiskStorage diskStore;
+    private DiskStorage diskStore;
 
     private int maxEntriesInNode = Integer.MAX_VALUE;
 
@@ -158,13 +163,21 @@ public class PersistentMap implements AutoCloseable {
     // guarded by: this
     private volatile long nodeOffsetOfRootNode = -1;
 
+    private final Path path;
+
     public PersistentMap(final Path path, final Path storageBasePath, final EncoderDecoder keyEncoder,
             final EncoderDecoder valueEncoder) {
+        this.path = path;
         this.diskStore = new DiskStorage(path, storageBasePath);
         this.keyEncoder = keyEncoder;
         this.valueEncoder = valueEncoder;
 
         initIfNew();
+        readOffsetOfRootNode();
+    }
+
+    /** Reads nodeOffsetOfRootNode from the disk block at NODE_OFFSET_TO_ROOT_NODE. */
+    private void readOffsetOfRootNode() {
         final DiskBlock diskBlock = diskStore.getDiskBlock(NODE_OFFSET_TO_ROOT_NODE, diskStore.minAllocationSize());
         nodeOffsetOfRootNode = diskBlock.getByteBuffer().getLong(0);
     }
@@ -237,7 +250,7 @@ public class PersistentMap implements AutoCloseable {
     }
 
     private V getFromValueCache(final byte[] encodedKey, final K key) {
-        LOGGER.info("valueCache hit rate: {} when getting key: {}", valueCache.cacheHitRate(), key);
+        LOGGER.trace("valueCache hit rate: {} when getting key: {}", valueCache.cacheHitRate(), key);
         return valueCache.get(new ByteArrayKey(encodedKey));
     }
 
@@ -460,6 +473,67 @@ public class PersistentMap implements AutoCloseable {
         iterateNodeEntryByPrefix(rootNodeOffset, encodedKeyPrefix, visitor);
     }
 
+    /**
+     * Copies all entries into a new file and replaces the file of this map with it.
+     *
+     * The previous file is kept next to it as a timestamped {@code .backup} file. If replacing the
+     * file fails, the previous file is moved back (best effort) and the storage is re-opened, so
+     * that this instance is not left with a closed storage.
+     *
+     * @throws IOException if a file cannot be deleted or moved
+     */
+    public synchronized void reindex() throws IOException {
+        final long start = System.nanoTime();
+        final AtomicLong countValues = new AtomicLong();
+        LOGGER.info("start reindexing file: {}", path);
+        // resolveSibling also handles a path without parent, for which getParent() returns null
+        final Path newFile = path.resolveSibling(path.getFileName() + ".tmp");
+        // a temp file left behind by an aborted run would otherwise be re-used as an existing map
+        Files.deleteIfExists(newFile);
+
+        try (PersistentMap newMap = new PersistentMap<>(newFile, null, keyEncoder, valueEncoder)) {
+            final long rootNodeOffset = readNodeOffsetOfRootNode();
+            final byte[] encodedKeyPrefix = new byte[0];
+            iterateNodeEntryByPrefix(rootNodeOffset, encodedKeyPrefix, (k, v) -> {
+                newMap.putValue(k, v);
+                final long count = countValues.incrementAndGet();
+                if (count % 100000 == 0) {
+                    LOGGER.info("written {} values", count);
+                }
+            });
+        }
+
+        diskStore.close();
+        valueCache.clear();
+        nodeCache.clear();
+
+        final Path backupFile = path.resolveSibling(path.getFileName() + "."
+                + DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss").format(OffsetDateTime.now()) + ".backup");
+        try {
+            Files.move(path, backupFile);
+            try {
+                Files.move(newFile, path);
+            } catch (final IOException | RuntimeException e) {
+                // put the previous file back, otherwise there would be no file at 'path' anymore
+                try {
+                    Files.move(backupFile, path);
+                } catch (final IOException | RuntimeException rollbackFailure) {
+                    e.addSuppressed(rollbackFailure);
+                }
+                throw e;
+            }
+        } finally {
+            // the old storage was closed above, so re-open on success and on failure
+            this.diskStore = new DiskStorage(path, null);
+            readOffsetOfRootNode();
+        }
+
+        final double durationInMs = (System.nanoTime() - start) / 1_000_000.0;
+        final double valuesPerSecond = countValues.get() / (durationInMs / 1000);
+        LOGGER.info("done reindexing, took {} ms, {} values, {} values/s", (int) Math.ceil(durationInMs),
+                countValues.get(), valuesPerSecond);
+    }
+
     private void iterateNodeEntryByPrefix(final long nodeOffest, final byte[] keyPrefix, final Visitor visitor) {
         final PersistentMapDiskNode node = getNode(nodeOffest);
 
diff --git a/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java
index e33256b..47a1d9f 100644
--- a/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java
+++ b/block-storage/src/test/java/org/lucares/pdb/map/PersistentMapTest.java
@@ -16,9 +16,9 @@ import java.util.UUID;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.Assertions;
 import org.lucares.utils.file.FileUtils;
 
 public class PersistentMapTest {
@@ -332,61 +332,94 @@ public class PersistentMapTest {
     @Test
     public void testLotsOfValues() throws Exception {
         final Path file = dataDirectory.resolve("map.db");
-        final var insertedValues = new HashMap();
-
-        final SecureRandom rnd = new SecureRandom();
-        rnd.setSeed(1);
-
+        final Map insertedValues;
         try (final PersistentMap map = new PersistentMap<>(file, dataDirectory, PersistentMap.LONG_CODER,
                 PersistentMap.LONG_CODER)) {
-
-            for (int i = 0; i < 1_000; i++) {
-
-                final Long key = (long) (rnd.nextGaussian() * Integer.MAX_VALUE);
-                final Long value = (long) (rnd.nextGaussian() * Integer.MAX_VALUE);
-
-                if (insertedValues.containsKey(key)) {
-                    continue;
-                }
-
-                Assertions.assertNull(map.putValue(key, value));
-
-                insertedValues.put(key, value);
-
-                final boolean failEarly = false;
-                if (failEarly) {
-                    for (final var entry : insertedValues.entrySet()) {
-                        final Long actualValue = map.getValue(entry.getKey());
-
-                        if (!Objects.equals(actualValue, entry.getValue())) {
-                            map.print();
-                        }
-
-                        Assertions.assertEquals(entry.getValue(), actualValue,
-                                "value for key " + entry.getKey() + " in the " + i + "th iteration");
-                    }
-                }
-            }
+            insertedValues = fillMap(1000, true, map);
         }
 
         try (final PersistentMap map = new PersistentMap<>(file, dataDirectory, PersistentMap.LONG_CODER,
                 PersistentMap.LONG_CODER)) {
-            final AtomicInteger counter = new AtomicInteger();
-            final AtomicInteger maxDepth = new AtomicInteger();
-            map.visitNodeEntriesPreOrder((node, parentNode, nodeEntry, depth) -> {
-                counter.addAndGet(nodeEntry.isInnerNode() ? 1 : 0);
-                maxDepth.set(Math.max(maxDepth.get(), depth));
-            });
-
-            final long start = System.nanoTime();
-            for (final var entry : insertedValues.entrySet()) {
-                final Long actualValue = map.getValue(entry.getKey());
-                Assertions.assertEquals(entry.getValue(), actualValue,
-                        "value for key " + entry.getKey() + " after all iterations");
-            }
-            System.out.println("nodes=" + counter.get() + ", depth=" + maxDepth.get() + ": "
-                    + (System.nanoTime() - start) / 1_000_000.0 + "ms");
+            assertValuesInMap(insertedValues, map);
         }
     }
 
+    @Test
+    public void testReindexing() throws IOException {
+        final Path file = dataDirectory.resolve("map.db");
+        final Map insertedValuesBeforeReindex;
+        final Map insertedValuesAfterReindex;
+        try (final PersistentMap map = new PersistentMap<>(file, dataDirectory, PersistentMap.LONG_CODER,
+                PersistentMap.LONG_CODER)) {
+            insertedValuesBeforeReindex = fillMap(1_000, true, map);
+
+            map.reindex();
+
+            assertValuesInMap(insertedValuesBeforeReindex, map);
+
+            insertedValuesAfterReindex = fillMap(1_000, true, map);
+
+            assertValuesInMap(insertedValuesBeforeReindex, map);
+            assertValuesInMap(insertedValuesAfterReindex, map);
+        }
+        try (final PersistentMap map = new PersistentMap<>(file, dataDirectory, PersistentMap.LONG_CODER,
+                PersistentMap.LONG_CODER)) {
+            assertValuesInMap(insertedValuesBeforeReindex, map);
+            assertValuesInMap(insertedValuesAfterReindex, map);
+        }
+    }
+
+    /** Asserts that {@code map} returns the expected value for every entry of {@code insertedValues}. */
+    private void assertValuesInMap(final Map insertedValues, final PersistentMap map) {
+        final AtomicInteger counter = new AtomicInteger();
+        final AtomicInteger maxDepth = new AtomicInteger();
+        map.visitNodeEntriesPreOrder((node, parentNode, nodeEntry, depth) -> {
+            counter.addAndGet(nodeEntry.isInnerNode() ? 1 : 0);
+            maxDepth.set(Math.max(maxDepth.get(), depth));
+        });
+
+        final long start = System.nanoTime();
+        for (final var entry : insertedValues.entrySet()) {
+            final Long actualValue = map.getValue(entry.getKey());
+            Assertions.assertEquals(entry.getValue(), actualValue,
+                    "value for key " + entry.getKey() + " after all iterations");
+        }
+        System.out.println("nodes=" + counter.get() + ", depth=" + maxDepth.get() + ": "
+                + (System.nanoTime() - start) / 1_000_000.0 + "ms");
+    }
+
+    /** Puts up to {@code numberOfValues} random entries into {@code map} and returns the entries that were put. */
+    private Map fillMap(final int numberOfValues, final boolean failEarly,
+            final PersistentMap map) {
+        final Map insertedValues = new HashMap<>();
+        final SecureRandom rnd = new SecureRandom();
+        rnd.setSeed(1);
+        for (int i = 0; i < numberOfValues; i++) {
+
+            final Long key = (long) (rnd.nextGaussian() * Integer.MAX_VALUE);
+            final Long value = (long) (rnd.nextGaussian() * Integer.MAX_VALUE);
+
+            if (insertedValues.containsKey(key)) {
+                continue;
+            }
+
+            Assertions.assertNull(map.putValue(key, value));
+
+            insertedValues.put(key, value);
+
+            if (failEarly) {
+                for (final var entry : insertedValues.entrySet()) {
+                    final Long actualValue = map.getValue(entry.getKey());
+
+                    if (!Objects.equals(actualValue, entry.getValue())) {
+                        map.print();
+                    }
+
+                    Assertions.assertEquals(entry.getValue(), actualValue,
+                            "value for key " + entry.getKey() + " in the " + i + "th iteration");
+                }
+            }
+        }
+        return insertedValues;
+    }
 }
diff --git a/pdb-utils/src/main/java/org/lucares/utils/cache/LRUCache.java b/pdb-utils/src/main/java/org/lucares/utils/cache/LRUCache.java
index ffe1163..3978ad0 100644
--- a/pdb-utils/src/main/java/org/lucares/utils/cache/LRUCache.java
+++ b/pdb-utils/src/main/java/org/lucares/utils/cache/LRUCache.java
@@ -47,4 +47,11 @@ public class LRUCache {
     public double cacheHitRate() {
         return (double) countGetHits / (double) countGet;
     }
+
+    /** Clears the cache and resets the get and hit counters to zero. */
+    public void clear() {
+        cache.clear();
+        countGet = 0;
+        countGetHits = 0;
+    }
 }