diff --git a/block-storage/src/main/java/org/lucares/pdb/diskstorage/DiskStorage.java b/block-storage/src/main/java/org/lucares/pdb/diskstorage/DiskStorage.java index 0cad1c1..5834069 100644 --- a/block-storage/src/main/java/org/lucares/pdb/diskstorage/DiskStorage.java +++ b/block-storage/src/main/java/org/lucares/pdb/diskstorage/DiskStorage.java @@ -5,10 +5,10 @@ import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; -import java.nio.channels.FileLock; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,6 +17,12 @@ public class DiskStorage implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(DiskStorage.class); + private static final long FREE_LIST_ROOT_OFFSET = 0; + private static final int FREE_LIST_NEXT_POINTER = 0; + private static final int FREE_LIST_PREV_POINTER = 8; + private static final int FREE_LIST_SIZE = 16; + private static final int FREE_LIST_NODE_SIZE = 32; + private final FileChannel fileChannel; public DiskStorage(final Path databaseFile) throws IOException { @@ -36,17 +42,11 @@ public class DiskStorage implements AutoCloseable { public DiskBlock getDiskBlock(final long blockOffset, final int blockSize) throws IOException { - final long start = System.nanoTime(); + LOGGER.trace("read block={}", blockOffset); - try (final FileLock lock = fileChannel.lock(blockOffset, blockSize, true)) { + final MappedByteBuffer byteBuffer = fileChannel.map(MapMode.READ_WRITE, blockOffset, blockSize); - final MappedByteBuffer byteBuffer = fileChannel.map(MapMode.READ_WRITE, blockOffset, blockSize); - - return new DiskBlock(blockOffset, byteBuffer); - - } finally { - LOGGER.trace("read block={}: {}ms", blockOffset, (System.nanoTime() - start) / 1_000_000.0); - } + return new DiskBlock(blockOffset, byteBuffer); } @Override @@ -67,18 +67,183 @@ public class DiskStorage implements AutoCloseable { return result; } - public long allocateBlock(final int blockSize) throws IOException { + public synchronized long allocateBlock(final int blockSize) throws IOException { + if (blockSize < FREE_LIST_NODE_SIZE) { + throw new IllegalArgumentException("The minimal allocation size is 32 byte."); + } + + final Optional optionalFreeBlock = findFreeBlockWithSize(blockSize); + if (optionalFreeBlock.isPresent()) { + final FreeListNode freeBlock = optionalFreeBlock.get(); + removeBlockFromFreeList(freeBlock); + clearBlock(freeBlock); + return freeBlock.getOffset(); + } else { + return allocateNewBlock(blockSize); + } + } + + private long allocateNewBlock(final int blockSize) throws IOException { final byte[] buffer = new byte[blockSize]; final ByteBuffer src = ByteBuffer.wrap(buffer); - synchronized (fileChannel) { - // block numbers start with 1, so that the uninitialized value - // (0) means 'no block'. That way we do not have to write - // data to a newly created block, which reduces IO. - final long blockOffset = fileChannel.size(); - fileChannel.write(src, fileChannel.size()); - return blockOffset; + // block numbers start with 1, so that the uninitialized value + // (0) means 'no block'. That way we do not have to write + // data to a newly created block, which reduces IO. + final long blockOffset = fileChannel.size(); + fileChannel.write(src, fileChannel.size()); + return blockOffset; + } + + public synchronized void free(final long blockOffset, final int blockSize) throws IOException { + + final Optional neighboringFreeListNode = getNeighboringFreeListNode(blockOffset); + + if (neighboringFreeListNode.isPresent()) { + // insert new free node into the free list + final FreeListNode prev = neighboringFreeListNode.get(); + + insertFreeListNode(prev, blockOffset, blockSize); + + } else { + // add new free list node as the first node in the list + insertFreeListNodeAsNewRoot(blockOffset, blockSize); } } + + private void insertFreeListNodeAsNewRoot(final long blockOffset, final int blockSize) throws IOException { + final long freeListRootNodePosition = readFreeListRootNodePosition(); + + if (freeListRootNodePosition > 0) { + // there are free list nodes, but they are after the new node + + final FreeListNode next = readFreeListNode(freeListRootNodePosition); + final FreeListNode newNode = new FreeListNode(blockOffset, blockSize); + + FreeListNode.link(newNode, next); + + writeFreeListNode(newNode, next); + writeFreeListRootNodePosition(blockOffset); + + } else { + // this is the first free list node + final FreeListNode newNode = new FreeListNode(blockOffset, blockSize); + writeFreeListNode(newNode); + writeFreeListRootNodePosition(blockOffset); + } + } + + private void insertFreeListNode(final FreeListNode prev, final long blockOffset, final int blockSize) + throws IOException { + + final FreeListNode newNode = new FreeListNode(blockOffset, blockSize); + final FreeListNode next = prev.hasNext() ? readFreeListNode(prev.getNext()) : null; + + FreeListNode.link(prev, newNode, next); + + writeFreeListNode(prev, newNode, next); + } + + /** + * + * @param blockOffset the offset of the block that is about to be free'd + * @return the free list node before the block + * @throws IOException + */ + private Optional getNeighboringFreeListNode(final long blockOffset) throws IOException { + FreeListNode result = null; + final long freeListRootNodePosition = readFreeListRootNodePosition(); + if (freeListRootNodePosition < blockOffset) { + + long nextFreeListNodeOffset = freeListRootNodePosition; + while (nextFreeListNodeOffset > 0) { + final FreeListNode freeListNode = readFreeListNode(nextFreeListNodeOffset); + + if (freeListNode.getOffset() > blockOffset) { + break; + } + nextFreeListNodeOffset = freeListNode.getNext(); + result = freeListNode; + } + } + return Optional.ofNullable(result); + } + + private Optional findFreeBlockWithSize(final long blockSize) throws IOException { + FreeListNode result = null; + final long freeListRootNodePosition = readFreeListRootNodePosition(); + + long nextFreeListNodeOffset = freeListRootNodePosition; + while (nextFreeListNodeOffset > 0) { + final FreeListNode freeListNode = readFreeListNode(nextFreeListNodeOffset); + + if (freeListNode.getSize() == blockSize) { + result = freeListNode; + break; + } + nextFreeListNodeOffset = freeListNode.getNext(); + } + + return Optional.ofNullable(result); + } + + private void clearBlock(final FreeListNode freeBlock) throws IOException { + final ByteBuffer src = ByteBuffer.allocate(freeBlock.getSize()); + fileChannel.write(src, freeBlock.getOffset()); + } + + private void removeBlockFromFreeList(final FreeListNode freeBlock) throws IOException { + + if (freeBlock.getPrev() == 0) { + writeFreeListRootNodePosition(freeBlock.getNext()); + } + + if (freeBlock.getNext() > 0) { + final FreeListNode next = readFreeListNode(freeBlock.getNext()); + next.setPrev(freeBlock.getPrev()); + writeFreeListNode(next); + } + + if (freeBlock.getPrev() > 0) { + final FreeListNode prev = readFreeListNode(freeBlock.getPrev()); + prev.setNext(freeBlock.getNext()); + writeFreeListNode(prev); + } + } + + private FreeListNode readFreeListNode(final long freeListNodePosition) throws IOException { + final ByteBuffer freeListNode = ByteBuffer.allocate(FREE_LIST_NODE_SIZE); + fileChannel.read(freeListNode, freeListNodePosition); + final long offset = freeListNodePosition; + final long next = freeListNode.getLong(FREE_LIST_NEXT_POINTER); + final long prev = freeListNode.getLong(FREE_LIST_PREV_POINTER); + final int size = freeListNode.getInt(FREE_LIST_SIZE); + return new FreeListNode(offset, next, prev, size); + } + + private void writeFreeListNode(final FreeListNode... nodes) throws IOException { + + for (final FreeListNode node : nodes) { + if (node != null) { + final ByteBuffer src = ByteBuffer.allocate(FREE_LIST_NODE_SIZE); + src.putLong(FREE_LIST_NEXT_POINTER, node.getNext()); + src.putLong(FREE_LIST_PREV_POINTER, node.getPrev()); + src.putInt(FREE_LIST_SIZE, node.getSize()); + fileChannel.write(src, node.getOffset()); + } + } + } + + private long readFreeListRootNodePosition() throws IOException { + final ByteBuffer freeListFirstBlock = ByteBuffer.allocate(8); + fileChannel.read(freeListFirstBlock, FREE_LIST_ROOT_OFFSET); + return freeListFirstBlock.getLong(0); + } + + private void writeFreeListRootNodePosition(final long freeListRootNodePosition) throws IOException { + final ByteBuffer freeListFirstBlock = ByteBuffer.allocate(8); + freeListFirstBlock.putLong(0, freeListRootNodePosition); + fileChannel.write(freeListFirstBlock, FREE_LIST_ROOT_OFFSET); + } } diff --git a/block-storage/src/main/java/org/lucares/pdb/diskstorage/FreeListNode.java b/block-storage/src/main/java/org/lucares/pdb/diskstorage/FreeListNode.java new file mode 100644 index 0000000..42e3958 --- /dev/null +++ b/block-storage/src/main/java/org/lucares/pdb/diskstorage/FreeListNode.java @@ -0,0 +1,82 @@ +package org.lucares.pdb.diskstorage; + +public class FreeListNode { + private final long offset; + private long next; + private long prev; + private int size; + + public FreeListNode(final long offset, final int size) { + this.offset = offset; + this.size = size; + } + + public FreeListNode(final long offset, final long next, final long prev, final int size) { + this.offset = offset; + this.next = next; + this.prev = prev; + this.size = size; + } + + public long getOffset() { + return offset; + } + + public long getNext() { + return next; + } + + public void setNext(final long next) { + this.next = next; + } + + public void setNext(final FreeListNode next) { + this.next = next != null ? next.getOffset() : 0; + } + + public long getPrev() { + return prev; + } + + public void setPrev(final long prev) { + this.prev = prev; + } + + public void setPrev(final FreeListNode prev) { + this.prev = prev != null ? prev.getOffset() : 0; + } + + public int getSize() { + return size; + } + + public void setSize(final int size) { + this.size = size; + } + + @Override + public String toString() { + return "FreeListNode [offset=" + offset + ", next=" + next + ", prev=" + prev + ", size=" + size + "]"; + } + + public boolean hasNext() { + return next != 0; + } + + public static void link(final FreeListNode prev, final FreeListNode next) { + prev.setNext(next); + next.setPrev(prev); + } + + public static void link(final FreeListNode prev, final FreeListNode middle, final FreeListNode next) { + if (prev != null) { + prev.setNext(middle); + } + middle.setPrev(prev); + middle.setNext(next); + if (next != null) { + next.setPrev(prev); + } + } + +} diff --git a/block-storage/src/test/java/org/lucares/pdb/diskstorage/DiskStorageTest.java b/block-storage/src/test/java/org/lucares/pdb/diskstorage/DiskStorageTest.java index 83a2cf9..3df8814 100644 --- a/block-storage/src/test/java/org/lucares/pdb/diskstorage/DiskStorageTest.java +++ b/block-storage/src/test/java/org/lucares/pdb/diskstorage/DiskStorageTest.java @@ -11,6 +11,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import org.lucares.utils.file.FileUtils; +import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -135,6 +136,130 @@ public class DiskStorageTest { } } + @Test(enabled = true, expectedExceptions = IllegalArgumentException.class) + public void testAllocationSmallerThanMinimalBlockSize() throws Exception { + final Path databaseFile = dataDirectory.resolve("db.ds"); + + try (DiskStorage ds = new DiskStorage(databaseFile)) { + + final int blockSize = 31; // minimal block size is 32 + ds.allocateBlock(blockSize); + } + } + + @Test(enabled = true) + public void testAllocateAndFreeSingleBlockInFreeList() throws Exception { + final Path databaseFile = dataDirectory.resolve("db.ds"); + + try (DiskStorage ds = new DiskStorage(databaseFile)) { + + final int blockSize = 32; + final long block_8_39 = ds.allocateBlock(blockSize); + final long block_40_71 = ds.allocateBlock(blockSize); + final long block_72_103 = ds.allocateBlock(blockSize); + + Assert.assertEquals(block_8_39, 8); + Assert.assertEquals(block_40_71, 40); + Assert.assertEquals(block_72_103, 72); + + ds.free(block_40_71, blockSize); + + // should reuse the block we just freed + final long actual_block_40_71 = ds.allocateBlock(blockSize); + + Assert.assertEquals(actual_block_40_71, 40); + } + } + + @Test(enabled = true) + public void testAllocateAndFreeMultipleBlocksInFreeList() throws Exception { + final Path databaseFile = dataDirectory.resolve("db.ds"); + + try (DiskStorage ds = new DiskStorage(databaseFile)) { + + final int blockSize = 32; + ds.allocateBlock(blockSize); + final long block_40_71 = ds.allocateBlock(blockSize); + final long block_72_103 = ds.allocateBlock(blockSize); + final long block_104_135 = ds.allocateBlock(blockSize); + ds.allocateBlock(blockSize); + + ds.free(block_72_103, blockSize); + ds.free(block_104_135, blockSize); + ds.free(block_40_71, blockSize); // the block with the smaller index is freed last, this increases line + // coverage, because there is a branch for prepending the root node + + // should reuse the first block we just freed + // this removes the root node of the free list + final long actual_block_40_71 = ds.allocateBlock(blockSize); + Assert.assertEquals(actual_block_40_71, 40); + + // should reuse the second block we just freed + final long actual_block_72_103 = ds.allocateBlock(blockSize); + Assert.assertEquals(actual_block_72_103, 72); + + // should reuse the third block we just freed + // this removes the last node of the free list + final long actual_block_104_135 = ds.allocateBlock(blockSize); + Assert.assertEquals(actual_block_104_135, 104); + + final long block_168_199 = ds.allocateBlock(blockSize); + Assert.assertEquals(block_168_199, 168); + } + } + + @Test(enabled = true) + public void testAllocateAndFreeInsertFreeNodeInTheMiddleOfTheFreeList() throws Exception { + final Path databaseFile = dataDirectory.resolve("db.ds"); + + try (DiskStorage ds = new DiskStorage(databaseFile)) { + + final int blockSize = 32; + ds.allocateBlock(blockSize); + ds.allocateBlock(blockSize); + final long block_72_103 = ds.allocateBlock(blockSize); + final long block_104_135 = ds.allocateBlock(blockSize); + final long block_136_167 = ds.allocateBlock(blockSize); + + // free the last block first, to increase code coverage + ds.free(block_136_167, blockSize); + ds.free(block_72_103, blockSize); + ds.free(block_104_135, blockSize); + + // the first free block is re-used + final long actual_block_72_103 = ds.allocateBlock(blockSize); + Assert.assertEquals(actual_block_72_103, block_72_103); + + final long actual_block_104_135 = ds.allocateBlock(blockSize); + Assert.assertEquals(actual_block_104_135, block_104_135); + + final long actual_block_136_167 = ds.allocateBlock(blockSize); + Assert.assertEquals(actual_block_136_167, block_136_167); + } + } + + @Test(enabled = true) + public void testAllocateAndFreeMultipleBlocksWithDifferentSizes() throws Exception { + final Path databaseFile = dataDirectory.resolve("db.ds"); + + try (DiskStorage ds = new DiskStorage(databaseFile)) { + + final int blockSizeSmall = 32; + final int blockSizeBig = 64; + ds.allocateBlock(blockSizeSmall); + ds.allocateBlock(blockSizeSmall); + final long big_block_72_103 = ds.allocateBlock(blockSizeBig); + final long small_block_136_167 = ds.allocateBlock(blockSizeSmall); + ds.allocateBlock(blockSizeSmall); + + ds.free(big_block_72_103, blockSizeBig); + ds.free(small_block_136_167, blockSizeSmall); + + final long actual_small_block_136_167 = ds.allocateBlock(blockSizeSmall); + Assert.assertEquals(actual_small_block_136_167, small_block_136_167); + } + } + private void assertAllValuesAreEqual(final DiskBlock diskBlock, final byte expectedVal) { final byte[] buffer = diskBlock.getBuffer(); for (int i = 0; i < buffer.length; i++) {