new implementation of an integer storage

It can store multiple streams of integers in a single
file. It uses 512-byte blocks, which is only 1/8th
of the block size the file-based data store uses. This
significantly reduces the overhead and the memory wasted
on short integer streams. Storing the data in one big
file, instead of many small files, also makes backups much
more efficient.
This commit is contained in:
2018-08-26 09:37:56 +02:00
parent 15a72f09d7
commit b7ebb8ce6a
9 changed files with 1041 additions and 0 deletions

View File

@@ -0,0 +1,135 @@
package org.lucares.pdb.blockstorage;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.stream.LongStream;
import org.lucares.collections.LongList;
import org.lucares.pdb.diskstorage.DiskStorage;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@Test
public class BSFileTest {
private Path dataDirectory;
@BeforeMethod
public void beforeMethod() throws IOException {
dataDirectory = Files.createTempDirectory("pdb");
}
@AfterMethod
public void afterMethod() throws IOException {
FileUtils.delete(dataDirectory);
}
public void testBlockStorage() throws Exception {
final Path file = dataDirectory.resolve("data.int.db");
final int numLongs = 1000;
long blockNumber = -1;
long start = System.nanoTime();
//
try (final DiskStorage ds = new DiskStorage(file)) {
try (final BSFile bsFile = BSFile.newFile(ds)) {
blockNumber = bsFile.getRootBlockNumber();
for (long i = 0; i < numLongs / 2; i++) {
bsFile.append(i);
}
}
try (final BSFile bsFile = BSFile.existingFile(blockNumber, ds)) {
for (long i = numLongs / 2; i < numLongs; i++) {
bsFile.append(i);
}
}
}
System.out.println("duration write: " + (System.nanoTime() - start) / 1_000_000.0 + "ms");
start = System.nanoTime();
try (final DiskStorage ds = new DiskStorage(file)) {
final BSFile bsFile = BSFile.existingFile(blockNumber, ds);
final long[] actualLongs = bsFile.stream().toArray();
final long[] expectedLongs = LongStream.rangeClosed(0, numLongs - 1).toArray();
Assert.assertEquals(actualLongs, expectedLongs);
}
System.out.println("duration read: " + (System.nanoTime() - start) / 1_000_000.0 + "ms");
}
public void testBlockStorageMultithreading() throws Exception {
final ExecutorService pool = Executors.newCachedThreadPool();
final Path file = dataDirectory.resolve("data.int.db");
final int threads = 50;
final int values = 10000;
final Map<Long, LongList> expected = new HashMap<>();
final List<Future<Void>> futures = new ArrayList<>();
final long start = System.nanoTime();
try (final DiskStorage ds = new DiskStorage(file)) {
for (int i = 0; i < threads; i++) {
final Future<Void> future = pool.submit(() -> {
final ThreadLocalRandom random = ThreadLocalRandom.current();
final LongList listOfValues = new LongList();
try (BSFile bsFile = BSFile.newFile(ds)) {
for (int j = 0; j < values; j++) {
// will produce 1,2 and 3 byte sequences when encoded
final long value = random.nextLong(32768);
listOfValues.add(value);
bsFile.append(value);
}
expected.put(bsFile.getRootBlockNumber(), listOfValues);
}
return null;
});
futures.add(future);
}
for (final Future<Void> future : futures) {
future.get();
}
pool.shutdown();
pool.awaitTermination(5, TimeUnit.MINUTES);
}
System.out.println("duration write: " + (System.nanoTime() - start) / 1_000_000.0 + "ms");
// verification
try (final DiskStorage ds = new DiskStorage(file)) {
for (final Entry<Long, LongList> entry : expected.entrySet()) {
final long rootBlockNumber = entry.getKey();
final LongList expectedValues = entry.getValue();
try (BSFile bsFile = BSFile.existingFile(rootBlockNumber, ds)) {
final long[] actualLongs = bsFile.stream().toArray();
final long[] expectedLongs = expectedValues.toArray();
Assert.assertEquals(actualLongs, expectedLongs, "for rootBlockNumber=" + rootBlockNumber);
}
}
}
}
}

View File

@@ -0,0 +1,82 @@
package org.lucares.pdb.blockstorage.intsequence;
import static org.testng.Assert.assertEquals;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import org.lucares.collections.LongList;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@Test
public class LongSequenceEncoderDecoderTest {
@DataProvider
public Object[][] providerComputeNumberOfEncodedBytes() {
return new Object[][] {
// 2^6-1 = 63 -> 1 byte
// 2^13-1 = 8191 -> 2 byte
// 2^20-1 = 1048575 -> 3 byte
// 2^27-1 = 134217727 -> 4 byte
// 2^34-1 = 17179869183 -> 5 byte
// 2^41-1 = 2199023255551 -> 6 byte
// 2^48-1 = 281474976710655-> 7 byte
// 2^55-1 = 36028797018963967-> 8 byte
// 2^62-1 = 4611686018427387903-> 9 byte
// 2^69-1 = 590295810358705651711 -> 10 byte
{ 0, 1 }, //
{ 63, 1 }, //
{ 64, 2 }, //
{ 8191, 2 }, //
{ 8192, 3 }, //
{ 1048575, 3 }, //
{ 1048576, 4 }, //
{ 134217727, 4 }, //
{ 134217728, 5 }, //
{ 17179869183L, 5 }, //
{ 17179869184L, 6 }, //
{ 2199023255551L, 6 }, //
{ 2199023255552L, 7 }, //
{ 281474976710655L, 7 }, //
{ 2814749767106556L, 8 },//
};
}
@Test(dataProvider = "providerComputeNumberOfEncodedBytes")
public void testComputeNumberOfEncodedBytes(final long value, final long expected) {
final long actual = LongSequenceEncoderDecoder.computeNumberOfEncodedBytes(value);
assertEquals(actual, expected);
}
@DataProvider
public Object[][] providerEncodeDecode() {
return new Object[][] { { 10, 0, 5 }, //
{ 10, 0, 63 }, //
{ 10, 0, 8191 }, //
{ 10, 0, Long.MAX_VALUE },//
};
}
@Test(dataProvider = "providerEncodeDecode")
public void testEncodeDecode(final long numValues, final long minValue, final long maxValue) {
final LongSequenceEncoderDecoder encoder = new LongSequenceEncoderDecoder();
final LongList originalValues = new LongList();
final byte[] buffer = new byte[1024];
final AtomicInteger offsetInBuffer = new AtomicInteger(0);
ThreadLocalRandom.current().longs(numValues, minValue, maxValue).forEachOrdered(value -> {
originalValues.add(value);
final int appendedBytes = encoder.encodeInto(value, buffer, offsetInBuffer.get());
offsetInBuffer.addAndGet(appendedBytes);
});
final LongList actualValues = encoder.decode(buffer);
assertEquals(actualValues.toString(), originalValues.toString());
}
}

View File

@@ -0,0 +1,172 @@
package org.lucares.pdb.diskstorage;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@Test
public class DiskStorageTest {
private Path dataDirectory;
@BeforeMethod
public void beforeMethod() throws IOException {
dataDirectory = Files.createTempDirectory("pdb");
}
@AfterMethod
public void afterMethod() throws IOException {
FileUtils.delete(dataDirectory);
}
/**
* File systems work with 4096 byte blocks, but we want to work with 512 bytes
* per block. Does flushing a 512 byte block flush the full 4096 byte block?
*
* @throws Exception
*/
@Test(enabled = false)
public void testFlushingASectorOrABlock() throws Exception {
final Path databaseFile = dataDirectory.resolve("db.ds");
Files.deleteIfExists(databaseFile);
try (DiskStorage ds = new DiskStorage(databaseFile)) {
final int numBlocks = 10;
ds.appendNewBlocks(numBlocks);
Assert.assertEquals(ds.getNumBlocks(), numBlocks);
final List<DiskBlock> blocks = new ArrayList<>();
// fill the first 16 512-byte blocks
// that is more than on 4096 byte block
for (int i = 0; i < numBlocks; i++) {
final DiskBlock diskBlock = ds.getDiskBlock(i);
assertAllValuesAreEqual(diskBlock);
fill(diskBlock, (byte) i);
diskBlock.writeAsync();
blocks.add(diskBlock);
}
// now force (aka flush) a block in the middle of the first 4096 byte block
blocks.get(3).writeAsync();
blocks.get(3).force();
System.exit(0);
// read all blocks again an check what they contain
// 1. we do this with the existing file channel
// this one should see every change, because we wrote them to the file channel
for (int i = 0; i < numBlocks; i++) {
final DiskBlock diskBlock = ds.getDiskBlock(i);
assertAllValuesAreEqual(diskBlock, (byte) i);
fill(diskBlock, (byte) i);
blocks.add(diskBlock);
}
// 2. we read the file from another file channel
// this one might not see changes made to the first file channel
//
// But it does see the changes. Most likely, because both channels
// use the same buffers from the operating system.
try (DiskStorage ds2 = new DiskStorage(databaseFile)) {
for (int i = 0; i < numBlocks; i++) {
final DiskBlock diskBlock = ds2.getDiskBlock(i);
assertAllValuesAreEqual(diskBlock, (byte) i);
fill(diskBlock, (byte) i);
blocks.add(diskBlock);
}
}
}
}
@Test(enabled = false)
public void testDiskStorage() throws Exception {
final Path databaseFile = dataDirectory.resolve("db.ds");
final ExecutorService pool = Executors.newCachedThreadPool();
try (DiskStorage ds = new DiskStorage(databaseFile)) {
final int numBlocks = 10;
ds.appendNewBlocks(numBlocks);
Assert.assertEquals(ds.getNumBlocks(), numBlocks);
for (int i = 0; i < numBlocks; i++) {
final int block = i;
pool.submit(() -> {
final ThreadLocalRandom random = ThreadLocalRandom.current();
try {
// now read/write random blocks
for (int j = 0; j < 10; j++) {
final DiskBlock diskBlock = ds.getDiskBlock(block);
assertAllValuesAreEqual(diskBlock);
fill(diskBlock, (byte) random.nextInt(127));
if (random.nextBoolean()) {
diskBlock.writeAsync();
} else {
diskBlock.writeAsync();
diskBlock.force();
}
}
} catch (final Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}
});
}
pool.shutdown();
pool.awaitTermination(1, TimeUnit.MINUTES);
}
}
private void assertAllValuesAreEqual(final DiskBlock diskBlock, final byte expectedVal) {
final byte[] buffer = diskBlock.getBuffer();
for (int i = 0; i < buffer.length; i++) {
if (expectedVal != buffer[i]) {
System.err.println(
"block " + diskBlock.getBlockNumber() + " " + buffer[i] + " != " + expectedVal + " at " + i);
break;
}
}
}
private void assertAllValuesAreEqual(final DiskBlock diskBlock) {
final byte[] buffer = diskBlock.getBuffer();
final byte expected = buffer[0];
for (int i = 0; i < buffer.length; i++) {
if (expected != buffer[i]) {
System.err.println(
"block " + diskBlock.getBlockNumber() + " " + buffer[i] + " != " + expected + " at " + i);
break;
}
}
}
private void fill(final DiskBlock diskBlock, final byte val) {
final byte[] buffer = diskBlock.getBuffer();
for (int i = 0; i < buffer.length; i++) {
buffer[i] = val;
}
}
}