support for negative values in variable byte encoding

We now support negative values. This will allow us to
store time/value sequences that are not monotonically
increasing, so that we do not have to create multiple
files just because some values were sent out of order.

This is done by first transforming the values into
positive values by using interleaved encoding (a
variant of what is commonly called ZigZag encoding).
We are mapping values like this:
 0 -> 1
 1 -> 2
-1 -> 3
 2 -> 4
-2 -> 5
...

Renamed LongSequenceEncoderDecoder to VariableByteEncoder.
Made methods static.
This commit is contained in:
2018-09-29 19:48:57 +02:00
parent f07977c27a
commit e03fccbdf7
6 changed files with 244 additions and 317 deletions

View File

@@ -1,82 +0,0 @@
package org.lucares.pdb.blockstorage.intsequence;
import static org.testng.Assert.assertEquals;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import org.lucares.collections.LongList;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
 * Tests for {@link LongSequenceEncoderDecoder}: checks the computed length of
 * an encoded value and that encoded sequences can be decoded again.
 */
@Test
public class LongSequenceEncoderDecoderTest {

    /**
     * Supplies rows of {@code value, expectedNumberOfBytes}. Each pair of rows
     * probes one size boundary: the largest value expected to fit into n bytes
     * followed by the smallest value expected to need n+1 bytes.
     *
     * @return the test rows
     */
    @DataProvider
    public Object[][] providerComputeNumberOfEncodedBytes() {
        return new Object[][] {
                // 2^6-1 = 63 -> 1 byte
                // 2^13-1 = 8191 -> 2 byte
                // 2^20-1 = 1048575 -> 3 byte
                // 2^27-1 = 134217727 -> 4 byte
                // 2^34-1 = 17179869183 -> 5 byte
                // 2^41-1 = 2199023255551 -> 6 byte
                // 2^48-1 = 281474976710655 -> 7 byte
                // 2^55-1 = 36028797018963967 -> 8 byte
                // 2^62-1 = 4611686018427387903 -> 9 byte
                // 2^69-1 = 590295810358705651711 -> 10 byte
                { 0, 1 }, //
                { 63, 1 }, //
                { 64, 2 }, //
                { 8191, 2 }, //
                { 8192, 3 }, //
                { 1048575, 3 }, //
                { 1048576, 4 }, //
                { 134217727, 4 }, //
                { 134217728, 5 }, //
                { 17179869183L, 5 }, //
                { 17179869184L, 6 }, //
                { 2199023255551L, 6 }, //
                { 2199023255552L, 7 }, //
                { 281474976710655L, 7 }, //
                // 2^48 is the first value that needs 8 bytes; the previous literal
                // (2814749767106556L) had a stray digit and missed the boundary
                { 281474976710656L, 8 },//
        };
    }

    /**
     * Asserts that the encoder reports the expected encoded length for a value.
     *
     * @param value    the value whose encoded length is computed
     * @param expected the expected number of bytes
     */
    @Test(dataProvider = "providerComputeNumberOfEncodedBytes")
    public void testComputeNumberOfEncodedBytes(final long value, final long expected) {
        final long actual = LongSequenceEncoderDecoder.computeNumberOfEncodedBytes(value);
        assertEquals(actual, expected);
    }

    /**
     * Supplies rows of {@code numValues, minValue, maxValue} describing how many
     * random values to generate and from which range.
     *
     * @return the test rows
     */
    @DataProvider
    public Object[][] providerEncodeDecode() {
        return new Object[][] { { 10, 0, 5 }, //
                { 10, 0, 63 }, //
                { 10, 0, 8191 }, //
                { 10, 0, Long.MAX_VALUE },//
        };
    }

    /**
     * Encodes random values back to back into one buffer, decodes the buffer and
     * compares the string form of the decoded list with that of the original
     * values.
     *
     * @param numValues number of random values to generate
     * @param minValue  lower bound of the random values (inclusive)
     * @param maxValue  upper bound of the random values (exclusive, see
     *                  {@link ThreadLocalRandom#longs(long, long, long)})
     */
    @Test(dataProvider = "providerEncodeDecode")
    public void testEncodeDecode(final long numValues, final long minValue, final long maxValue) {
        final LongSequenceEncoderDecoder encoder = new LongSequenceEncoderDecoder();
        final LongList originalValues = new LongList();
        final byte[] buffer = new byte[1024];
        // mutable holder, because the lambda below can only capture effectively final locals
        final AtomicInteger offsetInBuffer = new AtomicInteger(0);
        ThreadLocalRandom.current().longs(numValues, minValue, maxValue).forEachOrdered(value -> {
            originalValues.add(value);
            final int appendedBytes = encoder.encodeInto(value, buffer, offsetInBuffer.get());
            offsetInBuffer.addAndGet(appendedBytes);
        });
        // the whole buffer is handed to decode, including the unused tail
        // NOTE(review): presumably decode stops at the zero-filled tail - confirm
        final LongList actualValues = encoder.decode(buffer);
        assertEquals(actualValues.toString(), originalValues.toString());
    }
}

View File

@@ -0,0 +1,84 @@
package org.lucares.pdb.blockstorage.intsequence;
import static org.testng.Assert.assertEquals;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import org.lucares.collections.LongList;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
 * Round-trip tests for the static encode/decode methods of
 * {@link VariableByteEncoder}, covering single values and value pairs.
 */
@Test
public class VariableByteEncoderTest {

    /**
     * Rows of {@code numValues, minValue, maxValue}: how many random values to
     * draw and the range to draw them from. The size remarks state the encoded
     * length the rows are meant to exercise.
     *
     * @return the test rows
     */
    @DataProvider
    public Object[][] providerEncodeDecode() {
        final Object[][] rows = { //
                // meant to be encoded into 1 byte
                { 10, -5, 5 }, //
                { 10, 0, 5 }, //
                { 10, -63, 63 }, //
                // meant to be encoded into 2 bytes
                { 10, 130, 131 }, //
                // meant to be encoded into 3 bytes
                { 10, -8191, 8191 }, //
                // large magnitudes, encoded into n bytes
                { 1, Long.MAX_VALUE / 2 - 4, Long.MAX_VALUE / 2 }, //
                { 1, Long.MIN_VALUE / 2, Long.MAX_VALUE / 2 }, //
                { 11, Long.MIN_VALUE / 2 + 1, Long.MIN_VALUE / 2 + 3 }, //
                { 12, Long.MAX_VALUE / 2 - 3, Long.MAX_VALUE / 2 },//
        };
        return rows;
    }

    /**
     * Draws random values, writes them one after another into a 1024 byte
     * buffer, decodes that buffer and compares the string forms of the decoded
     * and the expected list.
     *
     * @param numValues number of random values to draw
     * @param minValue  origin of the random range
     * @param maxValue  bound of the random range
     */
    @Test(dataProvider = "providerEncodeDecode")
    public void testEncodeDecode(final long numValues, final long minValue, final long maxValue) {
        final long[] randomValues = ThreadLocalRandom.current().longs(numValues, minValue, maxValue).toArray();

        final LongList expected = new LongList();
        final byte[] buffer = new byte[1024];
        int writePosition = 0;
        for (final long value : randomValues) {
            expected.add(value);
            final int written = VariableByteEncoder.encodeInto(value, buffer, writePosition);
            writePosition += written;
        }

        final LongList decoded = VariableByteEncoder.decode(buffer);
        assertEquals(decoded.toString(), expected.toString());
    }

    /**
     * Rows of {@code value1, value2, fits, bufferSize}. The buffer grows from
     * row to row; only the last two rows expect the pair to fit.
     *
     * @return the test rows
     */
    @DataProvider
    public Object[][] providerEncodeDecodeOfTwoValues() {
        final Object[][] rows = { //
                // the first value alone needs three bytes and does not fit
                { 12345, 67890, false, 1 }, //
                { 12345, 67890, false, 2 }, //
                // the first value would fit, but there is no room for the second one
                { 12345, 67890, false, 3 }, //
                { 12345, 67890, false, 4 }, //
                { 12345, 67890, false, 5 }, //
                // three bytes per value: both fit
                { 12345, 67890, true, 6 }, //
                { 12345, 67890, true, 10 }, //
        };
        return rows;
    }

    /**
     * Encodes a pair of values into a buffer of the given size. A positive
     * return value must coincide with {@code fits}. When the pair does not fit,
     * the buffer has to be all zeros afterwards. In both cases decoding the
     * buffer must yield exactly the values that were stored.
     *
     * @param value1     first value of the pair
     * @param value2     second value of the pair
     * @param fits       whether the pair is expected to fit into the buffer
     * @param bufferSize size of the target buffer in bytes
     */
    @Test(dataProvider = "providerEncodeDecodeOfTwoValues")
    public void testEncodeDecodeOfTwoValues(final long value1, final long value2, final boolean fits,
            final int bufferSize) {
        final byte[] buffer = new byte[bufferSize];
        final int written = VariableByteEncoder.encodeInto(value1, value2, buffer, 0);
        final boolean somethingWritten = written > 0;
        Assert.assertEquals(somethingWritten, fits);

        final LongList expected = new LongList();
        if (!fits) {
            // a failed attempt must not leave partial data behind
            final byte[] untouched = new byte[bufferSize];
            Assert.assertEquals(buffer, untouched,
                    "checks that buffer is resetted after it discovers the values do not fit");
        } else {
            expected.addAll(value1, value2);
        }

        final LongList decoded = VariableByteEncoder.decode(buffer);
        Assert.assertEquals(decoded, expected);
    }
}