support for negative values in variable byte encoding
We now support negative values. This will allow us to store time/value sequences that are not monotonically increasing, so that we do not have to create multiple files just because some values were sent out of order. This is done by first transforming the values into positive values using an interleaved encoding (similar to what Protocol Buffers calls ZigZag encoding). We map values like this: 0 -> 1, 1 -> 2, -1 -> 3, 2 -> 4, -2 -> 5, ... Renamed LongSequenceEncoderDecoder to VariableByteEncoder. Made methods static.
This commit is contained in:
@@ -1,82 +0,0 @@
|
||||
package org.lucares.pdb.blockstorage.intsequence;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
@Test
|
||||
public class LongSequenceEncoderDecoderTest {
|
||||
@DataProvider
|
||||
public Object[][] providerComputeNumberOfEncodedBytes() {
|
||||
|
||||
return new Object[][] {
|
||||
// 2^6-1 = 63 -> 1 byte
|
||||
// 2^13-1 = 8191 -> 2 byte
|
||||
// 2^20-1 = 1048575 -> 3 byte
|
||||
// 2^27-1 = 134217727 -> 4 byte
|
||||
// 2^34-1 = 17179869183 -> 5 byte
|
||||
// 2^41-1 = 2199023255551 -> 6 byte
|
||||
// 2^48-1 = 281474976710655-> 7 byte
|
||||
// 2^55-1 = 36028797018963967-> 8 byte
|
||||
// 2^62-1 = 4611686018427387903-> 9 byte
|
||||
// 2^69-1 = 590295810358705651711 -> 10 byte
|
||||
|
||||
{ 0, 1 }, //
|
||||
{ 63, 1 }, //
|
||||
{ 64, 2 }, //
|
||||
{ 8191, 2 }, //
|
||||
{ 8192, 3 }, //
|
||||
{ 1048575, 3 }, //
|
||||
{ 1048576, 4 }, //
|
||||
{ 134217727, 4 }, //
|
||||
{ 134217728, 5 }, //
|
||||
{ 17179869183L, 5 }, //
|
||||
{ 17179869184L, 6 }, //
|
||||
{ 2199023255551L, 6 }, //
|
||||
{ 2199023255552L, 7 }, //
|
||||
{ 281474976710655L, 7 }, //
|
||||
{ 2814749767106556L, 8 },//
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "providerComputeNumberOfEncodedBytes")
|
||||
public void testComputeNumberOfEncodedBytes(final long value, final long expected) {
|
||||
final long actual = LongSequenceEncoderDecoder.computeNumberOfEncodedBytes(value);
|
||||
|
||||
assertEquals(actual, expected);
|
||||
}
|
||||
|
||||
@DataProvider
|
||||
public Object[][] providerEncodeDecode() {
|
||||
return new Object[][] { { 10, 0, 5 }, //
|
||||
{ 10, 0, 63 }, //
|
||||
{ 10, 0, 8191 }, //
|
||||
{ 10, 0, Long.MAX_VALUE },//
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "providerEncodeDecode")
|
||||
public void testEncodeDecode(final long numValues, final long minValue, final long maxValue) {
|
||||
final LongSequenceEncoderDecoder encoder = new LongSequenceEncoderDecoder();
|
||||
|
||||
final LongList originalValues = new LongList();
|
||||
final byte[] buffer = new byte[1024];
|
||||
final AtomicInteger offsetInBuffer = new AtomicInteger(0);
|
||||
|
||||
ThreadLocalRandom.current().longs(numValues, minValue, maxValue).forEachOrdered(value -> {
|
||||
originalValues.add(value);
|
||||
final int appendedBytes = encoder.encodeInto(value, buffer, offsetInBuffer.get());
|
||||
offsetInBuffer.addAndGet(appendedBytes);
|
||||
});
|
||||
|
||||
final LongList actualValues = encoder.decode(buffer);
|
||||
|
||||
assertEquals(actualValues.toString(), originalValues.toString());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
package org.lucares.pdb.blockstorage.intsequence;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
@Test
|
||||
public class VariableByteEncoderTest {
|
||||
|
||||
@DataProvider
|
||||
public Object[][] providerEncodeDecode() {
|
||||
return new Object[][] { //
|
||||
// encoded into 1 byte
|
||||
{ 10, -5, 5 }, //
|
||||
{ 10, 0, 5 }, //
|
||||
{ 10, -63, 63 }, //
|
||||
// encoded into 2 bytes
|
||||
{ 10, 130, 131 }, //
|
||||
// encoded into 3 bytes
|
||||
{ 10, -8191, 8191 }, //
|
||||
// encoded into n bytes
|
||||
{ 1, Long.MAX_VALUE / 2 - 4, Long.MAX_VALUE / 2 }, //
|
||||
{ 1, Long.MIN_VALUE / 2, Long.MAX_VALUE / 2 }, //
|
||||
{ 11, Long.MIN_VALUE / 2 + 1, Long.MIN_VALUE / 2 + 3 }, //
|
||||
{ 12, Long.MAX_VALUE / 2 - 3, Long.MAX_VALUE / 2 },//
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "providerEncodeDecode")
|
||||
public void testEncodeDecode(final long numValues, final long minValue, final long maxValue) {
|
||||
|
||||
final LongList originalValues = new LongList();
|
||||
final byte[] buffer = new byte[1024];
|
||||
final AtomicInteger offsetInBuffer = new AtomicInteger(0);
|
||||
|
||||
ThreadLocalRandom.current().longs(numValues, minValue, maxValue).forEachOrdered(value -> {
|
||||
originalValues.add(value);
|
||||
final int appendedBytes = VariableByteEncoder.encodeInto(value, buffer, offsetInBuffer.get());
|
||||
offsetInBuffer.addAndGet(appendedBytes);
|
||||
});
|
||||
|
||||
final LongList actualValues = VariableByteEncoder.decode(buffer);
|
||||
|
||||
assertEquals(actualValues.toString(), originalValues.toString());
|
||||
}
|
||||
|
||||
@DataProvider
|
||||
public Object[][] providerEncodeDecodeOfTwoValues() {
|
||||
return new Object[][] { //
|
||||
{ 12345, 67890, false, 1 }, // first value needs three bytes, it does not fit
|
||||
{ 12345, 67890, false, 2 }, // first value needs three bytes, it does not fit
|
||||
{ 12345, 67890, false, 3 }, // first value needs three bytes, second value does not fit
|
||||
{ 12345, 67890, false, 4 }, // first value needs three bytes, second value does not fit
|
||||
{ 12345, 67890, false, 5 }, // first value needs three bytes, second value does not fit
|
||||
{ 12345, 67890, true, 6 }, // both values need three bytes
|
||||
{ 12345, 67890, true, 10 }, //
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "providerEncodeDecodeOfTwoValues")
|
||||
public void testEncodeDecodeOfTwoValues(final long value1, final long value2, final boolean fits,
|
||||
final int bufferSize) {
|
||||
final LongList originalValues = new LongList();
|
||||
final byte[] buffer = new byte[bufferSize];
|
||||
|
||||
final int bytesAdded = VariableByteEncoder.encodeInto(value1, value2, buffer, 0);
|
||||
Assert.assertEquals(bytesAdded > 0, fits);
|
||||
if (fits) {
|
||||
originalValues.addAll(value1, value2);
|
||||
} else {
|
||||
Assert.assertEquals(buffer, new byte[bufferSize],
|
||||
"checks that buffer is resetted after it discovers the values do not fit");
|
||||
}
|
||||
|
||||
final LongList decodedValues = VariableByteEncoder.decode(buffer);
|
||||
Assert.assertEquals(decodedValues, originalValues);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user