merge projects file-utils, byte-utils and pdb-utils
It turned out that most projects needed at least two of the utils projects. file-utils and byte-utils had only one class. Merging them made sense.
This commit is contained in:
@@ -0,0 +1,242 @@
|
||||
package org.lucares.utils.byteencoder;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
|
||||
/**
|
||||
* Encodes longs into bytes using variable byte encoding. We are using a
|
||||
* transformation that encodes negative values into positive values. Even
|
||||
* numbers represent positive longs, uneven values represent negative longs, or
|
||||
* the null.
|
||||
* <p>
|
||||
* We then encode encode each 7 bits into one byte. This highest value bit is
|
||||
* reserved for a flag that tells us whether or not more bytes follow. This bit
|
||||
* is set for all but the last byte.
|
||||
* <p>
|
||||
* Please note two things:
|
||||
* <ol>
|
||||
* <li>0 is encoded to 1; the encoded bytes do not contain the null byte
|
||||
* <li>all but the last byte have the high value bit set
|
||||
* </ol>
|
||||
* No byte will have the value 0. This is important when decoding bytes, because
|
||||
* we can decode bytes until we encounter the first null byte, or we reach the
|
||||
* end of the array.
|
||||
*/
|
||||
public class VariableByteEncoder {
|
||||
|
||||
public static final long MIN_VALUE = Long.MIN_VALUE / 2 + 1;
|
||||
public static final long MAX_VALUE = Long.MAX_VALUE / 2;
|
||||
|
||||
private static final int MAX_BYTES_PER_VALUE = 10;
|
||||
|
||||
private static final int CONTINUATION_BYTE_FLAG = 1 << 7; // 10000000
|
||||
|
||||
private static final long DATA_BITS = (1 << 7) - 1; // 01111111
|
||||
|
||||
private static final ThreadLocal<byte[]> SINGLE_VALUE_BUFFER = ThreadLocal
|
||||
.withInitial(() -> new byte[MAX_BYTES_PER_VALUE]);
|
||||
|
||||
/**
|
||||
* Encodes time and value into the given buffer.
|
||||
* <p>
|
||||
* If the encoded values do not fit into the buffer, then 0 is returned. The
|
||||
* caller will have to provide a new buffer with more space.
|
||||
*
|
||||
* @param value1 first value, (between -(2^62)+1 and 2^62)
|
||||
* @param value2 second value, (between -(2^62)+1 and 2^62)
|
||||
* @param buffer
|
||||
* @param offsetInBuffer
|
||||
* @return number of bytes appended to the provided buffer
|
||||
*/
|
||||
public static int encodeInto(final long value1, final long value2, final byte[] buffer, final int offsetInBuffer) {
|
||||
|
||||
int offset = offsetInBuffer;
|
||||
final int bytesAdded1 = encodeInto(value1, buffer, offset);
|
||||
if (bytesAdded1 > 0) {
|
||||
offset += bytesAdded1;
|
||||
final int bytesAdded2 = encodeInto(value2, buffer, offset);
|
||||
|
||||
if (bytesAdded2 > 0) {
|
||||
// both value fit into the buffer
|
||||
// return the number of added bytes
|
||||
return bytesAdded1 + bytesAdded2;
|
||||
} else {
|
||||
// second value did not fit into the buffer,
|
||||
// remove the first value
|
||||
// and return 0 to indicate that the values did not fit
|
||||
Arrays.fill(buffer, offsetInBuffer, buffer.length, (byte) 0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// return 0 if the encoded bytes do not fit
|
||||
// the caller will have to provide a new buffer
|
||||
return 0;
|
||||
}
|
||||
|
||||
public static LongList decode(final byte[] buffer) {
|
||||
|
||||
final LongList result = new LongList();
|
||||
decodeInto(buffer, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public static int encodeInto(final long value, final byte[] buffer, final int offsetInBuffer) {
|
||||
|
||||
int offset = offsetInBuffer;
|
||||
|
||||
assert value >= MIN_VALUE : "min encodable value is -2^62+1 = " + MIN_VALUE;
|
||||
assert value <= MAX_VALUE : "max encodable value is 2^62 = " + MAX_VALUE;
|
||||
|
||||
long normVal = encodeIntoPositiveValue(value);
|
||||
|
||||
try {
|
||||
final long maxFirstByteValue = 127;
|
||||
|
||||
while (normVal > maxFirstByteValue) {
|
||||
buffer[offset] = (byte) ((normVal & DATA_BITS) | CONTINUATION_BYTE_FLAG);
|
||||
offset++;
|
||||
normVal = normVal >> 7; // shift by number of value bits
|
||||
}
|
||||
buffer[offset] = (byte) (normVal);
|
||||
return offset - offsetInBuffer + 1; // return number of encoded bytes
|
||||
} catch (final ArrayIndexOutOfBoundsException e) {
|
||||
// We need more bytes to store the value than are available.
|
||||
// Reset the bytes we just wrote.
|
||||
Arrays.fill(buffer, offsetInBuffer, buffer.length, (byte) 0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private static void decodeInto(final byte[] buffer, final LongList bufferedLongs) {
|
||||
for (int i = 0; i < buffer.length; i++) {
|
||||
|
||||
if (buffer[i] == 0) {
|
||||
// no value is encoded to 0 => there are no further values
|
||||
break;
|
||||
} else {
|
||||
long val = buffer[i] & DATA_BITS;
|
||||
int shift = 7;
|
||||
while (!isLastByte(buffer[i]) && i + 1 < buffer.length) {
|
||||
val = val | ((buffer[i + 1] & DATA_BITS) << shift);
|
||||
i++;
|
||||
shift += 7;
|
||||
}
|
||||
bufferedLongs.add(decodeIntoSignedValue(val));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The input value (positive, negative or null) is encoded into a positive
|
||||
* value.
|
||||
*
|
||||
* <pre>
|
||||
*
|
||||
* input: 0 1 -1 2 -2 3 -3
|
||||
* encoded: 1 2 3 4 5 6 7
|
||||
* </pre>
|
||||
*/
|
||||
private static long encodeIntoPositiveValue(final long value) {
|
||||
return value > 0 ? value * 2 : (value * -2) + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* inverse of {@link #encodeIntoPositiveValue(long)}
|
||||
*
|
||||
* @param value
|
||||
* @return
|
||||
*/
|
||||
private static long decodeIntoSignedValue(final long value) {
|
||||
return (value / 2) * (value % 2 == 0 ? 1 : -1);
|
||||
}
|
||||
|
||||
private static boolean isLastByte(final byte b) {
|
||||
return (b & CONTINUATION_BYTE_FLAG) == 0;
|
||||
}
|
||||
|
||||
public static byte[] encode(final long... longs) {
|
||||
|
||||
int neededBytes = 0;
|
||||
for (final long l : longs) {
|
||||
neededBytes += VariableByteEncoder.neededBytes(l);
|
||||
}
|
||||
|
||||
final byte[] result = new byte[neededBytes];
|
||||
|
||||
final int bytesWritten = encodeInto(longs, result, 0);
|
||||
if (bytesWritten <= 0) {
|
||||
throw new IllegalStateException(
|
||||
"Did not reserve enough space to store " + longs + ". We reserved only " + neededBytes + " bytes.");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static long decodeFirstValue(final byte[] buffer) {
|
||||
|
||||
int offset = 0;
|
||||
long val = buffer[offset] & DATA_BITS;
|
||||
int shift = 7;
|
||||
while (!isLastByte(buffer[offset]) && offset + 1 < buffer.length) {
|
||||
val = val | ((buffer[offset + 1] & DATA_BITS) << shift);
|
||||
offset++;
|
||||
shift += 7;
|
||||
}
|
||||
return decodeIntoSignedValue(val);
|
||||
}
|
||||
|
||||
public static int encodeInto(final LongList values, final byte[] buffer, final int offsetInBuffer) {
|
||||
|
||||
int offset = offsetInBuffer;
|
||||
for (int i = 0; i < values.size(); i++) {
|
||||
final long value = values.get(i);
|
||||
|
||||
final int bytesAdded = encodeInto(value, buffer, offset);
|
||||
if (bytesAdded <= 0) {
|
||||
Arrays.fill(buffer, offsetInBuffer, offset, (byte) 0);
|
||||
return 0;
|
||||
}
|
||||
offset += bytesAdded;
|
||||
}
|
||||
return offset - offsetInBuffer;
|
||||
}
|
||||
|
||||
public static int encodeInto(final long[] values, final byte[] buffer, final int offsetInBuffer) {
|
||||
|
||||
int offset = offsetInBuffer;
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
final long value = values[i];
|
||||
|
||||
final int bytesAdded = encodeInto(value, buffer, offset);
|
||||
if (bytesAdded <= 0) {
|
||||
Arrays.fill(buffer, offsetInBuffer, offset, (byte) 0);
|
||||
return 0;
|
||||
}
|
||||
offset += bytesAdded;
|
||||
}
|
||||
return offset - offsetInBuffer;
|
||||
}
|
||||
|
||||
public static byte[] encode(final LongList longs) {
|
||||
|
||||
final int neededBytes = longs.stream().mapToInt(VariableByteEncoder::neededBytes).sum();
|
||||
final byte[] result = new byte[neededBytes];
|
||||
|
||||
final int bytesWritten = encodeInto(longs, result, 0);
|
||||
if (bytesWritten <= 0) {
|
||||
throw new IllegalStateException(
|
||||
"Did not reserve enough space to store " + longs + ". We reserved only " + neededBytes + " bytes.");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static int neededBytes(final long value) {
|
||||
final byte[] buffer = SINGLE_VALUE_BUFFER.get();
|
||||
final int usedBytes = encodeInto(value, buffer, 0);
|
||||
return usedBytes;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
package org.lucares.utils.file;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.FileVisitResult;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.SimpleFileVisitor;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.List;
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class FileUtils {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);
|
||||
|
||||
private static final class RecursiveDeleter extends SimpleFileVisitor<Path> {
|
||||
|
||||
@Override
|
||||
public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
|
||||
|
||||
Files.delete(file);
|
||||
LOGGER.trace("deleted: {}", file);
|
||||
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileVisitResult postVisitDirectory(final Path dir, final IOException exc) throws IOException {
|
||||
|
||||
Files.delete(dir);
|
||||
LOGGER.trace("deleted: {}", dir);
|
||||
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
}
|
||||
|
||||
public static void deleteSilently(final Iterable<Path> paths) {
|
||||
for (final Path path : paths) {
|
||||
try {
|
||||
delete(path);
|
||||
} catch (final Exception e) {
|
||||
LOGGER.info("failed to delete {}", path, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void delete(final Path path) {
|
||||
|
||||
final int maxAttempts = 10;
|
||||
int attempt = 1;
|
||||
|
||||
while (attempt <= maxAttempts) {
|
||||
try {
|
||||
LOGGER.debug("deleting '{}' attempt {} of {}", path.toFile().getAbsolutePath(), attempt, maxAttempts);
|
||||
Files.walkFileTree(path, new RecursiveDeleter());
|
||||
break;
|
||||
} catch (final IOException e) {
|
||||
final String msg = "failed to delete '" + path.toFile().getAbsolutePath() + "' on attempt " + attempt
|
||||
+ " of " + maxAttempts;
|
||||
LOGGER.warn(msg, e);
|
||||
}
|
||||
attempt++;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<Path> listRecursively(final Path start) throws IOException {
|
||||
|
||||
final int maxDepth = Integer.MAX_VALUE;
|
||||
final BiPredicate<Path, BasicFileAttributes> matcher = (path, attr) -> Files.isRegularFile(path);
|
||||
|
||||
try (final Stream<Path> files = Files.find(start, maxDepth, matcher)) {
|
||||
return files.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
package org.lucares.utils.byteencoder;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
@Test
|
||||
public class VariableByteEncoderTest {
|
||||
|
||||
@DataProvider
|
||||
public Object[][] providerEncodeDecode() {
|
||||
return new Object[][] { //
|
||||
// encoded into 1 byte
|
||||
{ 10, -5, 5 }, //
|
||||
{ 10, 0, 5 }, //
|
||||
{ 10, -63, 63 }, //
|
||||
// encoded into 2 bytes
|
||||
{ 10, 130, 131 }, //
|
||||
// encoded into 3 bytes
|
||||
{ 10, -8191, 8191 }, //
|
||||
// encoded into n bytes
|
||||
{ 1, Long.MAX_VALUE / 2 - 4, Long.MAX_VALUE / 2 }, //
|
||||
{ 1, Long.MIN_VALUE / 2, Long.MAX_VALUE / 2 }, //
|
||||
{ 11, Long.MIN_VALUE / 2 + 1, Long.MIN_VALUE / 2 + 3 }, //
|
||||
{ 12, Long.MAX_VALUE / 2 - 3, Long.MAX_VALUE / 2 },//
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "providerEncodeDecode")
|
||||
public void testEncodeDecode(final long numValues, final long minValue, final long maxValue) {
|
||||
|
||||
final LongList originalValues = new LongList();
|
||||
final byte[] buffer = new byte[1024];
|
||||
final AtomicInteger offsetInBuffer = new AtomicInteger(0);
|
||||
|
||||
ThreadLocalRandom.current().longs(numValues, minValue, maxValue).forEachOrdered(value -> {
|
||||
originalValues.add(value);
|
||||
final int appendedBytes = VariableByteEncoder.encodeInto(value, buffer, offsetInBuffer.get());
|
||||
offsetInBuffer.addAndGet(appendedBytes);
|
||||
});
|
||||
|
||||
final LongList actualValues = VariableByteEncoder.decode(buffer);
|
||||
|
||||
assertEquals(actualValues.toString(), originalValues.toString());
|
||||
}
|
||||
|
||||
@DataProvider
|
||||
public Object[][] providerEncodeDecodeOfTwoValues() {
|
||||
return new Object[][] { //
|
||||
{ 12345, 67890, false, 1 }, // first value needs three bytes, it does not fit
|
||||
{ 12345, 67890, false, 2 }, // first value needs three bytes, it does not fit
|
||||
{ 12345, 67890, false, 3 }, // first value needs three bytes, second value does not fit
|
||||
{ 12345, 67890, false, 4 }, // first value needs three bytes, second value does not fit
|
||||
{ 12345, 67890, false, 5 }, // first value needs three bytes, second value does not fit
|
||||
{ 12345, 67890, true, 6 }, // both values need three bytes
|
||||
{ 12345, 67890, true, 10 }, //
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "providerEncodeDecodeOfTwoValues")
|
||||
public void testEncodeDecodeOfTwoValues(final long value1, final long value2, final boolean fits,
|
||||
final int bufferSize) {
|
||||
final LongList originalValues = new LongList();
|
||||
final byte[] buffer = new byte[bufferSize];
|
||||
|
||||
final int bytesAdded = VariableByteEncoder.encodeInto(value1, value2, buffer, 0);
|
||||
Assert.assertEquals(bytesAdded > 0, fits);
|
||||
if (fits) {
|
||||
originalValues.addAll(value1, value2);
|
||||
} else {
|
||||
Assert.assertEquals(buffer, new byte[bufferSize],
|
||||
"checks that buffer is resetted after it discovers the values do not fit");
|
||||
}
|
||||
|
||||
final LongList decodedValues = VariableByteEncoder.decode(buffer);
|
||||
Assert.assertEquals(decodedValues, originalValues);
|
||||
}
|
||||
|
||||
@DataProvider
|
||||
public Object[][] providerNededBytes() {
|
||||
return new Object[][] { //
|
||||
{ 0, 1 }, //
|
||||
{ -10, 1 }, //
|
||||
{ 10, 1 }, //
|
||||
{ -63, 1 }, //
|
||||
{ 63, 1 }, //
|
||||
{ -64, 2 }, //
|
||||
{ 64, 2 }, //
|
||||
{ -8191, 2 }, //
|
||||
{ 8191, 2 }, //
|
||||
{ -8192, 3 }, //
|
||||
{ 8192, 3 }, //
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "providerNededBytes")
|
||||
public void testNeededBytes(final long value, final int expectedNeededBytes) {
|
||||
|
||||
final int neededBytes = VariableByteEncoder.neededBytes(value);
|
||||
final byte[] encoded = VariableByteEncoder.encode(value);
|
||||
Assert.assertEquals(encoded.length, neededBytes);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user