merge projects file-utils, byte-utils and pdb-utils

It turned out that most projects needed at least
two of the utils projects. file-utils and byte-utils
had only one class. Merging them made sense.
This commit is contained in:
2019-12-08 18:47:54 +01:00
parent 85679ca0c8
commit e931856041
13 changed files with 1 additions and 32 deletions

View File

@@ -0,0 +1,242 @@
package org.lucares.utils.byteencoder;
import java.util.Arrays;
import org.lucares.collections.LongList;
/**
 * Encodes longs into bytes using variable byte encoding.
 * <p>
 * Every value is first mapped to a strictly positive number: even numbers
 * represent positive longs, odd numbers represent negative longs or zero.
 * <p>
 * The positive number is then split into groups of 7 bits, least significant
 * group first, and each group is stored in one byte. The highest bit of a byte
 * is reserved for a flag that tells us whether or not more bytes follow. This
 * bit is set for all but the last byte of a value.
 * <p>
 * Please note two things:
 * <ol>
 * <li>0 is encoded to 1; the encoded bytes do not contain the null byte
 * <li>all but the last byte have the high value bit set
 * </ol>
 * No byte will have the value 0. This is important when decoding bytes, because
 * we can decode bytes until we encounter the first null byte, or we reach the
 * end of the array.
 */
public class VariableByteEncoder {

    /** Smallest encodable value: -(2^62) + 1. */
    public static final long MIN_VALUE = Long.MIN_VALUE / 2 + 1;

    /** Largest encodable value: 2^62 - 1. */
    public static final long MAX_VALUE = Long.MAX_VALUE / 2;

    private static final int CONTINUATION_BYTE_FLAG = 1 << 7; // 10000000

    private static final long DATA_BITS = (1 << 7) - 1; // 01111111

    /**
     * Encodes two values back to back into the given buffer.
     * <p>
     * If the encoded values do not fit into the buffer, then 0 is returned and
     * all bytes from {@code offsetInBuffer} to the end of the buffer are set to
     * 0. The caller will have to provide a new buffer with more space.
     *
     * @param value1         first value, (between -(2^62)+1 and 2^62-1)
     * @param value2         second value, (between -(2^62)+1 and 2^62-1)
     * @param buffer         the buffer to write into
     * @param offsetInBuffer index of the first byte to write
     * @return number of bytes appended to the provided buffer, or 0 if the two
     *         values did not fit
     */
    public static int encodeInto(final long value1, final long value2, final byte[] buffer, final int offsetInBuffer) {
        final int bytesAdded1 = encodeInto(value1, buffer, offsetInBuffer);
        if (bytesAdded1 <= 0) {
            // first value did not fit; the single value variant already reset the buffer
            return 0;
        }

        final int bytesAdded2 = encodeInto(value2, buffer, offsetInBuffer + bytesAdded1);
        if (bytesAdded2 <= 0) {
            // second value did not fit into the buffer,
            // remove the first value
            // and return 0 to indicate that the values did not fit
            Arrays.fill(buffer, offsetInBuffer, buffer.length, (byte) 0);
            return 0;
        }

        // both values fit into the buffer
        return bytesAdded1 + bytesAdded2;
    }

    /**
     * Decodes all values stored in the buffer.
     * <p>
     * Decoding stops at the first null byte that starts a value, or at the end
     * of the buffer.
     *
     * @param buffer the encoded bytes
     * @return the decoded values in the order they appear in the buffer
     */
    public static LongList decode(final byte[] buffer) {
        final LongList result = new LongList();
        decodeInto(buffer, result);
        return result;
    }

    /**
     * Encodes a single value into the given buffer.
     * <p>
     * If the encoded value does not fit into the buffer, then 0 is returned and
     * all bytes from {@code offsetInBuffer} to the end of the buffer are set to
     * 0. The caller will have to provide a new buffer with more space.
     * <p>
     * The value range is only checked with {@code assert}.
     *
     * @param value          the value, (between -(2^62)+1 and 2^62-1)
     * @param buffer         the buffer to write into
     * @param offsetInBuffer index of the first byte to write
     * @return number of bytes written, or 0 if the value did not fit
     * @throws ArrayIndexOutOfBoundsException if {@code offsetInBuffer} is negative
     * @throws IllegalArgumentException       if {@code offsetInBuffer} is greater
     *                                        than the length of the buffer
     */
    public static int encodeInto(final long value, final byte[] buffer, final int offsetInBuffer) {
        assert value >= MIN_VALUE : "min encodable value is -2^62+1 = " + MIN_VALUE;
        assert value <= MAX_VALUE : "max encodable value is 2^62-1 = " + MAX_VALUE;

        long normVal = encodeIntoPositiveValue(value);
        final int length = encodedLength(normVal);

        // Invalid offsets are reported with the same exception types as before,
        // but without relying on a failed array access to detect them.
        if (offsetInBuffer < 0) {
            throw new ArrayIndexOutOfBoundsException(
                    "offsetInBuffer must not be negative, but was " + offsetInBuffer);
        }
        if (offsetInBuffer > buffer.length) {
            throw new IllegalArgumentException(
                    "offsetInBuffer(" + offsetInBuffer + ") > buffer.length(" + buffer.length + ")");
        }

        // Subtraction instead of addition, so that a large offset cannot overflow.
        if (offsetInBuffer > buffer.length - length) {
            // We need more bytes to store the value than are available.
            // Reset the remainder of the buffer, so that it does not contain
            // anything that could be mistaken for an encoded value.
            Arrays.fill(buffer, offsetInBuffer, buffer.length, (byte) 0);
            return 0;
        }

        int offset = offsetInBuffer;
        while (normVal > DATA_BITS) {
            buffer[offset] = (byte) ((normVal & DATA_BITS) | CONTINUATION_BYTE_FLAG);
            offset++;
            normVal = normVal >> 7; // shift by number of value bits
        }
        buffer[offset] = (byte) normVal; // last byte, continuation flag not set
        return length;
    }

    /**
     * Decodes values from the buffer and appends them to {@code bufferedLongs}.
     * <p>
     * If the last value in the buffer is truncated, i.e. the last byte of the
     * buffer still has the continuation flag set, then the bits read so far are
     * decoded and added as a value.
     */
    private static void decodeInto(final byte[] buffer, final LongList bufferedLongs) {
        int i = 0;
        // no value is encoded to 0 => a null byte means there are no further values
        while (i < buffer.length && buffer[i] != 0) {
            long val = buffer[i] & DATA_BITS;
            int shift = 7;
            while (!isLastByte(buffer[i]) && i + 1 < buffer.length) {
                i++;
                val = val | ((buffer[i] & DATA_BITS) << shift);
                shift += 7;
            }
            bufferedLongs.add(decodeIntoSignedValue(val));
            i++; // move to the first byte of the next value
        }
    }

    /**
     * The input value (positive, negative or zero) is encoded into a positive
     * value.
     *
     * <pre>
     *
     * input:   0 1 -1 2 -2 3 -3
     * encoded: 1 2  3 4  5 6  7
     * </pre>
     */
    private static long encodeIntoPositiveValue(final long value) {
        return value > 0 ? value * 2 : (value * -2) + 1;
    }

    /**
     * Inverse of {@link #encodeIntoPositiveValue(long)}.
     *
     * @param value the positive value read from the encoded bytes
     * @return the signed value it represents
     */
    private static long decodeIntoSignedValue(final long value) {
        return (value / 2) * (value % 2 == 0 ? 1 : -1);
    }

    /**
     * A byte is the last byte of a value if its continuation flag is not set.
     */
    private static boolean isLastByte(final byte b) {
        return (b & CONTINUATION_BYTE_FLAG) == 0;
    }

    /**
     * Number of bytes the encoding loop emits for an already normalized value:
     * one byte for the last 7 bits plus one byte for each preceding group of 7
     * bits.
     */
    private static int encodedLength(final long normalizedValue) {
        int length = 1;
        long remaining = normalizedValue;
        while (remaining > DATA_BITS) {
            remaining = remaining >> 7;
            length++;
        }
        return length;
    }

    /**
     * Encodes the given values into a new array that has exactly the needed
     * size.
     *
     * @param longs the values, each between -(2^62)+1 and 2^62-1
     * @return the encoded bytes; an empty array if no values are given
     */
    public static byte[] encode(final long... longs) {
        if (longs.length == 0) {
            // encodeInto would report 0 written bytes, which must not be
            // mistaken for "did not fit"
            return new byte[0];
        }

        int totalBytes = 0;
        for (final long l : longs) {
            totalBytes += neededBytes(l);
        }

        final byte[] result = new byte[totalBytes];
        final int bytesWritten = encodeInto(longs, result, 0);
        if (bytesWritten <= 0) {
            throw new IllegalStateException("Did not reserve enough space to store " + Arrays.toString(longs)
                    + ". We reserved only " + totalBytes + " bytes.");
        }
        return result;
    }

    /**
     * Decodes only the first value stored in the buffer.
     * <p>
     * Unlike {@link #decode(byte[])} a leading null byte is not treated as end
     * marker; it is decoded to 0.
     *
     * @param buffer the encoded bytes, must contain at least one byte
     * @return the first value
     */
    public static long decodeFirstValue(final byte[] buffer) {
        int offset = 0;
        long val = buffer[offset] & DATA_BITS;
        int shift = 7;
        while (!isLastByte(buffer[offset]) && offset + 1 < buffer.length) {
            offset++;
            val = val | ((buffer[offset] & DATA_BITS) << shift);
            shift += 7;
        }
        return decodeIntoSignedValue(val);
    }

    /**
     * Encodes all values of the list into the given buffer.
     * <p>
     * If the values do not fit into the buffer, then 0 is returned and all
     * bytes from {@code offsetInBuffer} to the end of the buffer are set to 0.
     * Note that 0 is also returned for an empty list.
     *
     * @param values         the values, each between -(2^62)+1 and 2^62-1
     * @param buffer         the buffer to write into
     * @param offsetInBuffer index of the first byte to write
     * @return number of bytes written, or 0 if the values did not fit
     */
    public static int encodeInto(final LongList values, final byte[] buffer, final int offsetInBuffer) {
        int offset = offsetInBuffer;
        for (int i = 0; i < values.size(); i++) {
            final int bytesAdded = encodeInto(values.get(i), buffer, offset);
            if (bytesAdded <= 0) {
                // the failed call reset everything from offset on,
                // we have to remove the values written before
                Arrays.fill(buffer, offsetInBuffer, offset, (byte) 0);
                return 0;
            }
            offset += bytesAdded;
        }
        return offset - offsetInBuffer;
    }

    /**
     * Encodes all values of the array into the given buffer.
     * <p>
     * If the values do not fit into the buffer, then 0 is returned and all
     * bytes from {@code offsetInBuffer} to the end of the buffer are set to 0.
     * Note that 0 is also returned for an empty array.
     *
     * @param values         the values, each between -(2^62)+1 and 2^62-1
     * @param buffer         the buffer to write into
     * @param offsetInBuffer index of the first byte to write
     * @return number of bytes written, or 0 if the values did not fit
     */
    public static int encodeInto(final long[] values, final byte[] buffer, final int offsetInBuffer) {
        int offset = offsetInBuffer;
        for (final long value : values) {
            final int bytesAdded = encodeInto(value, buffer, offset);
            if (bytesAdded <= 0) {
                // the failed call reset everything from offset on,
                // we have to remove the values written before
                Arrays.fill(buffer, offsetInBuffer, offset, (byte) 0);
                return 0;
            }
            offset += bytesAdded;
        }
        return offset - offsetInBuffer;
    }

    /**
     * Encodes the given values into a new array that has exactly the needed
     * size.
     *
     * @param longs the values, each between -(2^62)+1 and 2^62-1
     * @return the encoded bytes; an empty array if the list is empty
     */
    public static byte[] encode(final LongList longs) {
        if (longs.size() == 0) {
            // encodeInto would report 0 written bytes, which must not be
            // mistaken for "did not fit"
            return new byte[0];
        }

        final int totalBytes = longs.stream().mapToInt(VariableByteEncoder::neededBytes).sum();
        final byte[] result = new byte[totalBytes];
        final int bytesWritten = encodeInto(longs, result, 0);
        if (bytesWritten <= 0) {
            throw new IllegalStateException(
                    "Did not reserve enough space to store " + longs + ". We reserved only " + totalBytes + " bytes.");
        }
        return result;
    }

    /**
     * Computes the number of bytes needed to encode the value, without writing
     * anything.
     * <p>
     * The value range is only checked with {@code assert}.
     *
     * @param value the value, (between -(2^62)+1 and 2^62-1)
     * @return number of bytes {@link #encodeInto(long, byte[], int)} would write
     */
    public static int neededBytes(final long value) {
        assert value >= MIN_VALUE : "min encodable value is -2^62+1 = " + MIN_VALUE;
        assert value <= MAX_VALUE : "max encodable value is 2^62-1 = " + MAX_VALUE;
        return encodedLength(encodeIntoPositiveValue(value));
    }
}

View File

@@ -0,0 +1,79 @@
package org.lucares.utils.file;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.List;
import java.util.function.BiPredicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers for deleting and listing files.
 */
public class FileUtils {

    private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);

    /**
     * File visitor that deletes every visited file, and every directory after
     * its entries have been visited.
     */
    private static final class RecursiveDeleter extends SimpleFileVisitor<Path> {

        @Override
        public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
            Files.delete(file);
            LOGGER.trace("deleted: {}", file);
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult postVisitDirectory(final Path dir, final IOException exc) throws IOException {
            if (exc != null) {
                // Iterating the directory failed. Report the real cause instead
                // of trying to delete a directory that may still have entries.
                throw exc;
            }
            Files.delete(dir);
            LOGGER.trace("deleted: {}", dir);
            return FileVisitResult.CONTINUE;
        }
    }

    /**
     * Deletes each of the given paths with {@link #delete(Path)}. An exception
     * thrown while deleting one path is logged and does not prevent the
     * deletion of the remaining paths.
     *
     * @param paths the files or directories to delete
     */
    public static void deleteSilently(final Iterable<Path> paths) {
        for (final Path path : paths) {
            try {
                delete(path);
            } catch (final Exception e) {
                LOGGER.info("failed to delete {}", path, e);
            }
        }
    }

    /**
     * Recursively deletes the given file or directory.
     * <p>
     * If deleting fails with an {@link IOException}, the failure is logged and
     * the deletion is tried again, up to 10 attempts in total. If all attempts
     * fail this method returns normally; the failures are only visible in the
     * log.
     *
     * @param path the file or directory to delete
     */
    public static void delete(final Path path) {
        final int maxAttempts = 10;
        // Path.toAbsolutePath also works for paths that do not belong to the
        // default file system, where Path.toFile is not supported.
        final Path absolutePath = path.toAbsolutePath();

        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                LOGGER.debug("deleting '{}' attempt {} of {}", absolutePath, attempt, maxAttempts);
                Files.walkFileTree(path, new RecursiveDeleter());
                return;
            } catch (final IOException e) {
                LOGGER.warn("failed to delete '{}' on attempt {} of {}", absolutePath, attempt, maxAttempts, e);
            }
        }
    }

    /**
     * Lists all regular files below the given start path.
     *
     * @param start the file or directory to start the search at
     * @return all paths at or below {@code start} for which
     *         {@link Files#isRegularFile} returns true
     * @throws IOException if an I/O error occurs when accessing {@code start}
     */
    public static List<Path> listRecursively(final Path start) throws IOException {
        final int maxDepth = Integer.MAX_VALUE;
        final BiPredicate<Path, BasicFileAttributes> matcher = (path, attr) -> Files.isRegularFile(path);

        // the stream holds open directories and must be closed
        try (final Stream<Path> files = Files.find(start, maxDepth, matcher)) {
            return files.collect(Collectors.toList());
        }
    }
}