replace ludb with data-store
LuDB has a few disadvantages.
1. Most notably disk space. H2 wastes a lot of valuable disk space.
For my test data set with 44 million entries it is 14 MB
(sometimes a lot more; depends on H2 internal cleanup). With
data-store it is 15 KB.
Overall I could reduce the disk space from 231 MB to 200 MB (13.4 %
in this example). That is an average of 4.6 bytes per entry.
2. Speed:
a) Liquibase is slow. The first time it takes approx. three seconds.
b) Query and insertion: with data-store we can insert entries
up to 1.6 times faster.
Data-store uses a few tricks to save disk space:
1. We encode the tags into the file names.
2. To keep them short we translate the key/value of the tag into
shorter numbers. For example "foo" -> 12 and "bar" -> 47. So the
tag "foo"/"bar" would be 12/47.
We then translate this number into a numeral system of base 62
(a-zA-Z0-9), so it can be used for file names and it is shorter.
That way we only have to store the mapping of string to int.
3. We do that in a simple tab separated file.
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
package org.lucares.utils.file;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.FileVisitResult;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.SimpleFileVisitor;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.List;
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class FileUtils {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);
|
||||
|
||||
private static final class RecursiveDeleter extends SimpleFileVisitor<Path> {
|
||||
|
||||
@Override
|
||||
public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
|
||||
|
||||
Files.delete(file);
|
||||
LOGGER.trace("deleted: {}", file);
|
||||
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileVisitResult postVisitDirectory(final Path dir, final IOException exc) throws IOException {
|
||||
|
||||
Files.delete(dir);
|
||||
LOGGER.trace("deleted: {}", dir);
|
||||
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
}
|
||||
|
||||
public static void delete(final Path path) {
|
||||
|
||||
final int maxAttempts = 10;
|
||||
int attempt = 1;
|
||||
|
||||
while (attempt <= maxAttempts) {
|
||||
try {
|
||||
LOGGER.debug("deleting '{}' attempt {} of {}", path.toFile().getAbsolutePath(), attempt, maxAttempts);
|
||||
Files.walkFileTree(path, new RecursiveDeleter());
|
||||
break;
|
||||
} catch (final IOException e) {
|
||||
final String msg = "failed to delete '" + path.toFile().getAbsolutePath() + "' on attempt " + attempt
|
||||
+ " of " + maxAttempts;
|
||||
LOGGER.warn(msg, e);
|
||||
}
|
||||
attempt++;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<Path> listRecursively(final Path start) throws IOException {
|
||||
|
||||
final int maxDepth = Integer.MAX_VALUE;
|
||||
final BiPredicate<Path, BasicFileAttributes> matcher = (path, attr) -> Files.isRegularFile(path);
|
||||
|
||||
try (final Stream<Path> files = Files.find(start, maxDepth, matcher)) {
|
||||
return files.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user