replace keys.csv with persistent map

This commit is contained in:
2021-10-16 17:28:15 +02:00
parent cce2c052f2
commit 7754e54037
3 changed files with 31 additions and 26 deletions

View File

@@ -2,11 +2,8 @@ package org.lucares.pdb.api;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -19,6 +16,8 @@ import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import org.lucares.pdb.map.PersistentMap;
/**
* A very simple {@link Set}-like or {@link Map}-like data structure that stores
* unique¹ pairs of Strings and integers persistently.
@@ -33,8 +32,6 @@ import java.util.regex.Pattern;
public class UniqueStringIntegerPairs {
private static final String SEPARATOR = "\t";
private static final boolean APPEND = true;
private static final class ByteArray implements Comparable<ByteArray> {
private final byte[] array;
private final int start;
@@ -97,37 +94,50 @@ public class UniqueStringIntegerPairs {
*/
private final List<String> intToString = new ArrayList<>();
private final Path file;
final PersistentMap<String, Long> persistentMap;
/**
 * Creates an instance without a storage directory by delegating to
 * {@link #UniqueStringIntegerPairs(Path)} with {@code null}.
 */
public UniqueStringIntegerPairs() {
this(null);
}
/**
 * Creates the pair store, optionally backed by a {@link PersistentMap}
 * located below {@code storageBasePath}.
 *
 * @param storageBasePath base directory for the persistent data; when
 *                        {@code null} no persistent map is opened and
 *                        {@code persistentMap} is set to {@code null}
 */
public UniqueStringIntegerPairs(final Path storageBasePath) {
// NOTE(review): the next three lines use the 'file' field and init(Path); they
// look like the pre-commit body that this diff view shows without +/- markers.
// Confirm before treating them as live code.
this.file = keyCompressionFile(storageBasePath);
if (file != null) {
init(file);
if (storageBasePath != null) {
// "keys.bs" is resolved against storageBasePath and handed to PersistentMap
// together with the String/Long coders.
persistentMap = new PersistentMap<>(storageBasePath.resolve("keys.bs"), storageBasePath,
PersistentMap.STRING_CODER, PersistentMap.LONG_CODER);
final Path oldKeysCsvFile = keyCompressionFile(storageBasePath);
// An empty map next to an existing keys.csv means the pairs still live in the
// CSV file: import them. Otherwise load the pairs from the persistent map.
if (persistentMap.isEmpty() && Files.exists(oldKeysCsvFile)) {
upgradeFromCsvFile(oldKeysCsvFile);
} else {
init();
}
} else {
// some unit tests disable the persistence and use this class memory only
persistentMap = null;
}
}
/**
 * Fills the in-memory lookups ({@code intToString}, {@code stringToInt} and
 * {@code bytesToInt}) with every pair stored in {@code persistentMap}.
 */
private void init() {
    persistentMap.forAll((key, value) -> {
        // the map stores the number as Long; the lookups work with int
        final int id = value.intValue();
        intToStringPut(id, key);
        stringToInt.put(key, id);
        bytesToInt.put(new ByteArray(key), id);
    });
}
/**
 * Returns the path of the {@code keys.csv} file inside the given directory.
 *
 * @param dataDirectory directory the file name is resolved against
 * @return {@code dataDirectory} resolved with {@code "keys.csv"}
 */
private Path keyCompressionFile(final Path dataDirectory) {
    final Path keysCsv = dataDirectory.resolve("keys.csv");
    return keysCsv;
}
private void init(final Path file) throws RuntimeIOException {
private void upgradeFromCsvFile(final Path file) throws RuntimeIOException {
try {
Files.createDirectories(file.getParent());
if (!Files.exists(file)) {
Files.createFile(file);
}
try (final BufferedReader reader = new BufferedReader(
new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) {
String line;
while ((line = reader.readLine()) != null) {
// TODO use more efficient code to read the CSV -> improves startup time
final String[] tokens = line.split(Pattern.quote(SEPARATOR));
if (tokens.length == 2) {
@@ -136,6 +146,7 @@ public class UniqueStringIntegerPairs {
intToStringPut(integer, string);
stringToInt.put(string, integer);
bytesToInt.put(new ByteArray(string), integer);
persistentMap.putValue(string, (long) integer);
}
}
}
@@ -159,15 +170,8 @@ public class UniqueStringIntegerPairs {
if (stringToInt.containsKey(string) || (intToString.size() > integer && intToString.get(integer) != null)) {
throw new IllegalArgumentException("Unique key constraint violation for (" + string + ", " + integer + ")");
}
if (file != null) {
try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND),
StandardCharsets.UTF_8)) {
writer.write(string + SEPARATOR + integer + "\n");
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
if (persistentMap != null) {
persistentMap.putValue(string, (long) integer);
}
intToStringPut(integer, string);