replace keys.csv with persistent map

This commit is contained in:
2021-10-16 17:28:15 +02:00
parent cce2c052f2
commit 7754e54037
3 changed files with 31 additions and 26 deletions

View File

@@ -190,7 +190,7 @@ public class PersistentMap<K, V> implements AutoCloseable {
/** /**
* *
* @param path file relative to {@code storageBasePath} * @param path file for the index, must be child of storageBasePath
* @param storageBasePath base path * @param storageBasePath base path
* @param keyEncoder encoder for keys * @param keyEncoder encoder for keys
* @param valueEncoder encoder for values * @param valueEncoder encoder for values

View File

@@ -1,5 +1,6 @@
dependencies { dependencies {
implementation project(':pdb-utils') implementation project(':pdb-utils')
implementation project(':block-storage')
implementation lib_primitive_collections implementation lib_primitive_collections
} }

View File

@@ -2,11 +2,8 @@ package org.lucares.pdb.api;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@@ -19,6 +16,8 @@ import java.util.Set;
import java.util.function.Function; import java.util.function.Function;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.lucares.pdb.map.PersistentMap;
/** /**
* A very simple {@link Set}-like or {@link Map}-like data structure that stores * A very simple {@link Set}-like or {@link Map}-like data structure that stores
* unique&sup1; pairs of Strings and integers persistently. * unique&sup1; pairs of Strings and integers persistently.
@@ -33,8 +32,6 @@ import java.util.regex.Pattern;
public class UniqueStringIntegerPairs { public class UniqueStringIntegerPairs {
private static final String SEPARATOR = "\t"; private static final String SEPARATOR = "\t";
private static final boolean APPEND = true;
private static final class ByteArray implements Comparable<ByteArray> { private static final class ByteArray implements Comparable<ByteArray> {
private final byte[] array; private final byte[] array;
private final int start; private final int start;
@@ -97,37 +94,50 @@ public class UniqueStringIntegerPairs {
*/ */
private final List<String> intToString = new ArrayList<>(); private final List<String> intToString = new ArrayList<>();
private final Path file; final PersistentMap<String, Long> persistentMap;
public UniqueStringIntegerPairs() { public UniqueStringIntegerPairs() {
this(null); this(null);
} }
public UniqueStringIntegerPairs(final Path storageBasePath) { public UniqueStringIntegerPairs(final Path storageBasePath) {
this.file = keyCompressionFile(storageBasePath);
if (file != null) { if (storageBasePath != null) {
init(file); persistentMap = new PersistentMap<>(storageBasePath.resolve("keys.bs"), storageBasePath,
PersistentMap.STRING_CODER, PersistentMap.LONG_CODER);
final Path oldKeysCsvFile = keyCompressionFile(storageBasePath);
if (persistentMap.isEmpty() && Files.exists(oldKeysCsvFile)) {
upgradeFromCsvFile(oldKeysCsvFile);
} else {
init();
}
} else {
// some unit tests disable the persistence and use this class in memory only
persistentMap = null;
} }
} }
/**
 * Fills the in-memory lookup structures from the persistent map.
 * <p>
 * For every (string, number) entry that {@code persistentMap.forAll} visits,
 * the number is narrowed to an {@code int} and the pair is registered in the
 * int-to-string list, the string-to-int map and the bytes-to-int map, in that
 * order.
 */
private void init() {
    persistentMap.forAll((key, storedId) -> {
        // the persistent map holds Long values; the in-memory structures work with int ids
        final int id = storedId.intValue();
        intToStringPut(id, key);
        stringToInt.put(key, id);
        bytesToInt.put(new ByteArray(key), id);
    });
}
private Path keyCompressionFile(final Path dataDirectory) { private Path keyCompressionFile(final Path dataDirectory) {
return dataDirectory.resolve("keys.csv"); return dataDirectory.resolve("keys.csv");
} }
private void init(final Path file) throws RuntimeIOException { private void upgradeFromCsvFile(final Path file) throws RuntimeIOException {
try { try {
Files.createDirectories(file.getParent());
if (!Files.exists(file)) {
Files.createFile(file);
}
try (final BufferedReader reader = new BufferedReader( try (final BufferedReader reader = new BufferedReader(
new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) { new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) {
String line; String line;
while ((line = reader.readLine()) != null) { while ((line = reader.readLine()) != null) {
// TODO use more efficient code to read the CSV -> improves startup time
final String[] tokens = line.split(Pattern.quote(SEPARATOR)); final String[] tokens = line.split(Pattern.quote(SEPARATOR));
if (tokens.length == 2) { if (tokens.length == 2) {
@@ -136,6 +146,7 @@ public class UniqueStringIntegerPairs {
intToStringPut(integer, string); intToStringPut(integer, string);
stringToInt.put(string, integer); stringToInt.put(string, integer);
bytesToInt.put(new ByteArray(string), integer); bytesToInt.put(new ByteArray(string), integer);
persistentMap.putValue(string, (long) integer);
} }
} }
} }
@@ -159,15 +170,8 @@ public class UniqueStringIntegerPairs {
if (stringToInt.containsKey(string) || (intToString.size() > integer && intToString.get(integer) != null)) { if (stringToInt.containsKey(string) || (intToString.size() > integer && intToString.get(integer) != null)) {
throw new IllegalArgumentException("Unique key constraint violation for (" + string + ", " + integer + ")"); throw new IllegalArgumentException("Unique key constraint violation for (" + string + ", " + integer + ")");
} }
if (file != null) { if (persistentMap != null) {
try (final Writer writer = new OutputStreamWriter(new FileOutputStream(file.toFile(), APPEND), persistentMap.putValue(string, (long) integer);
StandardCharsets.UTF_8)) {
writer.write(string + SEPARATOR + integer + "\n");
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
} }
intToStringPut(integer, string); intToStringPut(integer, string);