Add support for renaming and post-processing of CSV columns
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package org.lucares.pdb.api;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* Persistently maps Strings to integers.
|
||||
@@ -23,8 +24,9 @@ public class StringCompressor {
|
||||
return usip.computeIfAbsent(string, s -> usip.getHighestInteger() + 1);
|
||||
}
|
||||
|
||||
public int put(final byte[] bytes, final int start, final int endExclusive) {
|
||||
return usip.computeIfAbsent(bytes, start, endExclusive);
|
||||
public int put(final byte[] bytes, final int start, final int endExclusive,
|
||||
final Function<String, String> postProcess) {
|
||||
return usip.computeIfAbsent(bytes, start, endExclusive, postProcess);
|
||||
}
|
||||
|
||||
public String get(final int integer) {
|
||||
|
||||
@@ -193,7 +193,8 @@ public class UniqueStringIntegerPairs {
|
||||
return stringToInt.get(string);
|
||||
}
|
||||
|
||||
public Integer computeIfAbsent(final byte[] bytes, final int start, final int endExclusive) {
|
||||
public Integer computeIfAbsent(final byte[] bytes, final int start, final int endExclusive,
|
||||
final Function<String, String> postProcess) {
|
||||
|
||||
final ByteArray byteArray = new ByteArray(bytes, start, endExclusive);
|
||||
Integer result = bytesToInt.get(byteArray);
|
||||
@@ -201,8 +202,16 @@ public class UniqueStringIntegerPairs {
|
||||
synchronized (stringToInt) {
|
||||
if (!bytesToInt.containsKey(byteArray)) {
|
||||
final String string = new String(bytes, start, endExclusive - start, StandardCharsets.UTF_8);
|
||||
final String normalizedString = postProcess.apply(string);
|
||||
result = get(normalizedString);
|
||||
if (result != null) {
|
||||
return result;
|
||||
}
|
||||
|
||||
final Integer integer = intToString.size();
|
||||
put(string, integer);
|
||||
put(normalizedString, integer); // adds the normalized String to stringToInt and bytesToInt
|
||||
bytesToInt.put(byteArray, integer); // also add the original String to bytesToInt, because it is
|
||||
// used as cache
|
||||
}
|
||||
result = bytesToInt.get(byteArray);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user