add second parser that uses a standard CSV reader

This commit is contained in:
2021-08-12 17:54:27 +02:00
parent 825bac24b9
commit 67c66ef89d
18 changed files with 584 additions and 221 deletions

View File

@@ -29,6 +29,11 @@ public class StringCompressor {
return usip.computeIfAbsent(bytes, start, endExclusive, postProcess);
}
/**
 * Post-processes {@code value} and returns the integer that {@code usip}
 * reports for the processed string.
 *
 * <p>NOTE(review): {@code postProcess} is applied here and then also handed
 * to {@code usip.computeIfAbsentWithPostprocess}, which appears to apply it
 * again when the string is not yet known. Presumably {@code postProcess} is
 * expected to be idempotent — confirm against the callers.
 *
 * @param value       the raw string; it is passed through {@code postProcess}
 *                    before the lookup
 * @param postProcess function applied to {@code value}; also forwarded
 *                    unchanged to {@code usip.computeIfAbsentWithPostprocess}
 * @return the value returned by {@code usip.computeIfAbsentWithPostprocess}
 *         for the processed string
 */
public int put(final String value, final Function<String, String> postProcess) {
    return usip.computeIfAbsentWithPostprocess(postProcess.apply(value), postProcess);
}
public String get(final int integer) {
return usip.getKey(integer);

View File

@@ -40,6 +40,10 @@ public class UniqueStringIntegerPairs {
private final int start;
private final int endExclusive;
/**
 * Creates a {@code ByteArray} from the UTF-8 encoded bytes of {@code string}.
 *
 * <p>Delegates to the constructor that takes a {@code byte[]}.
 *
 * @param string the text to encode; must not be {@code null}, since
 *               {@code getBytes} is invoked on it directly
 */
public ByteArray(final String string) {
this(string.getBytes(StandardCharsets.UTF_8));
}
public ByteArray(final byte[] array, final int start, final int endExclusive) {
super();
this.array = array;
@@ -127,7 +131,7 @@ public class UniqueStringIntegerPairs {
final int integer = Integer.parseInt(tokens[1]);
intToStringPut(integer, string);
stringToInt.put(string, integer);
bytesToInt.put(new ByteArray(string.getBytes(StandardCharsets.UTF_8)), integer);
bytesToInt.put(new ByteArray(string), integer);
}
}
}
@@ -164,7 +168,7 @@ public class UniqueStringIntegerPairs {
intToStringPut(integer, string);
stringToInt.put(string, integer);
bytesToInt.put(new ByteArray(string.getBytes(StandardCharsets.UTF_8)), integer);
bytesToInt.put(new ByteArray(string), integer);
}
public Integer get(final String string) {
@@ -198,10 +202,20 @@ public class UniqueStringIntegerPairs {
final ByteArray byteArray = new ByteArray(bytes, start, endExclusive);
Integer result = bytesToInt.get(byteArray);
if (result == null) {
final String string = new String(bytes, start, endExclusive - start, StandardCharsets.UTF_8);
result = computeIfAbsentWithPostprocess(string, postProcess);
}
return result;
}
public Integer computeIfAbsentWithPostprocess(final String string, final Function<String, String> postProcess) {
final ByteArray byteArray = new ByteArray(string);
Integer result = bytesToInt.get(byteArray);
if (result == null) {
synchronized (stringToInt) {
if (!bytesToInt.containsKey(byteArray)) {
final String string = new String(bytes, start, endExclusive - start, StandardCharsets.UTF_8);
final String normalizedString = postProcess.apply(string);
result = get(normalizedString);
if (result != null) {