replace ludb with data-store
LuDB has a few disadvantages.
1. Most notably disk space. H2 wastes a lot of valuable disk space.
For my test data set with 44 million entries it is 14 MB
(sometimes a lot more; depends on H2 internal cleanup). With
data-store it is 15 KB.
Overall I could reduce the disk space from 231 MB to 200 MB (13.4 %
in this example). That is an average of 4.6 bytes per entry.
2. Speed:
a) Liquibase is slow. The first time, it takes approx. three seconds.
b) Query and insertion. With data-store we can insert entries
up to 1.6 times faster.
Data-store uses a few tricks to save disk space:
1. We encode the tags into the file names.
2. To keep them short we translate the key/value of the tag into
shorter numbers. For example "foo" -> 12 and "bar" -> 47. So the
tag "foo"/"bar" would be 12/47.
We then translate this number into a numeral system of base 62
(a-zA-Z0-9), so it can be used for file names and it is shorter.
That way we only have to store the mapping of string to int.
3. We do that in a simple tab separated file.
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
package org.lucares.utils;
|
||||
|
||||
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
|
||||
|
||||
/**
 * Static helpers for mapping, indexing and filtering collections and arrays.
 */
public class CollectionUtils {

    /**
     * Replaces every element of {@code list} with the result of applying
     * {@code mapper} to it. The list itself is modified; no new list is created.
     *
     * @param list   the list whose elements are replaced; must support
     *               {@code set} if it is not empty
     * @param mapper computes the replacement for each element
     * @param <T>    element type of the list
     * @param <R>    type produced by the mapper, must be assignable to {@code T}
     */
    public static <T, R extends T> void mapInPlace(final List<T> list,
            final Function<? super T, ? extends R> mapper) {
        // A list iterator visits each position once, so linked lists are
        // processed in linear time instead of paying for get(i)/set(i) lookups.
        final ListIterator<T> iterator = list.listIterator();
        while (iterator.hasNext()) {
            final T newValue = mapper.apply(iterator.next());
            iterator.set(newValue);
        }
    }

    /**
     * Applies {@code mapper} to every element of {@code list}, in iteration
     * order, and returns the results in a new {@link ArrayList}.
     *
     * @param list   the source elements
     * @param mapper the function applied to each element
     * @param <T>    element type accepted by the mapper
     * @param <R>    element type of the result
     * @return a new list holding one mapped value per source element
     */
    public static <T, R> List<R> map(final Collection<? extends T> list,
            final Function<? super T, ? extends R> mapper) {
        final List<R> result = new ArrayList<>(list.size());

        for (final T t : list) {
            result.add(mapper.apply(t));
        }

        return result;
    }

    /**
     * Applies {@code mapper} to every element of the array {@code input}, in
     * order, and collects the results into a list.
     *
     * @param input  the source elements
     * @param mapper the function applied to each element
     * @param <T>    component type of the array
     * @param <R>    element type of the result
     * @return a list holding one mapped value per array element
     */
    public static <T, R> List<R> map(final T[] input, final Function<? super T, ? extends R> mapper) {
        // The explicit <R> keeps the stream's element type at R even though the
        // mapper is declared with wildcards.
        return Stream.of(input).<R>map(mapper).collect(Collectors.toList());
    }

    /**
     * Indexes the values of {@code iterable} by the key that {@code keyMapper}
     * computes for each of them. The result is a new {@link HashMap}; if two
     * values produce the same key, the value encountered later replaces the
     * earlier one.
     *
     * @param iterable  the values to index
     * @param keyMapper computes the key for a value
     * @param <T>       key type
     * @param <V>       value type
     * @return a new map from computed key to value
     */
    public static <T, V> Map<T, V> toMap(final Iterable<? extends V> iterable,
            final Function<? super V, ? extends T> keyMapper) {
        final Map<T, V> result = new HashMap<>();

        for (final V value : iterable) {
            final T key = keyMapper.apply(value);

            result.put(key, value);
        }

        return result;
    }

    /**
     * Returns the elements of {@code collection} that satisfy {@code predicate},
     * in the collection's iteration order.
     *
     * @param collection the elements to test
     * @param predicate  decides which elements are kept
     * @param <T>        element type
     * @return a list of the elements for which the predicate returned {@code true}
     */
    public static <T> List<T> filter(final Collection<T> collection, final Predicate<? super T> predicate) {
        return collection.stream().filter(predicate).collect(Collectors.toList());
    }

}
|
||||
Reference in New Issue
Block a user