Parallelize initialization of DataStore
With the files already in the OS cache, the initialization time for 750k files went down from 35 seconds to 15 seconds.
This commit is contained in:
@@ -5,12 +5,12 @@ import java.nio.file.Path;
|
|||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.SortedSet;
|
import java.util.SortedSet;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
@@ -47,7 +47,7 @@ public class DataStore {
|
|||||||
|
|
||||||
private final List<Doc> docIdToDoc = new ArrayList<>();
|
private final List<Doc> docIdToDoc = new ArrayList<>();
|
||||||
|
|
||||||
private final Map<String, Map<String, IntList>> keyToValueToDocId = new HashMap<>();
|
private final Map<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
private final StringCompressor stringCompressor;
|
private final StringCompressor stringCompressor;
|
||||||
private final FolderStorage folderStorage;
|
private final FolderStorage folderStorage;
|
||||||
@@ -62,7 +62,7 @@ public class DataStore {
|
|||||||
private void init(final FolderStorage folderStorage) throws IOException {
|
private void init(final FolderStorage folderStorage) throws IOException {
|
||||||
|
|
||||||
final Stream<Path> files = folderStorage.list();
|
final Stream<Path> files = folderStorage.list();
|
||||||
files.forEach(path -> {
|
files.parallel().forEach(path -> {
|
||||||
|
|
||||||
final String filename = path.getFileName().toString();
|
final String filename = path.getFileName().toString();
|
||||||
final Tags tags = toTags(filename);
|
final Tags tags = toTags(filename);
|
||||||
@@ -78,14 +78,16 @@ public class DataStore {
|
|||||||
docIdToDoc.add(new Doc(tags, path));
|
docIdToDoc.add(new Doc(tags, path));
|
||||||
|
|
||||||
for (final String key : tags.getKeys()) {
|
for (final String key : tags.getKeys()) {
|
||||||
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new HashMap<>());
|
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new ConcurrentHashMap<>());
|
||||||
|
|
||||||
final String value = tags.getValue(key);
|
final String value = tags.getValue(key);
|
||||||
|
|
||||||
final IntList docIds = valueToDocIds.computeIfAbsent(value, v -> new IntList());
|
final IntList docIds = valueToDocIds.computeIfAbsent(value, v -> new IntList());
|
||||||
|
synchronized (docIds) {
|
||||||
docIds.add(docId);
|
docIds.add(docId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void trimIntLists() {
|
private void trimIntLists() {
|
||||||
final long start = System.nanoTime();
|
final long start = System.nanoTime();
|
||||||
@@ -103,9 +105,10 @@ public class DataStore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
LOGGER.info(
|
LOGGER.info(
|
||||||
"trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, total size: {} kB, took: {} ms",
|
"trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, took: {} ms",
|
||||||
totalValues,
|
totalValues,
|
||||||
(totalBeforeTrim * 4) / 1024, (totalAfterTrim * 4) / 1024,
|
(totalBeforeTrim * 4) / 1024,
|
||||||
|
(totalAfterTrim * 4) / 1024,
|
||||||
((totalBeforeTrim - totalAfterTrim) * 4) / 1024,
|
((totalBeforeTrim - totalAfterTrim) * 4) / 1024,
|
||||||
(totalValues * 4) / 1024,
|
(totalValues * 4) / 1024,
|
||||||
(System.nanoTime() - start) / 1_000_000.0);
|
(System.nanoTime() - start) / 1_000_000.0);
|
||||||
|
|||||||
Reference in New Issue
Block a user