parallelize initialization of DataStore
With the files already in the OS cache, the initialization time for 750k files dropped from 35 seconds to 15 seconds.
This commit is contained in:
@@ -5,12 +5,12 @@ import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
@@ -47,7 +47,7 @@ public class DataStore {
|
||||
|
||||
private final List<Doc> docIdToDoc = new ArrayList<>();
|
||||
|
||||
private final Map<String, Map<String, IntList>> keyToValueToDocId = new HashMap<>();
|
||||
private final Map<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
|
||||
|
||||
private final StringCompressor stringCompressor;
|
||||
private final FolderStorage folderStorage;
|
||||
@@ -62,7 +62,7 @@ public class DataStore {
|
||||
private void init(final FolderStorage folderStorage) throws IOException {
|
||||
|
||||
final Stream<Path> files = folderStorage.list();
|
||||
files.forEach(path -> {
|
||||
files.parallel().forEach(path -> {
|
||||
|
||||
final String filename = path.getFileName().toString();
|
||||
final Tags tags = toTags(filename);
|
||||
@@ -78,12 +78,14 @@ public class DataStore {
|
||||
docIdToDoc.add(new Doc(tags, path));
|
||||
|
||||
for (final String key : tags.getKeys()) {
|
||||
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new HashMap<>());
|
||||
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new ConcurrentHashMap<>());
|
||||
|
||||
final String value = tags.getValue(key);
|
||||
|
||||
final IntList docIds = valueToDocIds.computeIfAbsent(value, v -> new IntList());
|
||||
docIds.add(docId);
|
||||
synchronized (docIds) {
|
||||
docIds.add(docId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,9 +105,10 @@ public class DataStore {
|
||||
}
|
||||
|
||||
LOGGER.info(
|
||||
"trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, total size: {} kB, took: {} ms",
|
||||
"trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, took: {} ms",
|
||||
totalValues,
|
||||
(totalBeforeTrim * 4) / 1024, (totalAfterTrim * 4) / 1024,
|
||||
(totalBeforeTrim * 4) / 1024,
|
||||
(totalAfterTrim * 4) / 1024,
|
||||
((totalBeforeTrim - totalAfterTrim) * 4) / 1024,
|
||||
(totalValues * 4) / 1024,
|
||||
(System.nanoTime() - start) / 1_000_000.0);
|
||||
|
||||
Reference in New Issue
Block a user