sort IntLists in DataStore
The IntLists were no longer sorted after we made the initialization run in parallel. As a result, a much slower implementation of intersection/union was used.
This commit is contained in:
@@ -4,6 +4,7 @@ import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -71,6 +72,7 @@ public class DataStore {
|
||||
|
||||
});
|
||||
trimIntLists();
|
||||
sortIntLists();
|
||||
synchronized (docIdToDoc) {
|
||||
((ArrayList<Doc>)docIdToDoc).trimToSize();
|
||||
}
|
||||
@@ -120,6 +122,19 @@ public class DataStore {
|
||||
(totalValues * 4) / 1024,
|
||||
(System.nanoTime() - start) / 1_000_000.0);
|
||||
}
|
||||
|
||||
private void sortIntLists() {
|
||||
final long start = System.nanoTime();
|
||||
|
||||
final Collection<Map<String, IntList>> valueToDocIds = keyToValueToDocId.values();
|
||||
|
||||
valueToDocIds.stream().flatMap(map -> map.values().stream()).forEach(intList -> intList.sort());
|
||||
|
||||
|
||||
LOGGER.info(
|
||||
"sorting IntLists, took: {} ms",
|
||||
(System.nanoTime() - start) / 1_000_000.0);
|
||||
}
|
||||
|
||||
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
||||
return dataDirectory.resolve("keys.csv");
|
||||
|
||||
Reference in New Issue
Block a user