sort IntLists in DataStore
Since we made the initialization run in parallel, the IntLists were no longer sorted. As a result, a much slower implementation of intersection/union was used.
This commit is contained in:
@@ -4,6 +4,7 @@ import java.io.IOException;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -71,6 +72,7 @@ public class DataStore {
|
|||||||
|
|
||||||
});
|
});
|
||||||
trimIntLists();
|
trimIntLists();
|
||||||
|
sortIntLists();
|
||||||
synchronized (docIdToDoc) {
|
synchronized (docIdToDoc) {
|
||||||
((ArrayList<Doc>)docIdToDoc).trimToSize();
|
((ArrayList<Doc>)docIdToDoc).trimToSize();
|
||||||
}
|
}
|
||||||
@@ -121,6 +123,19 @@ public class DataStore {
|
|||||||
(System.nanoTime() - start) / 1_000_000.0);
|
(System.nanoTime() - start) / 1_000_000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void sortIntLists() {
|
||||||
|
final long start = System.nanoTime();
|
||||||
|
|
||||||
|
final Collection<Map<String, IntList>> valueToDocIds = keyToValueToDocId.values();
|
||||||
|
|
||||||
|
valueToDocIds.stream().flatMap(map -> map.values().stream()).forEach(intList -> intList.sort());
|
||||||
|
|
||||||
|
|
||||||
|
LOGGER.info(
|
||||||
|
"sorting IntLists, took: {} ms",
|
||||||
|
(System.nanoTime() - start) / 1_000_000.0);
|
||||||
|
}
|
||||||
|
|
||||||
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
||||||
return dataDirectory.resolve("keys.csv");
|
return dataDirectory.resolve("keys.csv");
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user