sort IntLists in DataStore

The IntLists were no longer sorted after we made the initialization run
in parallel. As a result, a much slower implementation of
intersection/union was used.
This commit is contained in:
ahr
2017-12-30 09:45:50 +01:00
parent 5617547d63
commit fc30ffd928

View File

@@ -4,6 +4,7 @@ import java.io.IOException;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@@ -71,6 +72,7 @@ public class DataStore {
}); });
trimIntLists(); trimIntLists();
sortIntLists();
synchronized (docIdToDoc) { synchronized (docIdToDoc) {
((ArrayList<Doc>)docIdToDoc).trimToSize(); ((ArrayList<Doc>)docIdToDoc).trimToSize();
} }
@@ -120,6 +122,19 @@ public class DataStore {
(totalValues * 4) / 1024, (totalValues * 4) / 1024,
(System.nanoTime() - start) / 1_000_000.0); (System.nanoTime() - start) / 1_000_000.0);
} }
/**
 * Calls {@code sort()} on every {@link IntList} reachable through
 * {@code keyToValueToDocId} and logs the elapsed wall-clock time in
 * milliseconds.
 */
private void sortIntLists() {
    final long startNanos = System.nanoTime();

    // Walk each inner value -> IntList map, then each IntList within it.
    for (final Map<String, IntList> valueToDocIds : keyToValueToDocId.values()) {
        for (final IntList docIds : valueToDocIds.values()) {
            docIds.sort();
        }
    }

    final double elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000.0;
    LOGGER.info("sorting IntLists, took: {} ms", elapsedMillis);
}
private Path keyCompressionFile(final Path dataDirectory) throws IOException { private Path keyCompressionFile(final Path dataDirectory) throws IOException {
return dataDirectory.resolve("keys.csv"); return dataDirectory.resolve("keys.csv");