From fc30ffd9284426e539c0b512095029b61b01f4df Mon Sep 17 00:00:00 2001
From: ahr
Date: Sat, 30 Dec 2017 09:45:50 +0100
Subject: [PATCH] sort IntLists in DataStore

The IntLists were no longer sorted since we made the initialization run
in parallel. Therefore a much slower implementation for
intersection/union was used.
---
 .../lucares/pdb/datastore/internal/DataStore.java | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
index 505ef37..7523539 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
@@ -4,6 +4,7 @@ import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -71,6 +72,7 @@ public class DataStore {
         });
 
         trimIntLists();
+        sortIntLists();
         synchronized (docIdToDoc) {
             ((ArrayList)docIdToDoc).trimToSize();
         }
@@ -120,6 +122,19 @@ public class DataStore {
                 (totalValues * 4) / 1024, (System.nanoTime() - start) / 1_000_000.0);
 
     }
+
+    private void sortIntLists() {
+        final long start = System.nanoTime();
+
+        final Collection<Map<String, IntList>> valueToDocIds = keyToValueToDocId.values();
+
+        valueToDocIds.stream().flatMap(map -> map.values().stream()).forEach(intList -> intList.sort());
+
+
+        LOGGER.info(
+                "sorting IntLists, took: {} ms",
+                (System.nanoTime() - start) / 1_000_000.0);
+    }
 
     private Path keyCompressionFile(final Path dataDirectory) throws IOException {
         return dataDirectory.resolve("keys.csv");