diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
index a4facf4..f1414bb 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
 public class DataStore {
     private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
             .getLogger("org.lucares.metrics.dataStore.executeQuery");
+    private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
 
     private static final String SUBDIR_STORAGE = "storage";
     private static final String PDB_EXTENSION = ".pdb";
@@ -68,6 +69,7 @@ public class DataStore {
             cacheTagToFileMapping(tags, path);
 
         });
+        trimIntLists();
     }
 
     private void cacheTagToFileMapping(final Tags tags, final Path path) {
@@ -85,6 +87,37 @@ public class DataStore {
         }
     }
 
+    /**
+     * Calls {@code trim()} on every {@link IntList} held in {@code keyToValueToDocId} and logs the
+     * summed capacity before and after, the difference, the summed size and the elapsed time.
+     * <p>
+     * Sizes are reported in kB assuming one {@code int} (4 bytes) per entry. The totals are kept as
+     * {@code long} because the byte figures exceed {@code Integer.MAX_VALUE} once an index holds
+     * 2^29 or more entries.
+     */
+    private void trimIntLists() {
+        final long start = System.nanoTime();
+        long totalBeforeTrim = 0;
+        long totalAfterTrim = 0;
+        long totalValues = 0;
+        for (final Map<?, IntList> valueToDocIds : keyToValueToDocId.values()) {
+            for (final IntList intList : valueToDocIds.values()) {
+                totalBeforeTrim += intList.getCapacity();
+                intList.trim();
+                totalAfterTrim += intList.getCapacity();
+                totalValues += intList.size();
+            }
+        }
+
+        LOGGER.info(
+                "trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, total size: {} kB, took: {} ms",
+                totalValues,
+                (totalBeforeTrim * Integer.BYTES) / 1024, (totalAfterTrim * Integer.BYTES) / 1024,
+                ((totalBeforeTrim - totalAfterTrim) * Integer.BYTES) / 1024,
+                (totalValues * Integer.BYTES) / 1024,
+                (System.nanoTime() - start) / 1_000_000.0);
+    }
+
     private Path keyCompressionFile(final Path dataDirectory) throws IOException {
         return dataDirectory.resolve("keys.csv");
     }