From 6ef4e7a96b8f7189f77989a5e0ade3e52a6f9e55 Mon Sep 17 00:00:00 2001
From: ahr
Date: Sat, 16 Dec 2017 17:57:15 +0100
Subject: [PATCH] reduce memory footprint of index by trimming IntLists

Reduced the memory usage of the IntLists in the index by 4.1MB
(19.9MB to 15.8MB) for 683,390 files and 4,046,250 values in the
IntLists.
---
 .../pdb/datastore/internal/DataStore.java | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
index a4facf4..f1414bb 100644
--- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
+++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
 public class DataStore {
     private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
             .getLogger("org.lucares.metrics.dataStore.executeQuery");
+    private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
 
     private static final String SUBDIR_STORAGE = "storage";
     private static final String PDB_EXTENSION = ".pdb";
@@ -68,6 +69,7 @@ public class DataStore {
             cacheTagToFileMapping(tags, path);
 
         });
+        trimIntLists();
     }
 
     private void cacheTagToFileMapping(final Tags tags, final Path path) {
@@ -85,6 +87,38 @@ public class DataStore {
         }
     }
 
+    /**
+     * Calls {@code trim()} on every {@code IntList} stored in
+     * {@code keyToValueToDocId} and logs the total capacity before and after.
+     * <p>
+     * Sizes are reported in kB at four bytes per slot. The totals are kept as
+     * {@code long} because an {@code int} total multiplied by four overflows
+     * once the lists hold more than about 536 million slots.
+     */
+    private void trimIntLists() {
+        final long start = System.nanoTime();
+        long totalBeforeTrim = 0;
+        long totalAfterTrim = 0;
+        long totalValues = 0;
+        for (final Map<?, IntList> valueToDocIds : keyToValueToDocId.values()) {
+            for (final IntList intList : valueToDocIds.values()) {
+                totalBeforeTrim += intList.getCapacity();
+                intList.trim();
+                totalAfterTrim += intList.getCapacity();
+                totalValues += intList.size();
+            }
+        }
+
+        LOGGER.info(
+                "trimming IntLists of index: values {}, {} kB before, {} kB after, difference {} kB, total size: {} kB, took: {} ms",
+                totalValues,
+                (totalBeforeTrim * Integer.BYTES) / 1024,
+                (totalAfterTrim * Integer.BYTES) / 1024,
+                ((totalBeforeTrim - totalAfterTrim) * Integer.BYTES) / 1024,
+                (totalValues * Integer.BYTES) / 1024,
+                (System.nanoTime() - start) / 1_000_000.0);
+    }
+
     private Path keyCompressionFile(final Path dataDirectory) throws IOException {
         return dataDirectory.resolve("keys.csv");
     }