From d98c45e8bd9f4b491ce8f94a0b7d92ea75706ca9 Mon Sep 17 00:00:00 2001 From: ahr Date: Sun, 14 Jan 2018 09:51:37 +0100 Subject: [PATCH] add index for tags-to-documents Now we can find writer much faster, because we don't have to execute a query for documents that match the tags. We can just look up the documents in the map. Speedup: 2-4ms -> 0.002-0.01ms --- .../java/org/lucares/pdb/datastore/PdbDB.java | 4 ++ .../pdb/datastore/internal/DataStore.java | 20 +++++++-- .../pdb/datastore/internal/DataStoreTest.java | 23 +++++++++- pdb-ui/src/main/resources/log4j2.xml | 1 + .../lucares/performance/db/TagsToFile.java | 45 +++++++------------ 5 files changed, 60 insertions(+), 33 deletions(-) diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/PdbDB.java b/data-store/src/main/java/org/lucares/pdb/datastore/PdbDB.java index 6c980ca..7c787f3 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/PdbDB.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/PdbDB.java @@ -39,4 +39,8 @@ public class PdbDB { return proposer.propose(query, caretIndex); } + public List getByTags(Tags tags) { + + return dataStore.getByTags(tags); + } } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index 7523539..dfcb9ef 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -48,8 +48,10 @@ public class DataStore { // to be guarded by itself private final List docIdToDoc = new ArrayList<>(); + + private final ConcurrentHashMap> tagsToDocs = new ConcurrentHashMap<>(); - private final Map> keyToValueToDocId = new ConcurrentHashMap<>(); + private final ConcurrentHashMap> keyToValueToDocId = new ConcurrentHashMap<>(); private final StringCompressor stringCompressor; private final FolderStorage folderStorage; @@ -81,11 +83,18 @@ public class DataStore { private void cacheTagToFileMapping(final Tags tags, final Path path) { final int docId; + final Doc newDoc = new Doc(tags, path); synchronized (docIdToDoc) { docId = docIdToDoc.size(); - docIdToDoc.add(new Doc(tags, path)); + docIdToDoc.add(newDoc); } - + + tagsToDocs.compute(tags, (t, listOfDocs) -> { + final List result = listOfDocs != null ? listOfDocs : new ArrayList<>(2); + result.add(newDoc); + return result; + }); + for (final String key : tags.getKeys()) { final Map valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new ConcurrentHashMap<>()); @@ -280,4 +289,9 @@ public class DataStore { } return result; } + + public List getByTags(Tags tags) { + final List result = tagsToDocs.getOrDefault(tags, new ArrayList<>(0)); + return result; + } } diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java index 0c6a1f2..d2e3b6b 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java @@ -12,7 +12,6 @@ import java.util.Map.Entry; import org.lucares.pdb.api.Tags; import org.lucares.pdb.datastore.Doc; -import org.lucares.pdb.datastore.internal.DataStore; import org.lucares.utils.CollectionUtils; import org.lucares.utils.file.FileUtils; import org.testng.Assert; @@ -98,6 +97,28 @@ public class DataStoreTest { assertSearch("dog=*lab*dor*", labradorJenny, labradorTim); } + + public void testGetByTags() throws IOException + { + final Tags eagleTim1 = Tags.create("bird", "eagle", "name", "Tim"); + final Tags eagleTim2 = Tags.create("bird", "eagle", "name", "Tim"); + final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer"); + final Tags flamingoJennifer = Tags.create("bird", "flamingo", "name", "Jennifer"); + + dataStore = new DataStore(dataDirectory); + + dataStore.createNewFile(eagleTim1); + dataStore.createNewFile(eagleTim2); + dataStore.createNewFile(pigeonJennifer); + dataStore.createNewFile(flamingoJennifer); + + // eagleTim1 and eagleTim2 have the same tags, so we find both docs + final List docsEagleTim = dataStore.getByTags(eagleTim1); + Assert.assertEquals(docsEagleTim.size(), 2, "two docs for eagleTim1 and eagleTim2"); + + final List docsFlamingoJennifer = dataStore.getByTags(flamingoJennifer); + Assert.assertEquals(docsFlamingoJennifer.size(), 1, "doc for docsFlamingoJennifer"); + } private void assertSearch(final String query, final Tags... tags) { final List actualDocs = dataStore.search(query); diff --git a/pdb-ui/src/main/resources/log4j2.xml b/pdb-ui/src/main/resources/log4j2.xml index f54bde2..47131b0 100644 --- a/pdb-ui/src/main/resources/log4j2.xml +++ b/pdb-ui/src/main/resources/log4j2.xml @@ -38,6 +38,7 @@ +