add index for tags-to-documents
Now we can find the writer much faster, because we no longer have to execute a query for the documents that match the tags; we can simply look the documents up in the map. Speedup: 2-4 ms -> 0.002-0.01 ms
This commit is contained in:
@@ -39,4 +39,8 @@ public class PdbDB {
|
||||
return proposer.propose(query, caretIndex);
|
||||
}
|
||||
|
||||
public List<Doc> getByTags(Tags tags) {
|
||||
|
||||
return dataStore.getByTags(tags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,8 +48,10 @@ public class DataStore {
|
||||
|
||||
// to be guarded by itself
|
||||
private final List<Doc> docIdToDoc = new ArrayList<>();
|
||||
|
||||
private final ConcurrentHashMap<Tags, List<Doc>> tagsToDocs = new ConcurrentHashMap<>();
|
||||
|
||||
private final Map<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
|
||||
private final ConcurrentHashMap<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
|
||||
|
||||
private final StringCompressor stringCompressor;
|
||||
private final FolderStorage folderStorage;
|
||||
@@ -81,11 +83,18 @@ public class DataStore {
|
||||
private void cacheTagToFileMapping(final Tags tags, final Path path) {
|
||||
|
||||
final int docId;
|
||||
final Doc newDoc = new Doc(tags, path);
|
||||
synchronized (docIdToDoc) {
|
||||
docId = docIdToDoc.size();
|
||||
docIdToDoc.add(new Doc(tags, path));
|
||||
docIdToDoc.add(newDoc);
|
||||
}
|
||||
|
||||
|
||||
tagsToDocs.compute(tags, (t, listOfDocs) -> {
|
||||
final List<Doc> result = listOfDocs != null ? listOfDocs : new ArrayList<>(2);
|
||||
result.add(newDoc);
|
||||
return result;
|
||||
});
|
||||
|
||||
for (final String key : tags.getKeys()) {
|
||||
final Map<String, IntList> valueToDocIds = keyToValueToDocId.computeIfAbsent(key, k -> new ConcurrentHashMap<>());
|
||||
|
||||
@@ -280,4 +289,9 @@ public class DataStore {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Doc> getByTags(Tags tags) {
|
||||
final List<Doc> result = tagsToDocs.getOrDefault(tags, new ArrayList<>(0));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ import java.util.Map.Entry;
|
||||
|
||||
import org.lucares.pdb.api.Tags;
|
||||
import org.lucares.pdb.datastore.Doc;
|
||||
import org.lucares.pdb.datastore.internal.DataStore;
|
||||
import org.lucares.utils.CollectionUtils;
|
||||
import org.lucares.utils.file.FileUtils;
|
||||
import org.testng.Assert;
|
||||
@@ -98,6 +97,28 @@ public class DataStoreTest {
|
||||
assertSearch("dog=*lab*dor*", labradorJenny, labradorTim);
|
||||
|
||||
}
|
||||
|
||||
public void testGetByTags() throws IOException
|
||||
{
|
||||
final Tags eagleTim1 = Tags.create("bird", "eagle", "name", "Tim");
|
||||
final Tags eagleTim2 = Tags.create("bird", "eagle", "name", "Tim");
|
||||
final Tags pigeonJennifer = Tags.create("bird", "pigeon", "name", "Jennifer");
|
||||
final Tags flamingoJennifer = Tags.create("bird", "flamingo", "name", "Jennifer");
|
||||
|
||||
dataStore = new DataStore(dataDirectory);
|
||||
|
||||
dataStore.createNewFile(eagleTim1);
|
||||
dataStore.createNewFile(eagleTim2);
|
||||
dataStore.createNewFile(pigeonJennifer);
|
||||
dataStore.createNewFile(flamingoJennifer);
|
||||
|
||||
// eagleTim1 and eagleTim2 have the same tags, so we find both docs
|
||||
final List<Doc> docsEagleTim = dataStore.getByTags(eagleTim1);
|
||||
Assert.assertEquals(docsEagleTim.size(), 2, "two docs for eagleTim1 and eagleTim2");
|
||||
|
||||
final List<Doc> docsFlamingoJennifer = dataStore.getByTags(flamingoJennifer);
|
||||
Assert.assertEquals(docsFlamingoJennifer.size(), 1, "doc for docsFlamingoJennifer");
|
||||
}
|
||||
|
||||
private void assertSearch(final String query, final Tags... tags) {
|
||||
final List<Doc> actualDocs = dataStore.search(query);
|
||||
|
||||
Reference in New Issue
Block a user