introduce index clustering (part 1)
In order to prevent files from getting too big and make it easier to implement retention policies, we are splitting all files into chunks. Each chunk contains the data for a time interval (1 month per default). This first changeset introduces the ClusteredPersistentMap that implements this for PersistentMap. It is used for a couple (not all) of indices.
This commit is contained in:
@@ -1,29 +0,0 @@
|
||||
package org.lucares.pdb.datastore;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
|
||||
public class DateTimeRange {
|
||||
|
||||
public static final DateTimeRange MAX = new DateTimeRange(OffsetDateTime.MIN, OffsetDateTime.MAX);
|
||||
|
||||
private final OffsetDateTime start;
|
||||
private final OffsetDateTime end;
|
||||
|
||||
public DateTimeRange(final OffsetDateTime start, final OffsetDateTime end) {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
public OffsetDateTime getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public OffsetDateTime getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return start + "-" + end;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
public class ClusterId {
|
||||
private final String clusterId;
|
||||
|
||||
/**
|
||||
* Create a new cluster id.
|
||||
*
|
||||
* @param clusterId the id, e.g. a time like 201902 (cluster for entries of
|
||||
* February 2019)
|
||||
*/
|
||||
public ClusterId(final String clusterId) {
|
||||
super();
|
||||
this.clusterId = clusterId;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the id, e.g. a time like 201902 (cluster for entries of February
|
||||
* 2019)
|
||||
*/
|
||||
public String getClusterId() {
|
||||
return clusterId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return clusterId;
|
||||
}
|
||||
|
||||
/*
|
||||
* non-standard hashcode implementation! This class is just a wrapper for
|
||||
* string, so we delegate directly to String.hashCode().
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return clusterId.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
final ClusterId other = (ClusterId) obj;
|
||||
if (clusterId == null) {
|
||||
if (other.clusterId != null)
|
||||
return false;
|
||||
} else if (!clusterId.equals(other.clusterId))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,111 @@
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.lucares.pdb.api.DateTimeRange;
|
||||
import org.lucares.pdb.datastore.ReadRuntimeException;
|
||||
import org.lucares.pdb.map.PersistentMap;
|
||||
import org.lucares.pdb.map.Visitor;
|
||||
|
||||
public class ClusteredPersistentMap<K, V> implements AutoCloseable {
|
||||
|
||||
private final ConcurrentHashMap<ClusterId, PersistentMap<K, V>> maps = new ConcurrentHashMap<>();
|
||||
|
||||
private final Function<ClusterId, PersistentMap<K, V>> creator;
|
||||
|
||||
// TODO we need two creators, one that actually creates a new map and one that
|
||||
// only creates a new map if the file on disk already exists
|
||||
public ClusteredPersistentMap(final Function<ClusterId, PersistentMap<K, V>> creator) {
|
||||
this.creator = (dateIndexPrefix) -> creator.apply(dateIndexPrefix);
|
||||
}
|
||||
|
||||
public V getValue(final ClusterId clusterId, final K key) {
|
||||
try {
|
||||
|
||||
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, creator);
|
||||
return map != null ? map.getValue(key) : null;
|
||||
} catch (final IOException e) {
|
||||
throw new ReadRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public List<V> getValues(final DateTimeRange dateRange, final K key) {
|
||||
try {
|
||||
final List<V> result = new ArrayList<>();
|
||||
final List<ClusterId> clusterIds = DateIndexExtension.toClusterIds(dateRange);
|
||||
|
||||
for (final ClusterId clusterId : clusterIds) {
|
||||
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, creator);
|
||||
if (map != null) {
|
||||
final V value = map.getValue(key);
|
||||
if (value != null) {
|
||||
result.add(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch (final IOException e) {
|
||||
throw new ReadRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public V putValue(final ClusterId clusterId, final K key, final V value) {
|
||||
try {
|
||||
|
||||
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, creator);
|
||||
return map.putValue(key, value);
|
||||
} catch (final IOException e) {
|
||||
throw new ReadRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void visitValues(final ClusterId clusterId, final K keyPrefix, final Visitor<K, V> visitor) {
|
||||
try {
|
||||
final PersistentMap<K, V> map = maps.computeIfAbsent(clusterId, creator);
|
||||
if (map != null) {
|
||||
map.visitValues(keyPrefix, visitor);
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new ReadRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void visitValues(final DateTimeRange dateRange, final K keyPrefix, final Visitor<K, V> visitor) {
|
||||
try {
|
||||
final List<ClusterId> clusterIds = DateIndexExtension.toClusterIds(dateRange);
|
||||
|
||||
for (final ClusterId clusterId : clusterIds) {
|
||||
final PersistentMap<K, V> map = maps.get(clusterId);
|
||||
if (map != null) {
|
||||
map.visitValues(keyPrefix, visitor);
|
||||
}
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new ReadRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
final List<Throwable> throwables = new ArrayList<>();
|
||||
|
||||
for (final PersistentMap<K, V> map : maps.values()) {
|
||||
try {
|
||||
map.close();
|
||||
} catch (final IOException e) {
|
||||
throwables.add(e);
|
||||
}
|
||||
}
|
||||
if (!throwables.isEmpty()) {
|
||||
final RuntimeException ex = new RuntimeException();
|
||||
throwables.forEach(ex::addSuppressed);
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -16,6 +16,8 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.lucares.collections.LongList;
|
||||
import org.lucares.pdb.api.DateTimeRange;
|
||||
import org.lucares.pdb.api.Query;
|
||||
import org.lucares.pdb.api.RuntimeIOException;
|
||||
import org.lucares.pdb.api.StringCompressor;
|
||||
import org.lucares.pdb.api.Tag;
|
||||
@@ -61,11 +63,11 @@ public class DataStore implements AutoCloseable {
|
||||
|
||||
public static Tag TAG_ALL_DOCS = null;
|
||||
|
||||
private final PersistentMap<Long, Doc> docIdToDoc;
|
||||
private final ClusteredPersistentMap<Long, Doc> docIdToDoc;
|
||||
|
||||
private final PersistentMap<Tags, Long> tagsToDocId;
|
||||
private final ClusteredPersistentMap<Tags, Long> tagsToDocId;
|
||||
|
||||
private final PersistentMap<Tag, Long> tagToDocsId;
|
||||
private final ClusteredPersistentMap<Tag, Long> tagToDocsId;
|
||||
|
||||
private final QueryCompletionIndex queryCompletionIndex;
|
||||
|
||||
@@ -92,15 +94,33 @@ public class DataStore implements AutoCloseable {
|
||||
diskStorage = new DiskStorage(diskStorageFilePath);
|
||||
diskStorage.ensureAlignmentForNewBlocks(BSFile.BLOCK_SIZE);
|
||||
|
||||
final Path keyToValueToDocIdsIndexPath = storageBasePath.resolve("keyToValueToDocIdsIndex.bs");
|
||||
tagToDocsId = new PersistentMap<>(keyToValueToDocIdsIndexPath, new TagEncoderDecoder(),
|
||||
PersistentMap.LONG_CODER);
|
||||
tagToDocsId = new ClusteredPersistentMap<>(clusterId -> {
|
||||
try {
|
||||
final Path file = storageBasePath.resolve(clusterId.getClusterId())
|
||||
.resolve("keyToValueToDocIdsIndex.bs");
|
||||
return new PersistentMap<>(file, new TagEncoderDecoder(), PersistentMap.LONG_CODER);
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
});
|
||||
|
||||
final Path tagsToDocIdIndexPath = storageBasePath.resolve("tagsToDocIdIndex.bs");
|
||||
tagsToDocId = new PersistentMap<>(tagsToDocIdIndexPath, new TagsEncoderDecoder(), PersistentMap.LONG_CODER);
|
||||
tagsToDocId = new ClusteredPersistentMap<>(clusterId -> {
|
||||
try {
|
||||
final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve("tagsToDocIdIndex.bs");
|
||||
return new PersistentMap<>(file, new TagsEncoderDecoder(), PersistentMap.LONG_CODER);
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
});
|
||||
|
||||
final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs");
|
||||
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, new DocEncoderDecoder());
|
||||
docIdToDoc = new ClusteredPersistentMap<>(clusterId -> {
|
||||
try {
|
||||
final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve("docIdToDocIndex.bs");
|
||||
return new PersistentMap<>(file, PersistentMap.LONG_CODER, new DocEncoderDecoder());
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
});
|
||||
|
||||
queryCompletionIndex = new QueryCompletionIndex(storageBasePath);
|
||||
|
||||
@@ -117,7 +137,8 @@ public class DataStore implements AutoCloseable {
|
||||
}
|
||||
|
||||
public void write(final long dateAsEpochMilli, final Tags tags, final long value) {
|
||||
final PdbWriter writer = getWriter(dateAsEpochMilli, tags);
|
||||
final ClusterId clusterId = DateIndexExtension.toClusterId(dateAsEpochMilli);
|
||||
final PdbWriter writer = getWriter(clusterId, tags);
|
||||
writer.write(dateAsEpochMilli, value);
|
||||
}
|
||||
|
||||
@@ -126,15 +147,15 @@ public class DataStore implements AutoCloseable {
|
||||
return queryCompletionIndex;
|
||||
}
|
||||
|
||||
public long createNewFile(final Tags tags) {
|
||||
public long createNewFile(final ClusterId clusterId, final Tags tags) {
|
||||
try {
|
||||
final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
|
||||
|
||||
final long docId = createUniqueDocId();
|
||||
final Doc doc = new Doc(tags, newFilesRootBlockOffset);
|
||||
docIdToDoc.putValue(docId, doc);
|
||||
docIdToDoc.putValue(clusterId, docId, doc);
|
||||
|
||||
final Long oldDocId = tagsToDocId.putValue(tags, docId);
|
||||
final Long oldDocId = tagsToDocId.putValue(clusterId, tags, docId);
|
||||
Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags);
|
||||
|
||||
// store mapping from tag to docId, so that we can find all docs for a given tag
|
||||
@@ -142,11 +163,11 @@ public class DataStore implements AutoCloseable {
|
||||
ts.add(TAG_ALL_DOCS);
|
||||
for (final Tag tag : ts) {
|
||||
|
||||
Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag);
|
||||
Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(clusterId, tag);
|
||||
|
||||
if (diskStoreOffsetForDocIdsOfTag == null) {
|
||||
diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
|
||||
tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag);
|
||||
tagToDocsId.putValue(clusterId, tag, diskStoreOffsetForDocIdsOfTag);
|
||||
}
|
||||
|
||||
try (final LongStreamFile docIdsOfTag = LongStreamFile.existingFile(diskStoreOffsetForDocIdsOfTag,
|
||||
@@ -169,7 +190,7 @@ public class DataStore implements AutoCloseable {
|
||||
return NEXT_DOC_ID.getAndIncrement();
|
||||
}
|
||||
|
||||
public List<PdbFile> getFilesForQuery(final String query) {
|
||||
public List<PdbFile> getFilesForQuery(final Query query) {
|
||||
|
||||
final List<Doc> searchResult = search(query);
|
||||
if (searchResult.size() > 500_000) {
|
||||
@@ -193,55 +214,17 @@ public class DataStore implements AutoCloseable {
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Doc> search(final String query) {
|
||||
public List<Doc> search(final Query query) {
|
||||
try {
|
||||
final LongList docIdsList = executeQuery(query);
|
||||
LOGGER.trace("query {} found {} docs", query, docIdsList.size());
|
||||
final List<Doc> result = mapDocIdsToDocs(docIdsList);
|
||||
return result;
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
}
|
||||
final List<Doc> result = new ArrayList<>();
|
||||
final List<ClusterId> clusterIds = DateIndexExtension.toClusterIds(query.getDateRange());
|
||||
for (final ClusterId clusterId : clusterIds) {
|
||||
|
||||
public int count(final String query) {
|
||||
final LongList docIdsList = executeQuery(query);
|
||||
final LongList docIdsList = executeQuery(clusterId, query.getQuery());
|
||||
LOGGER.trace("query {} found {} docs", query, docIdsList.size());
|
||||
final List<Doc> docs = mapDocIdsToDocs(clusterId, docIdsList);
|
||||
result.addAll(docs);
|
||||
|
||||
return docIdsList.size();
|
||||
}
|
||||
|
||||
public List<String> getAvailableFields() {
|
||||
try {
|
||||
final Set<String> keys = new HashSet<>();
|
||||
|
||||
final Tag keyPrefix = new Tag("", ""); // will find everything
|
||||
|
||||
tagToDocsId.visitValues(keyPrefix, (tags, __) -> keys.add(tags.getKeyAsString()));
|
||||
|
||||
keys.remove(ALL_DOCS_KEY);
|
||||
final List<String> result = new ArrayList<>(keys);
|
||||
Collections.sort(result);
|
||||
return result;
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public SortedSet<String> getAvailableValuesForKey(final String query, final String key) {
|
||||
|
||||
try {
|
||||
final SortedSet<String> result = new TreeSet<>();
|
||||
if (query.isEmpty()) {
|
||||
tagToDocsId.visitValues(new Tag(key, ""), (tag, value) -> result.add(tag.getValueAsString()));
|
||||
} else {
|
||||
final List<Doc> docs = search(query);
|
||||
for (final Doc doc : docs) {
|
||||
final String valueForKey = doc.getTags().getValue(key);
|
||||
|
||||
if (valueForKey != null) {
|
||||
result.add(valueForKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -250,11 +233,59 @@ public class DataStore implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
private LongList executeQuery(final String query) {
|
||||
public int count(final Query query) {
|
||||
int count = 0;
|
||||
final List<ClusterId> clusterIds = DateIndexExtension.toClusterIds(query.getDateRange());
|
||||
for (final ClusterId clusterId : clusterIds) {
|
||||
|
||||
final LongList docIdsList = executeQuery(clusterId, query.getQuery());
|
||||
count += docIdsList.size();
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
public List<String> getAvailableFields(final DateTimeRange dateRange) {
|
||||
|
||||
final Set<String> keys = new HashSet<>();
|
||||
|
||||
final Tag keyPrefix = new Tag("", ""); // will find everything
|
||||
|
||||
tagToDocsId.visitValues(dateRange, keyPrefix, (tags, __) -> keys.add(tags.getKeyAsString()));
|
||||
|
||||
keys.remove(ALL_DOCS_KEY);
|
||||
final List<String> result = new ArrayList<>(keys);
|
||||
Collections.sort(result);
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
public SortedSet<String> getAvailableValuesForKey(final Query query, final String key) {
|
||||
|
||||
final SortedSet<String> result = new TreeSet<>();
|
||||
if (query.getQuery().isEmpty()) {
|
||||
tagToDocsId.visitValues(query.getDateRange(), new Tag(key, ""),
|
||||
(tag, __) -> result.add(tag.getValueAsString()));
|
||||
} else {
|
||||
final List<Doc> docs = search(query);
|
||||
for (final Doc doc : docs) {
|
||||
final String valueForKey = doc.getTags().getValue(key);
|
||||
|
||||
if (valueForKey != null) {
|
||||
result.add(valueForKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
private LongList executeQuery(final ClusterId clusterId, final String query) {
|
||||
final long start = System.nanoTime();
|
||||
synchronized (docIdToDoc) {
|
||||
final Expression expression = QueryLanguageParser.parse(query);
|
||||
final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(tagToDocsId, diskStorage);
|
||||
final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(clusterId, tagToDocsId, diskStorage);
|
||||
final LongList docIdsList = expression.visit(visitor);
|
||||
EXECUTE_QUERY_LOGGER.debug("executeQuery({}) took {}ms returned {} results ", query,
|
||||
(System.nanoTime() - start) / 1_000_000.0, docIdsList.size());
|
||||
@@ -262,7 +293,7 @@ public class DataStore implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
private List<Doc> mapDocIdsToDocs(final LongList docIdsList) throws IOException {
|
||||
private List<Doc> mapDocIdsToDocs(final ClusterId clusterId, final LongList docIdsList) throws IOException {
|
||||
final List<Doc> result = new ArrayList<>(docIdsList.size());
|
||||
|
||||
synchronized (docIdToDoc) {
|
||||
@@ -270,7 +301,7 @@ public class DataStore implements AutoCloseable {
|
||||
for (int i = 0; i < docIdsList.size(); i++) {
|
||||
final long docId = docIdsList.get(i);
|
||||
|
||||
final Doc doc = getDocByDocId(docId);
|
||||
final Doc doc = getDocByDocId(clusterId, docId);
|
||||
Objects.requireNonNull(doc, "Doc with id " + docId + " did not exist.");
|
||||
|
||||
result.add(doc);
|
||||
@@ -281,26 +312,50 @@ public class DataStore implements AutoCloseable {
|
||||
return result;
|
||||
}
|
||||
|
||||
public Optional<Doc> getByTags(final Tags tags) {
|
||||
try {
|
||||
final Long docId = tagsToDocId.getValue(tags);
|
||||
if (docId != null) {
|
||||
final Doc doc = getDocByDocId(docId);
|
||||
return Optional.of(doc);
|
||||
}
|
||||
return Optional.empty();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
public Optional<Doc> getByTags(final ClusterId clusterId, final Tags tags) {
|
||||
|
||||
final Long docId = tagsToDocId.getValue(clusterId, tags);
|
||||
|
||||
if (docId != null) {
|
||||
final Doc doc = getDocByDocId(clusterId, docId);
|
||||
return Optional.of(doc);
|
||||
}
|
||||
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
private Doc getDocByDocId(final Long docId) {
|
||||
return docIdToDocCache.putIfAbsent(docId, () -> {
|
||||
try {
|
||||
return docIdToDoc.getValue(docId);
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
public List<Doc> getByTags(final DateTimeRange dateRange, final Tags tags) {
|
||||
|
||||
final List<Doc> result = new ArrayList<>();
|
||||
final List<Long> docIds = tagsToDocId.getValues(dateRange, tags);
|
||||
for (final Long docId : docIds) {
|
||||
|
||||
if (docId != null) {
|
||||
final Doc doc = getDocByDocId(dateRange, docId);
|
||||
result.add(doc);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private Doc getDocByDocId(final ClusterId clusterId, final Long docId) {
|
||||
return docIdToDocCache.putIfAbsent(docId, () -> {
|
||||
return docIdToDoc.getValue(clusterId, docId);
|
||||
});
|
||||
}
|
||||
|
||||
private Doc getDocByDocId(final DateTimeRange dateRange, final Long docId) {
|
||||
return docIdToDocCache.putIfAbsent(docId, () -> {
|
||||
|
||||
final List<Doc> docIds = docIdToDoc.getValues(dateRange, docId);
|
||||
if (docIds.size() == 1) {
|
||||
return docIds.get(0);
|
||||
} else if (docIds.size() > 1) {
|
||||
throw new IllegalStateException(
|
||||
"Found multiple documents for " + dateRange + " and docId " + docId + ": " + docIds);
|
||||
}
|
||||
throw new IllegalStateException("Found no documents for " + dateRange + " and docId " + docId);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -316,13 +371,18 @@ public class DataStore implements AutoCloseable {
|
||||
return diskStorage;
|
||||
}
|
||||
|
||||
PdbWriter getWriter(final long dateAsEpochMilli, final Tags tags) throws ReadException, WriteException {
|
||||
private PdbWriter getWriter(final ClusterId clusterId, final Tags tags) throws ReadException, WriteException {
|
||||
|
||||
return writerCache.putIfAbsent(tags, () -> getWriter(tags));
|
||||
return writerCache.putIfAbsent(tags, () -> getWriterInternal(clusterId, tags));
|
||||
}
|
||||
|
||||
private PdbWriter getWriter(final Tags tags) {
|
||||
final Optional<Doc> docsForTags = getByTags(tags);
|
||||
// visible for test
|
||||
long sizeWriterCache() {
|
||||
return writerCache.size();
|
||||
}
|
||||
|
||||
private PdbWriter getWriterInternal(final ClusterId clusterId, final Tags tags) {
|
||||
final Optional<Doc> docsForTags = getByTags(clusterId, tags);
|
||||
PdbWriter writer;
|
||||
if (docsForTags.isPresent()) {
|
||||
try {
|
||||
@@ -333,15 +393,15 @@ public class DataStore implements AutoCloseable {
|
||||
throw new ReadException(e);
|
||||
}
|
||||
} else {
|
||||
writer = newPdbWriter(tags);
|
||||
writer = newPdbWriter(clusterId, tags);
|
||||
}
|
||||
return writer;
|
||||
}
|
||||
|
||||
private PdbWriter newPdbWriter(final Tags tags) {
|
||||
private PdbWriter newPdbWriter(final ClusterId clusterId, final Tags tags) {
|
||||
final long start = System.nanoTime();
|
||||
try {
|
||||
final PdbFile pdbFile = createNewPdbFile(tags);
|
||||
final PdbFile pdbFile = createNewPdbFile(clusterId, tags);
|
||||
final PdbWriter result = new PdbWriter(pdbFile, getDiskStorage());
|
||||
|
||||
METRICS_LOGGER_NEW_WRITER.debug("newPdbWriter took {}ms tags: {}",
|
||||
@@ -350,12 +410,11 @@ public class DataStore implements AutoCloseable {
|
||||
} catch (final IOException e) {
|
||||
throw new WriteException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private PdbFile createNewPdbFile(final Tags tags) throws IOException {
|
||||
private PdbFile createNewPdbFile(final ClusterId clusterId, final Tags tags) throws IOException {
|
||||
|
||||
final long rootBlockNumber = createNewFile(tags);
|
||||
final long rootBlockNumber = createNewFile(clusterId, tags);
|
||||
|
||||
final PdbFile result = new PdbFile(rootBlockNumber, tags);
|
||||
return result;
|
||||
@@ -379,11 +438,7 @@ public class DataStore implements AutoCloseable {
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
} finally {
|
||||
try {
|
||||
tagToDocsId.close();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
tagToDocsId.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
package org.lucares.pdb.datastore.lang;
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ConcurrentNavigableMap;
|
||||
import java.util.concurrent.ConcurrentSkipListMap;
|
||||
|
||||
import org.lucares.pdb.datastore.DateTimeRange;
|
||||
import org.lucares.pdb.api.DateTimeRange;
|
||||
|
||||
public class DateIndexExtension {
|
||||
|
||||
@@ -25,18 +26,16 @@ public class DateIndexExtension {
|
||||
|
||||
static Set<String> toDateIndexPrefix(final DateTimeRange dateRange) {
|
||||
final Set<String> result = new TreeSet<>();
|
||||
if (Objects.equals(dateRange, DateTimeRange.MAX)) {
|
||||
result.add("*");
|
||||
} else {
|
||||
OffsetDateTime current = dateRange.getStart();
|
||||
while (current.isBefore(dateRange.getEnd())) {
|
||||
|
||||
result.add(toDateIndexPrefix(current));
|
||||
current = current.plusMonths(1);
|
||||
OffsetDateTime current = dateRange.getStart();
|
||||
while (current.isBefore(dateRange.getEnd())) {
|
||||
|
||||
result.add(toDateIndexPrefix(current));
|
||||
current = current.plusMonths(1);
|
||||
|
||||
}
|
||||
result.add(toDateIndexPrefix(dateRange.getEnd()));
|
||||
}
|
||||
result.add(toDateIndexPrefix(dateRange.getEnd()));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -44,6 +43,22 @@ public class DateIndexExtension {
|
||||
return time.format(DATE_PATTERN);
|
||||
}
|
||||
|
||||
public static ClusterId toClusterId(final long epochMilli) {
|
||||
// TODO most calls will be for a similar date -> add a shortcut
|
||||
final Entry<Long, DatePrefixAndRange> value = DATE_PREFIX_CACHE.floorEntry(epochMilli);
|
||||
|
||||
String result;
|
||||
if (value == null || !value.getValue().contains(epochMilli)) {
|
||||
final DatePrefixAndRange newValue = toDatePrefixAndRange(epochMilli);
|
||||
DATE_PREFIX_CACHE.put(newValue.getMinEpochMilli(), newValue);
|
||||
result = newValue.getDatePrefix();
|
||||
} else {
|
||||
result = value.getValue().getDatePrefix();
|
||||
}
|
||||
|
||||
return new ClusterId(result);
|
||||
}
|
||||
|
||||
public static String toDateIndexPrefix(final long epochMilli) {
|
||||
|
||||
final Entry<Long, DatePrefixAndRange> value = DATE_PREFIX_CACHE.floorEntry(epochMilli);
|
||||
@@ -60,7 +75,25 @@ public class DateIndexExtension {
|
||||
return result;
|
||||
}
|
||||
|
||||
private static DatePrefixAndRange toDatePrefixAndRange(final long epochMilli) {
|
||||
public static List<ClusterId> toClusterIds(final DateTimeRange dateRange) {
|
||||
final List<ClusterId> result = new ArrayList<>();
|
||||
|
||||
OffsetDateTime current = dateRange.getStart();
|
||||
final OffsetDateTime end = dateRange.getEnd();
|
||||
current = current.withOffsetSameInstant(ZoneOffset.UTC).withDayOfMonth(1).withHour(0).withMinute(0)
|
||||
.withSecond(0).withNano(0);
|
||||
|
||||
while (!current.isAfter(end)) {
|
||||
final String id = current.format(DATE_PATTERN);
|
||||
final ClusterId clusterId = new ClusterId(id);
|
||||
result.add(clusterId);
|
||||
current = current.plusMonths(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static DatePrefixAndRange toDatePrefixAndRange(final long epochMilli) {
|
||||
final OffsetDateTime date = Instant.ofEpochMilli(epochMilli).atOffset(ZoneOffset.UTC);
|
||||
final OffsetDateTime beginOfMonth = date.withDayOfMonth(1).withHour(0).withMinute(0).withSecond(0).withNano(0);
|
||||
final OffsetDateTime endOfMonth = beginOfMonth.plusMonths(1).minusNanos(1);
|
||||
@@ -72,6 +105,26 @@ public class DateIndexExtension {
|
||||
return new DatePrefixAndRange(datePrefix, minEpochMilli, maxEpochMilli);
|
||||
}
|
||||
|
||||
public static List<Long> toDateIndexEpochMillis(final DateTimeRange dateRange) {
|
||||
final List<Long> result = new ArrayList<>();
|
||||
|
||||
OffsetDateTime current = dateRange.getStart();
|
||||
final OffsetDateTime end = dateRange.getEnd();
|
||||
current = current.withOffsetSameInstant(ZoneOffset.UTC).withDayOfMonth(1).withHour(0).withMinute(0)
|
||||
.withSecond(0).withNano(0);
|
||||
|
||||
while (!current.isAfter(end)) {
|
||||
result.add(current.toInstant().toEpochMilli());
|
||||
current = current.plusMonths(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static ClusterId now() {
|
||||
return toClusterId(System.currentTimeMillis());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class DatePrefixAndRange {
|
||||
@@ -15,7 +15,7 @@ public class CandidateGrouper {
|
||||
final int numDotsInValue = countDotsInValue(queryWithCaretMarker);
|
||||
|
||||
for (final String value : values) {
|
||||
// keep everyting up to the (numDotsInValue+1)-th
|
||||
// keep everything up to the (numDotsInValue+1)-th
|
||||
final String[] token = value.split(Pattern.quote("."));
|
||||
final List<String> tokenlist = new ArrayList<>(Arrays.asList(token));
|
||||
final List<String> prefix = tokenlist.subList(0, numDotsInValue + 1);
|
||||
|
||||
@@ -11,13 +11,14 @@ import org.lucares.collections.LongList;
|
||||
import org.lucares.pdb.api.RuntimeIOException;
|
||||
import org.lucares.pdb.api.Tag;
|
||||
import org.lucares.pdb.blockstorage.LongStreamFile;
|
||||
import org.lucares.pdb.datastore.internal.ClusterId;
|
||||
import org.lucares.pdb.datastore.internal.ClusteredPersistentMap;
|
||||
import org.lucares.pdb.datastore.internal.DataStore;
|
||||
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Not;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Or;
|
||||
import org.lucares.pdb.datastore.lang.Expression.Parentheses;
|
||||
import org.lucares.pdb.diskstorage.DiskStorage;
|
||||
import org.lucares.pdb.map.PersistentMap;
|
||||
import org.lucares.utils.Preconditions;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -25,10 +26,14 @@ import org.slf4j.LoggerFactory;
|
||||
public class ExpressionToDocIdVisitor extends ExpressionVisitor<LongList> {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(ExpressionToDocIdVisitor.class);
|
||||
|
||||
private final PersistentMap<Tag, Long> keyToValueToDocId;
|
||||
private final ClusteredPersistentMap<Tag, Long> keyToValueToDocId;
|
||||
private final DiskStorage diskStorage;
|
||||
|
||||
public ExpressionToDocIdVisitor(final PersistentMap<Tag, Long> keyToValueToDocsId, final DiskStorage diskStorage) {
|
||||
private final ClusterId clusterId;
|
||||
|
||||
public ExpressionToDocIdVisitor(final ClusterId clusterId,
|
||||
final ClusteredPersistentMap<Tag, Long> keyToValueToDocsId, final DiskStorage diskStorage) {
|
||||
this.clusterId = clusterId;
|
||||
this.keyToValueToDocId = keyToValueToDocsId;
|
||||
this.diskStorage = diskStorage;
|
||||
}
|
||||
@@ -121,9 +126,17 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<LongList> {
|
||||
|
||||
private LongList getAllDocIds() {
|
||||
try {
|
||||
final Long blockOffset = keyToValueToDocId.getValue(DataStore.TAG_ALL_DOCS);
|
||||
final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffset, diskStorage);
|
||||
return bsFile.asLongList();
|
||||
|
||||
final Long blockOffset = keyToValueToDocId.getValue(clusterId, DataStore.TAG_ALL_DOCS);
|
||||
|
||||
if (blockOffset != null) {
|
||||
final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffset, diskStorage);
|
||||
final LongList longList = bsFile.asLongList();
|
||||
|
||||
return longList;
|
||||
} else {
|
||||
return new LongList(0);
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
@@ -131,42 +144,36 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor<LongList> {
|
||||
|
||||
private List<LongList> filterByWildcard(final String propertyName, final Pattern valuePattern) {
|
||||
final List<LongList> result = new ArrayList<>();
|
||||
try {
|
||||
final long start = System.nanoTime();
|
||||
keyToValueToDocId.visitValues(new Tag(propertyName, ""), (tags, blockOffsetToDocIds) -> {
|
||||
try {
|
||||
if (valuePattern.matcher(tags.getValueAsString()).matches()) {
|
||||
try (final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffsetToDocIds,
|
||||
diskStorage)) {
|
||||
|
||||
// We know that all LongLists coming from a BSFile are sorted, non-overlapping
|
||||
// and increasing, that means we can just concatenate them and get a sorted
|
||||
// list.
|
||||
final List<LongList> longLists = bsFile.streamOfLongLists().collect(Collectors.toList());
|
||||
final LongList concatenatedLists = concatenateLists(longLists);
|
||||
final long start = System.nanoTime();
|
||||
keyToValueToDocId.visitValues(clusterId, new Tag(propertyName, ""), (tags, blockOffsetToDocIds) -> {
|
||||
try {
|
||||
if (valuePattern.matcher(tags.getValueAsString()).matches()) {
|
||||
try (final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffsetToDocIds, diskStorage)) {
|
||||
|
||||
Preconditions.checkTrue(concatenatedLists.isSorted(),
|
||||
"The LongLists containing document ids must be sorted, "
|
||||
+ "non-overlapping and increasing, so that the concatenation "
|
||||
+ "is sorted. This is guaranteed by the fact that document ids "
|
||||
+ "are generated in monotonically increasing order.");
|
||||
// We know that all LongLists coming from a BSFile are sorted, non-overlapping
|
||||
// and increasing, that means we can just concatenate them and get a sorted
|
||||
// list.
|
||||
final List<LongList> longLists = bsFile.streamOfLongLists().collect(Collectors.toList());
|
||||
final LongList concatenatedLists = concatenateLists(longLists);
|
||||
|
||||
result.add(concatenatedLists);
|
||||
}
|
||||
Preconditions.checkTrue(concatenatedLists.isSorted(),
|
||||
"The LongLists containing document ids must be sorted, "
|
||||
+ "non-overlapping and increasing, so that the concatenation "
|
||||
+ "is sorted. This is guaranteed by the fact that document ids "
|
||||
+ "are generated in monotonically increasing order.");
|
||||
|
||||
result.add(concatenatedLists);
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
});
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
});
|
||||
|
||||
LOGGER.trace("filterByWildcard: for key {} took {}ms", propertyName,
|
||||
(System.nanoTime() - start) / 1_000_000.0);
|
||||
|
||||
return result;
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
LOGGER.trace("filterByWildcard: for key {} took {}ms", propertyName, (System.nanoTime() - start) / 1_000_000.0);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private LongList merge(final Collection<LongList> lists) {
|
||||
|
||||
@@ -6,8 +6,7 @@ import java.awt.event.KeyEvent;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
@@ -24,6 +23,8 @@ import javax.swing.JFrame;
|
||||
import javax.swing.JTextArea;
|
||||
import javax.swing.JTextField;
|
||||
|
||||
import org.lucares.pdb.api.DateTimeRange;
|
||||
import org.lucares.pdb.api.Query;
|
||||
import org.lucares.pdb.api.Tags;
|
||||
import org.lucares.pdb.blockstorage.BSFile;
|
||||
import org.lucares.pdb.datastore.Doc;
|
||||
@@ -59,6 +60,8 @@ public class DataStoreTest {
|
||||
public void testQuery() throws Exception {
|
||||
|
||||
dataStore = new DataStore(dataDirectory);
|
||||
final DateTimeRange dateRange = DateTimeRange.relativeHours(1);
|
||||
final ClusterId clusterId = DateIndexExtension.toClusterIds(dateRange).get(0);
|
||||
|
||||
final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim");
|
||||
final Tags pigeonJennifer = Tags.createAndAddToDictionary("bird", "pigeon", "name", "Jennifer");
|
||||
@@ -67,40 +70,40 @@ public class DataStoreTest {
|
||||
final Tags labradorTim = Tags.createAndAddToDictionary("dog", "labrador", "name", "Tim");
|
||||
|
||||
tagsToBlockStorageRootBlockNumber = new HashMap<>();
|
||||
tagsToBlockStorageRootBlockNumber.put(eagleTim, dataStore.createNewFile(eagleTim));
|
||||
tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(pigeonJennifer));
|
||||
tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(flamingoJennifer));
|
||||
tagsToBlockStorageRootBlockNumber.put(labradorJenny, dataStore.createNewFile(labradorJenny));
|
||||
tagsToBlockStorageRootBlockNumber.put(labradorTim, dataStore.createNewFile(labradorTim));
|
||||
tagsToBlockStorageRootBlockNumber.put(eagleTim, dataStore.createNewFile(clusterId, eagleTim));
|
||||
tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(clusterId, pigeonJennifer));
|
||||
tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(clusterId, flamingoJennifer));
|
||||
tagsToBlockStorageRootBlockNumber.put(labradorJenny, dataStore.createNewFile(clusterId, labradorJenny));
|
||||
tagsToBlockStorageRootBlockNumber.put(labradorTim, dataStore.createNewFile(clusterId, labradorTim));
|
||||
|
||||
assertSearch("bird=eagle", eagleTim);
|
||||
assertSearch("dog=labrador", labradorJenny, labradorTim);
|
||||
assertSearch("name=Tim", eagleTim, labradorTim);
|
||||
assertSearch("dog=labrador and name=Tim", labradorTim);
|
||||
assertSearch("dog=labrador and !name=Tim", labradorJenny);
|
||||
assertSearch("name=Jennifer or name=Jenny", pigeonJennifer, flamingoJennifer, labradorJenny);
|
||||
assertSearch(dateRange, "bird=eagle", eagleTim);
|
||||
assertSearch(dateRange, "dog=labrador", labradorJenny, labradorTim);
|
||||
assertSearch(dateRange, "name=Tim", eagleTim, labradorTim);
|
||||
assertSearch(dateRange, "dog=labrador and name=Tim", labradorTim);
|
||||
assertSearch(dateRange, "dog=labrador and !name=Tim", labradorJenny);
|
||||
assertSearch(dateRange, "name=Jennifer or name=Jenny", pigeonJennifer, flamingoJennifer, labradorJenny);
|
||||
|
||||
// a͟n͟d binds stronger than o͟r
|
||||
assertSearch("name=Tim and dog=labrador or bird=pigeon", pigeonJennifer, labradorTim);
|
||||
assertSearch("bird=pigeon or name=Tim and dog=labrador", pigeonJennifer, labradorTim);
|
||||
assertSearch(dateRange, "name=Tim and dog=labrador or bird=pigeon", pigeonJennifer, labradorTim);
|
||||
assertSearch(dateRange, "bird=pigeon or name=Tim and dog=labrador", pigeonJennifer, labradorTim);
|
||||
|
||||
// parenthesis override priority of a͟n͟d
|
||||
assertSearch("name=Tim and (dog=labrador or bird=pigeon)", labradorTim);
|
||||
assertSearch("(dog=labrador or bird=pigeon) and name=Tim", labradorTim);
|
||||
assertSearch(dateRange, "name=Tim and (dog=labrador or bird=pigeon)", labradorTim);
|
||||
assertSearch(dateRange, "(dog=labrador or bird=pigeon) and name=Tim", labradorTim);
|
||||
|
||||
// wildcards
|
||||
assertSearch("bird=*", eagleTim, pigeonJennifer, flamingoJennifer);
|
||||
assertSearch("name=Jen*", pigeonJennifer, flamingoJennifer, labradorJenny);
|
||||
assertSearch("dog=*dor", labradorJenny, labradorTim);
|
||||
assertSearch("dog=lab*dor", labradorJenny, labradorTim);
|
||||
assertSearch("dog=*lab*dor*", labradorJenny, labradorTim);
|
||||
assertSearch(dateRange, "bird=*", eagleTim, pigeonJennifer, flamingoJennifer);
|
||||
assertSearch(dateRange, "name=Jen*", pigeonJennifer, flamingoJennifer, labradorJenny);
|
||||
assertSearch(dateRange, "dog=*dor", labradorJenny, labradorTim);
|
||||
assertSearch(dateRange, "dog=lab*dor", labradorJenny, labradorTim);
|
||||
assertSearch(dateRange, "dog=*lab*dor*", labradorJenny, labradorTim);
|
||||
|
||||
// 'in' queries
|
||||
assertSearch("bird=(eagle, pigeon, flamingo)", eagleTim, pigeonJennifer, flamingoJennifer);
|
||||
assertSearch("dog = (labrador) and name =Tim,Jennifer", labradorTim);
|
||||
assertSearch("name =Jenn*", pigeonJennifer, flamingoJennifer, labradorJenny);
|
||||
assertSearch("name = (*) and dog=labrador", labradorJenny, labradorTim);
|
||||
assertSearch("name =XYZ, * and dog=labrador", labradorJenny, labradorTim);
|
||||
assertSearch(dateRange, "bird=(eagle, pigeon, flamingo)", eagleTim, pigeonJennifer, flamingoJennifer);
|
||||
assertSearch(dateRange, "dog = (labrador) and name =Tim,Jennifer", labradorTim);
|
||||
assertSearch(dateRange, "name =Jenn*", pigeonJennifer, flamingoJennifer, labradorJenny);
|
||||
assertSearch(dateRange, "name = (*) and dog=labrador", labradorJenny, labradorTim);
|
||||
assertSearch(dateRange, "name =XYZ, * and dog=labrador", labradorJenny, labradorTim);
|
||||
|
||||
}
|
||||
|
||||
@@ -111,10 +114,11 @@ public class DataStoreTest {
|
||||
final Tags pigeonJennifer = Tags.createAndAddToDictionary("bird", "pigeon", "name", "Jennifer");
|
||||
final Tags flamingoJennifer = Tags.createAndAddToDictionary("bird", "flamingo", "name", "Jennifer");
|
||||
|
||||
tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(pigeonJennifer));
|
||||
tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(flamingoJennifer));
|
||||
final ClusterId clusterId = new ClusterId("clusterA");
|
||||
tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(clusterId, pigeonJennifer));
|
||||
tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(clusterId, flamingoJennifer));
|
||||
|
||||
final Optional<Doc> docsFlamingoJennifer = dataStore.getByTags(flamingoJennifer);
|
||||
final Optional<Doc> docsFlamingoJennifer = dataStore.getByTags(clusterId, flamingoJennifer);
|
||||
Assert.assertTrue(docsFlamingoJennifer.isPresent(), "doc for docsFlamingoJennifer");
|
||||
}
|
||||
|
||||
@@ -122,7 +126,7 @@ public class DataStoreTest {
|
||||
|
||||
dataStore = new DataStore(dataDirectory);
|
||||
final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim");
|
||||
final long eagleTimBlockOffset = dataStore.createNewFile(eagleTim);
|
||||
final long eagleTimBlockOffset = dataStore.createNewFile(new ClusterId("clusterA"), eagleTim);
|
||||
Assert.assertEquals(eagleTimBlockOffset % BSFile.BLOCK_SIZE, 0);
|
||||
}
|
||||
|
||||
@@ -156,6 +160,7 @@ public class DataStoreTest {
|
||||
// TODO should only match "Jenny", because Jenny is the only non-bird name
|
||||
// starting with 'Jen'
|
||||
result.add(new Object[] { "!(type=bird and name=Jen|)", "name", Arrays.asList("Jennifer", "Jenny") });
|
||||
|
||||
result.add(new Object[] { "!(type=dog and name=|) and !type=cat", "name",
|
||||
Arrays.asList("Jennifer", "Jenny", "Tim") });
|
||||
|
||||
@@ -167,6 +172,8 @@ public class DataStoreTest {
|
||||
final List<String> expectedProposedValues) throws Exception {
|
||||
|
||||
dataStore = new DataStore(dataDirectory);
|
||||
final ClusterId clusterId = DateIndexExtension.now();
|
||||
final DateTimeRange dateRange = DateTimeRange.relativeHours(1);
|
||||
|
||||
final List<Tags> tags = Arrays.asList(
|
||||
Tags.createAndAddToDictionary("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"),
|
||||
@@ -182,48 +189,9 @@ public class DataStoreTest {
|
||||
Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"),
|
||||
Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "John"));
|
||||
|
||||
tags.forEach(dataStore::createNewFile);
|
||||
tags.forEach(t -> dataStore.createNewFile(clusterId, t));
|
||||
|
||||
assertProposals(queryWithCaret, field, expectedProposedValues);
|
||||
}
|
||||
|
||||
public void test() throws Exception {
|
||||
|
||||
try (final DataStore dataStore = new DataStore(dataDirectory)) {
|
||||
|
||||
final OffsetDateTime date = OffsetDateTime.now(ZoneOffset.UTC);
|
||||
final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
|
||||
|
||||
final PdbWriter newFileForTags = dataStore.getWriter(date.toInstant().toEpochMilli(), tags);
|
||||
|
||||
final PdbWriter existingFileForTags = dataStore.getWriter(date.toInstant().toEpochMilli(), tags);
|
||||
|
||||
Assert.assertSame(newFileForTags, existingFileForTags);
|
||||
}
|
||||
}
|
||||
|
||||
public void testAppendingToSameFile() throws Exception {
|
||||
|
||||
try (final DataStore dataStore = new DataStore(dataDirectory)) {
|
||||
|
||||
// dayC is before dayA and dayB
|
||||
final long dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1).toInstant().toEpochMilli();
|
||||
final long dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1).toInstant().toEpochMilli();
|
||||
final long dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1).toInstant().toEpochMilli();
|
||||
|
||||
final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
|
||||
|
||||
final PdbWriter writerForDayA = dataStore.getWriter(dayA, tags);
|
||||
writerForDayA.write(dayA, 1);
|
||||
final PdbWriter writerForDayB = dataStore.getWriter(dayB, tags);
|
||||
writerForDayB.write(dayB, 2);
|
||||
|
||||
final PdbWriter writerForDayC = dataStore.getWriter(dayC, tags);
|
||||
writerForDayC.write(dayC, 3);
|
||||
|
||||
Assert.assertSame(writerForDayA, writerForDayB);
|
||||
Assert.assertSame(writerForDayA, writerForDayC);
|
||||
}
|
||||
assertProposals(dateRange, queryWithCaret, field, expectedProposedValues);
|
||||
}
|
||||
|
||||
public void testIdenticalDatesGoIntoSameFile() throws Exception {
|
||||
@@ -234,19 +202,16 @@ public class DataStoreTest {
|
||||
|
||||
final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue");
|
||||
|
||||
final PdbWriter fileA = dataStore.getWriter(timestamp, tags);
|
||||
fileA.write(timestamp, 1);
|
||||
dataStore.write(timestamp, tags, 1);
|
||||
dataStore.write(timestamp, tags, 2);
|
||||
|
||||
final PdbWriter fileB = dataStore.getWriter(timestamp, tags);
|
||||
fileA.write(timestamp, 2);
|
||||
|
||||
Assert.assertEquals(fileA, fileB);
|
||||
Assert.assertEquals(dataStore.sizeWriterCache(), 1, "size of the writer cache");
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(final String[] args) throws IOException, InterruptedException {
|
||||
final Path dir = Files.createTempDirectory("pdb");
|
||||
try (DataStore dataStore = new DataStore(dir)) {
|
||||
try (final DataStore dataStore = new DataStore(dir)) {
|
||||
|
||||
final List<Tags> tags = Arrays.asList(
|
||||
Tags.createAndAddToDictionary("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"),
|
||||
@@ -265,7 +230,8 @@ public class DataStoreTest {
|
||||
Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"),
|
||||
Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "John"));
|
||||
|
||||
tags.forEach(dataStore::createNewFile);
|
||||
final ClusterId clusterId = DateIndexExtension.now();
|
||||
tags.forEach(t -> dataStore.createNewFile(clusterId, t));
|
||||
|
||||
final JFrame frame = new JFrame();
|
||||
final JTextField input = new JTextField();
|
||||
@@ -306,7 +272,7 @@ public class DataStoreTest {
|
||||
|
||||
}
|
||||
});
|
||||
final List<Doc> docs = dataStore.search("");
|
||||
final List<Doc> docs = dataStore.search(Query.createQuery("", DateTimeRange.relative(1, ChronoUnit.DAYS)));
|
||||
final StringBuilder out = new StringBuilder();
|
||||
out.append("info\n");
|
||||
for (final Doc doc : docs) {
|
||||
@@ -321,7 +287,7 @@ public class DataStoreTest {
|
||||
}
|
||||
}
|
||||
|
||||
private void assertProposals(final String queryWithCaret, final String field,
|
||||
private void assertProposals(final DateTimeRange dateRange, final String queryWithCaret, final String field,
|
||||
final List<String> expectedProposedValues) {
|
||||
final String query = queryWithCaret.replace("|", "");
|
||||
final int caretIndex = queryWithCaret.indexOf("|");
|
||||
@@ -329,7 +295,7 @@ public class DataStoreTest {
|
||||
System.out.println(
|
||||
"proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList()));
|
||||
|
||||
proposals.forEach(p -> assertQueryFindsResults(p.getNewQuery()));
|
||||
proposals.forEach(p -> assertQueryFindsResults(dateRange, p.getNewQuery()));
|
||||
|
||||
final List<String> proposedValues = CollectionUtils.map(proposals, Proposal::getProposedTag);
|
||||
Collections.sort(proposedValues);
|
||||
@@ -337,13 +303,13 @@ public class DataStoreTest {
|
||||
Assert.assertEquals(proposedValues.toString(), expectedProposedValues.toString(), "proposed values:");
|
||||
}
|
||||
|
||||
private void assertQueryFindsResults(final String query) {
|
||||
final List<Doc> result = dataStore.search(query);
|
||||
private void assertQueryFindsResults(final DateTimeRange dateRange, final String query) {
|
||||
final List<Doc> result = dataStore.search(new Query(query, dateRange));
|
||||
Assert.assertFalse(result.isEmpty(), "The query '" + query + "' must return a result, but didn't.");
|
||||
}
|
||||
|
||||
private void assertSearch(final String query, final Tags... tags) {
|
||||
final List<Doc> actualDocs = dataStore.search(query);
|
||||
private void assertSearch(final DateTimeRange dateRange, final String query, final Tags... tags) {
|
||||
final List<Doc> actualDocs = dataStore.search(new Query(query, dateRange));
|
||||
final List<Long> actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber);
|
||||
|
||||
final List<Long> expectedPaths = CollectionUtils.map(tags, tagsToBlockStorageRootBlockNumber::get);
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
package org.lucares.pdb.datastore.lang;
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.lucares.pdb.datastore.DateTimeRange;
|
||||
import org.lucares.pdb.api.DateTimeRange;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
@@ -64,6 +65,42 @@ public class DateIndexExtensionTest {
|
||||
Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(min_201801), "201801");
|
||||
Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(max_201801), "201801");
|
||||
Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(mid_201711), "201711");
|
||||
System.out.println(DateIndexExtension.DATE_PREFIX_CACHE);
|
||||
}
|
||||
|
||||
public void testDateRanges() {
|
||||
final OffsetDateTime mid_201712 = OffsetDateTime.of(2017, 12, 7, 1, 1, 1, 0, ZoneOffset.UTC)
|
||||
.withOffsetSameInstant(ZoneOffset.ofHours(-2));
|
||||
final OffsetDateTime min_201801 = OffsetDateTime.of(2018, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC)
|
||||
.withOffsetSameInstant(ZoneOffset.ofHours(-8));
|
||||
final OffsetDateTime min_201802 = OffsetDateTime.of(2018, 2, 1, 0, 0, 0, 0, ZoneOffset.UTC)
|
||||
.withOffsetSameInstant(ZoneOffset.ofHours(12));
|
||||
|
||||
final DateTimeRange range_201712_201802 = new DateTimeRange(mid_201712, min_201802);
|
||||
final DateTimeRange range_201712_201801 = new DateTimeRange(mid_201712, min_201801);
|
||||
final DateTimeRange range_201712_201712 = new DateTimeRange(mid_201712, mid_201712);
|
||||
|
||||
final List<ClusterId> dateIndexPrefixesWithEmptyCache = DateIndexExtension.toClusterIds(range_201712_201802);
|
||||
Assert.assertEquals(dateIndexPrefixesWithEmptyCache,
|
||||
Arrays.asList(new ClusterId("201712"), new ClusterId("201801"), new ClusterId("201802")));
|
||||
|
||||
final List<ClusterId> dateIndexPrefixesWithFilledCache = DateIndexExtension.toClusterIds(range_201712_201801);
|
||||
Assert.assertEquals(dateIndexPrefixesWithFilledCache,
|
||||
Arrays.asList(new ClusterId("201712"), new ClusterId("201801")));
|
||||
|
||||
final List<ClusterId> dateIndexPrefixesOneMonth = DateIndexExtension.toClusterIds(range_201712_201712);
|
||||
Assert.assertEquals(dateIndexPrefixesOneMonth, Arrays.asList(new ClusterId("201712")));
|
||||
}
|
||||
|
||||
public void testDateRangeToEpochMilli() {
|
||||
final OffsetDateTime mid_201712 = OffsetDateTime.of(2017, 12, 7, 1, 1, 1, 0, ZoneOffset.ofHours(3));
|
||||
final OffsetDateTime min_201802 = OffsetDateTime.of(2018, 2, 15, 0, 0, 0, 0, ZoneOffset.ofHours(7));
|
||||
|
||||
final long exp_201712 = OffsetDateTime.of(2017, 12, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli();
|
||||
final long exp_201801 = OffsetDateTime.of(2018, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli();
|
||||
final long exp_201802 = OffsetDateTime.of(2018, 2, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli();
|
||||
|
||||
final List<Long> dateIndexEpochMillis = DateIndexExtension
|
||||
.toDateIndexEpochMillis(new DateTimeRange(mid_201712, min_201802));
|
||||
Assert.assertEquals(dateIndexEpochMillis, Arrays.asList(exp_201712, exp_201801, exp_201802));
|
||||
}
|
||||
}
|
||||
@@ -38,6 +38,7 @@ public class ProposerTest {
|
||||
|
||||
private void initDatabase() throws Exception {
|
||||
dataStore = new DataStore(dataDirectory);
|
||||
final ClusterId now = DateIndexExtension.now();
|
||||
|
||||
final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim");
|
||||
final Tags eagleTimothy = Tags.createAndAddToDictionary("bird", "eagle", "name", "Timothy");
|
||||
@@ -46,12 +47,12 @@ public class ProposerTest {
|
||||
final Tags labradorJenny = Tags.createAndAddToDictionary("dog", "labrador", "name", "Jenny");
|
||||
final Tags labradorTim = Tags.createAndAddToDictionary("dog", "labrador", "name", "Tim");
|
||||
|
||||
dataStore.createNewFile(eagleTim);
|
||||
dataStore.createNewFile(eagleTimothy);
|
||||
dataStore.createNewFile(pigeonJennifer);
|
||||
dataStore.createNewFile(flamingoJennifer);
|
||||
dataStore.createNewFile(labradorJenny);
|
||||
dataStore.createNewFile(labradorTim);
|
||||
dataStore.createNewFile(now, eagleTim);
|
||||
dataStore.createNewFile(now, eagleTimothy);
|
||||
dataStore.createNewFile(now, pigeonJennifer);
|
||||
dataStore.createNewFile(now, flamingoJennifer);
|
||||
dataStore.createNewFile(now, labradorJenny);
|
||||
dataStore.createNewFile(now, labradorTim);
|
||||
}
|
||||
|
||||
public void testEmptyQuery() throws Exception {
|
||||
|
||||
Reference in New Issue
Block a user