From 59aea1a15f2f74a9d106c90147870f499b797eab Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sun, 24 Feb 2019 16:50:57 +0100 Subject: [PATCH] introduce index clustering (part 1) In order to prevent files from getting too big and make it easier to implement retention policies, we are splitting all files into chunks. Each chunk contains the data for a time interval (1 month per default). This first changeset introduces the ClusteredPersistentMap that implements this for PersistentMap. It is used for a couple (not all) of indices. --- build.gradle | 4 +- .../lucares/pdb/datastore/DateTimeRange.java | 29 -- .../pdb/datastore/internal/ClusterId.java | 56 ++++ .../internal/ClusteredPersistentMap.java | 111 ++++++++ .../pdb/datastore/internal/DataStore.java | 251 +++++++++++------- .../DateIndexExtension.java | 79 +++++- .../pdb/datastore/lang/CandidateGrouper.java | 2 +- .../lang/ExpressionToDocIdVisitor.java | 77 +++--- .../pdb/datastore/internal/DataStoreTest.java | 140 ++++------ .../DateIndexExtensionTest.java | 43 ++- .../pdb/datastore/internal/ProposerTest.java | 13 +- gradle/wrapper/gradle-wrapper.properties | 2 +- .../org/lucares/pdb/api/DateTimeRange.java | 100 +++++++ .../main/java/org/lucares/pdb/api/Query.java | 77 ++++++ .../lucares/pdb/api/DateTimeRangeTest.java | 19 +- .../lucares/pdb/plot/api/PlotSettings.java | 2 +- .../lucares/recommind/logs/ScatterPlot.java | 5 +- .../java/org/lucares/pdbui/PdbController.java | 13 +- .../org/lucares/pdbui/domain/DateRange.java | 76 ++++++ .../db/ingestor/TcpIngestorTest.java | 17 +- .../org/lucares/performance/db/PdbExport.java | 5 +- .../lucares/performance/db/PerformanceDb.java | 14 +- .../org/lucares/performance/db/TimeRange.java | 68 ----- .../performance/db/PerformanceDbTest.java | 55 ++-- .../org/lucares/performance/db/Query.java | 27 -- 25 files changed, 863 insertions(+), 422 deletions(-) delete mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/DateTimeRange.java create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusterId.java create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java rename data-store/src/main/java/org/lucares/pdb/datastore/{lang => internal}/DateIndexExtension.java (55%) rename data-store/src/test/java/org/lucares/pdb/datastore/{lang => internal}/DateIndexExtensionTest.java (52%) create mode 100644 pdb-api/src/main/java/org/lucares/pdb/api/DateTimeRange.java create mode 100644 pdb-api/src/main/java/org/lucares/pdb/api/Query.java rename performanceDb/src/test/java/org/lucares/performance/db/TimeRangeTest.java => pdb-api/src/test/java/org/lucares/pdb/api/DateTimeRangeTest.java (60%) create mode 100644 pdb-ui/src/main/java/org/lucares/pdbui/domain/DateRange.java delete mode 100644 performanceDb/src/main/java/org/lucares/performance/db/TimeRange.java delete mode 100644 performanceDb/src/test/java/org/lucares/performance/db/Query.java diff --git a/build.gradle b/build.gradle index 2b3a64a..2214603 100644 --- a/build.gradle +++ b/build.gradle @@ -28,7 +28,7 @@ ext { lib_log4j2_core = 'org.apache.logging.log4j:log4j-core:2.11.1' lib_log4j2_slf4j_impl = 'org.apache.logging.log4j:log4j-slf4j-impl:2.11.1' - lib_primitive_collections='org.lucares:primitiveCollections:0.1.20181120195412' + lib_primitive_collections='org.lucares:primitiveCollections:0.1.20190217091430' lib_spring_boot_log4j2='org.springframework.boot:spring-boot-starter-log4j2:2.1.2.RELEASE' lib_spring_boot_mustache='org.springframework.boot:spring-boot-starter-mustache:2.1.2.RELEASE' @@ -89,5 +89,5 @@ allprojects { } wrapper { - gradleVersion = '5.1.1' + gradleVersion = '5.2.1' } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/DateTimeRange.java b/data-store/src/main/java/org/lucares/pdb/datastore/DateTimeRange.java deleted file mode 100644 index 08bfe23..0000000 --- a/data-store/src/main/java/org/lucares/pdb/datastore/DateTimeRange.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.lucares.pdb.datastore; - -import java.time.OffsetDateTime; - -public class DateTimeRange { - - public static final DateTimeRange MAX = new DateTimeRange(OffsetDateTime.MIN, OffsetDateTime.MAX); - - private final OffsetDateTime start; - private final OffsetDateTime end; - - public DateTimeRange(final OffsetDateTime start, final OffsetDateTime end) { - this.start = start; - this.end = end; - } - - public OffsetDateTime getStart() { - return start; - } - - public OffsetDateTime getEnd() { - return end; - } - - @Override - public String toString() { - return start + "-" + end; - } -} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusterId.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusterId.java new file mode 100644 index 0000000..cb034dd --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusterId.java @@ -0,0 +1,56 @@ +package org.lucares.pdb.datastore.internal; + +public class ClusterId { + private final String clusterId; + + /** + * Create a new cluster id. + * + * @param clusterId the id, e.g. a time like 201902 (cluster for entries of + * February 2019) + */ + public ClusterId(final String clusterId) { + super(); + this.clusterId = clusterId; + } + + /** + * @return the id, e.g. a time like 201902 (cluster for entries of February + * 2019) + */ + public String getClusterId() { + return clusterId; + } + + @Override + public String toString() { + return clusterId; + } + + /* + * non-standard hashcode implementation! This class is just a wrapper for + * string, so we delegate directly to String.hashCode(). + */ + @Override + public int hashCode() { + return clusterId.hashCode(); + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final ClusterId other = (ClusterId) obj; + if (clusterId == null) { + if (other.clusterId != null) + return false; + } else if (!clusterId.equals(other.clusterId)) + return false; + return true; + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java new file mode 100644 index 0000000..519a879 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/ClusteredPersistentMap.java @@ -0,0 +1,111 @@ +package org.lucares.pdb.datastore.internal; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; + +import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.datastore.ReadRuntimeException; +import org.lucares.pdb.map.PersistentMap; +import org.lucares.pdb.map.Visitor; + +public class ClusteredPersistentMap implements AutoCloseable { + + private final ConcurrentHashMap> maps = new ConcurrentHashMap<>(); + + private final Function> creator; + + // TODO we need two creators, one that actually creates a new map and one that + // only creates a new map if the file on disk already exists + public ClusteredPersistentMap(final Function> creator) { + this.creator = (dateIndexPrefix) -> creator.apply(dateIndexPrefix); + } + + public V getValue(final ClusterId clusterId, final K key) { + try { + + final PersistentMap map = maps.computeIfAbsent(clusterId, creator); + return map != null ? map.getValue(key) : null; + } catch (final IOException e) { + throw new ReadRuntimeException(e); + } + } + + public List getValues(final DateTimeRange dateRange, final K key) { + try { + final List result = new ArrayList<>(); + final List clusterIds = DateIndexExtension.toClusterIds(dateRange); + + for (final ClusterId clusterId : clusterIds) { + final PersistentMap map = maps.computeIfAbsent(clusterId, creator); + if (map != null) { + final V value = map.getValue(key); + if (value != null) { + result.add(value); + } + } + } + + return result; + } catch (final IOException e) { + throw new ReadRuntimeException(e); + } + } + + public V putValue(final ClusterId clusterId, final K key, final V value) { + try { + + final PersistentMap map = maps.computeIfAbsent(clusterId, creator); + return map.putValue(key, value); + } catch (final IOException e) { + throw new ReadRuntimeException(e); + } + } + + public void visitValues(final ClusterId clusterId, final K keyPrefix, final Visitor visitor) { + try { + final PersistentMap map = maps.computeIfAbsent(clusterId, creator); + if (map != null) { + map.visitValues(keyPrefix, visitor); + } + } catch (final IOException e) { + throw new ReadRuntimeException(e); + } + } + + public void visitValues(final DateTimeRange dateRange, final K keyPrefix, final Visitor visitor) { + try { + final List clusterIds = DateIndexExtension.toClusterIds(dateRange); + + for (final ClusterId clusterId : clusterIds) { + final PersistentMap map = maps.get(clusterId); + if (map != null) { + map.visitValues(keyPrefix, visitor); + } + } + } catch (final IOException e) { + throw new ReadRuntimeException(e); + } + } + + @Override + public void close() { + final List throwables = new ArrayList<>(); + + for (final PersistentMap map : maps.values()) { + try { + map.close(); + } catch (final IOException e) { + throwables.add(e); + } + } + if (!throwables.isEmpty()) { + final RuntimeException ex = new RuntimeException(); + throwables.forEach(ex::addSuppressed); + throw ex; + } + } + +} diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java index e944a83..c7447b8 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DataStore.java @@ -16,6 +16,8 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; import org.lucares.collections.LongList; +import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.Query; import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.api.StringCompressor; import org.lucares.pdb.api.Tag; @@ -61,11 +63,11 @@ public class DataStore implements AutoCloseable { public static Tag TAG_ALL_DOCS = null; - private final PersistentMap docIdToDoc; + private final ClusteredPersistentMap docIdToDoc; - private final PersistentMap tagsToDocId; + private final ClusteredPersistentMap tagsToDocId; - private final PersistentMap tagToDocsId; + private final ClusteredPersistentMap tagToDocsId; private final QueryCompletionIndex queryCompletionIndex; @@ -92,15 +94,33 @@ public class DataStore implements AutoCloseable { diskStorage = new DiskStorage(diskStorageFilePath); diskStorage.ensureAlignmentForNewBlocks(BSFile.BLOCK_SIZE); - final Path keyToValueToDocIdsIndexPath = storageBasePath.resolve("keyToValueToDocIdsIndex.bs"); - tagToDocsId = new PersistentMap<>(keyToValueToDocIdsIndexPath, new TagEncoderDecoder(), - PersistentMap.LONG_CODER); + tagToDocsId = new ClusteredPersistentMap<>(clusterId -> { + try { + final Path file = storageBasePath.resolve(clusterId.getClusterId()) + .resolve("keyToValueToDocIdsIndex.bs"); + return new PersistentMap<>(file, new TagEncoderDecoder(), PersistentMap.LONG_CODER); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }); - final Path tagsToDocIdIndexPath = storageBasePath.resolve("tagsToDocIdIndex.bs"); - tagsToDocId = new PersistentMap<>(tagsToDocIdIndexPath, new TagsEncoderDecoder(), PersistentMap.LONG_CODER); + tagsToDocId = new ClusteredPersistentMap<>(clusterId -> { + try { + final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve("tagsToDocIdIndex.bs"); + return new PersistentMap<>(file, new TagsEncoderDecoder(), PersistentMap.LONG_CODER); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }); - final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs"); - docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, new DocEncoderDecoder()); + docIdToDoc = new ClusteredPersistentMap<>(clusterId -> { + try { + final Path file = storageBasePath.resolve(clusterId.getClusterId()).resolve("docIdToDocIndex.bs"); + return new PersistentMap<>(file, PersistentMap.LONG_CODER, new DocEncoderDecoder()); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }); queryCompletionIndex = new QueryCompletionIndex(storageBasePath); @@ -117,7 +137,8 @@ public class DataStore implements AutoCloseable { } public void write(final long dateAsEpochMilli, final Tags tags, final long value) { - final PdbWriter writer = getWriter(dateAsEpochMilli, tags); + final ClusterId clusterId = DateIndexExtension.toClusterId(dateAsEpochMilli); + final PdbWriter writer = getWriter(clusterId, tags); writer.write(dateAsEpochMilli, value); } @@ -126,15 +147,15 @@ public class DataStore implements AutoCloseable { return queryCompletionIndex; } - public long createNewFile(final Tags tags) { + public long createNewFile(final ClusterId clusterId, final Tags tags) { try { final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); final long docId = createUniqueDocId(); final Doc doc = new Doc(tags, newFilesRootBlockOffset); - docIdToDoc.putValue(docId, doc); + docIdToDoc.putValue(clusterId, docId, doc); - final Long oldDocId = tagsToDocId.putValue(tags, docId); + final Long oldDocId = tagsToDocId.putValue(clusterId, tags, docId); Preconditions.checkNull(oldDocId, "There must be at most one document for tags: {0}", tags); // store mapping from tag to docId, so that we can find all docs for a given tag @@ -142,11 +163,11 @@ public class DataStore implements AutoCloseable { ts.add(TAG_ALL_DOCS); for (final Tag tag : ts) { - Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(tag); + Long diskStoreOffsetForDocIdsOfTag = tagToDocsId.getValue(clusterId, tag); if (diskStoreOffsetForDocIdsOfTag == null) { diskStoreOffsetForDocIdsOfTag = diskStorage.allocateBlock(BSFile.BLOCK_SIZE); - tagToDocsId.putValue(tag, diskStoreOffsetForDocIdsOfTag); + tagToDocsId.putValue(clusterId, tag, diskStoreOffsetForDocIdsOfTag); } try (final LongStreamFile docIdsOfTag = LongStreamFile.existingFile(diskStoreOffsetForDocIdsOfTag, @@ -169,7 +190,7 @@ public class DataStore implements AutoCloseable { return NEXT_DOC_ID.getAndIncrement(); } - public List getFilesForQuery(final String query) { + public List getFilesForQuery(final Query query) { final List searchResult = search(query); if (searchResult.size() > 500_000) { @@ -193,55 +214,17 @@ public class DataStore implements AutoCloseable { return result; } - public List search(final String query) { + public List search(final Query query) { try { - final LongList docIdsList = executeQuery(query); - LOGGER.trace("query {} found {} docs", query, docIdsList.size()); - final List result = mapDocIdsToDocs(docIdsList); - return result; - } catch (final IOException e) { - throw new RuntimeIOException(e); - } - } + final List result = new ArrayList<>(); + final List clusterIds = DateIndexExtension.toClusterIds(query.getDateRange()); + for (final ClusterId clusterId : clusterIds) { - public int count(final String query) { - final LongList docIdsList = executeQuery(query); + final LongList docIdsList = executeQuery(clusterId, query.getQuery()); + LOGGER.trace("query {} found {} docs", query, docIdsList.size()); + final List docs = mapDocIdsToDocs(clusterId, docIdsList); + result.addAll(docs); - return docIdsList.size(); - } - - public List getAvailableFields() { - try { - final Set keys = new HashSet<>(); - - final Tag keyPrefix = new Tag("", ""); // will find everything - - tagToDocsId.visitValues(keyPrefix, (tags, __) -> keys.add(tags.getKeyAsString())); - - keys.remove(ALL_DOCS_KEY); - final List result = new ArrayList<>(keys); - Collections.sort(result); - return result; - } catch (final IOException e) { - throw new RuntimeIOException(e); - } - } - - public SortedSet getAvailableValuesForKey(final String query, final String key) { - - try { - final SortedSet result = new TreeSet<>(); - if (query.isEmpty()) { - tagToDocsId.visitValues(new Tag(key, ""), (tag, value) -> result.add(tag.getValueAsString())); - } else { - final List docs = search(query); - for (final Doc doc : docs) { - final String valueForKey = doc.getTags().getValue(key); - - if (valueForKey != null) { - result.add(valueForKey); - } - } } return result; @@ -250,11 +233,59 @@ public class DataStore implements AutoCloseable { } } - private LongList executeQuery(final String query) { + public int count(final Query query) { + int count = 0; + final List clusterIds = DateIndexExtension.toClusterIds(query.getDateRange()); + for (final ClusterId clusterId : clusterIds) { + + final LongList docIdsList = executeQuery(clusterId, query.getQuery()); + count += docIdsList.size(); + } + + return count; + } + + public List getAvailableFields(final DateTimeRange dateRange) { + + final Set keys = new HashSet<>(); + + final Tag keyPrefix = new Tag("", ""); // will find everything + + tagToDocsId.visitValues(dateRange, keyPrefix, (tags, __) -> keys.add(tags.getKeyAsString())); + + keys.remove(ALL_DOCS_KEY); + final List result = new ArrayList<>(keys); + Collections.sort(result); + return result; + + } + + public SortedSet getAvailableValuesForKey(final Query query, final String key) { + + final SortedSet result = new TreeSet<>(); + if (query.getQuery().isEmpty()) { + tagToDocsId.visitValues(query.getDateRange(), new Tag(key, ""), + (tag, __) -> result.add(tag.getValueAsString())); + } else { + final List docs = search(query); + for (final Doc doc : docs) { + final String valueForKey = doc.getTags().getValue(key); + + if (valueForKey != null) { + result.add(valueForKey); + } + } + } + + return result; + + } + + private LongList executeQuery(final ClusterId clusterId, final String query) { final long start = System.nanoTime(); synchronized (docIdToDoc) { final Expression expression = QueryLanguageParser.parse(query); - final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(tagToDocsId, diskStorage); + final ExpressionToDocIdVisitor visitor = new ExpressionToDocIdVisitor(clusterId, tagToDocsId, diskStorage); final LongList docIdsList = expression.visit(visitor); EXECUTE_QUERY_LOGGER.debug("executeQuery({}) took {}ms returned {} results ", query, (System.nanoTime() - start) / 1_000_000.0, docIdsList.size()); @@ -262,7 +293,7 @@ public class DataStore implements AutoCloseable { } } - private List mapDocIdsToDocs(final LongList docIdsList) throws IOException { + private List mapDocIdsToDocs(final ClusterId clusterId, final LongList docIdsList) throws IOException { final List result = new ArrayList<>(docIdsList.size()); synchronized (docIdToDoc) { @@ -270,7 +301,7 @@ public class DataStore implements AutoCloseable { for (int i = 0; i < docIdsList.size(); i++) { final long docId = docIdsList.get(i); - final Doc doc = getDocByDocId(docId); + final Doc doc = getDocByDocId(clusterId, docId); Objects.requireNonNull(doc, "Doc with id " + docId + " did not exist."); result.add(doc); @@ -281,26 +312,50 @@ public class DataStore implements AutoCloseable { return result; } - public Optional getByTags(final Tags tags) { - try { - final Long docId = tagsToDocId.getValue(tags); - if (docId != null) { - final Doc doc = getDocByDocId(docId); - return Optional.of(doc); - } - return Optional.empty(); - } catch (final IOException e) { - throw new RuntimeIOException(e); + public Optional getByTags(final ClusterId clusterId, final Tags tags) { + + final Long docId = tagsToDocId.getValue(clusterId, tags); + + if (docId != null) { + final Doc doc = getDocByDocId(clusterId, docId); + return Optional.of(doc); } + + return Optional.empty(); } - private Doc getDocByDocId(final Long docId) { - return docIdToDocCache.putIfAbsent(docId, () -> { - try { - return docIdToDoc.getValue(docId); - } catch (final IOException e) { - throw new RuntimeIOException(e); + public List getByTags(final DateTimeRange dateRange, final Tags tags) { + + final List result = new ArrayList<>(); + final List docIds = tagsToDocId.getValues(dateRange, tags); + for (final Long docId : docIds) { + + if (docId != null) { + final Doc doc = getDocByDocId(dateRange, docId); + result.add(doc); } + } + + return result; + } + + private Doc getDocByDocId(final ClusterId clusterId, final Long docId) { + return docIdToDocCache.putIfAbsent(docId, () -> { + return docIdToDoc.getValue(clusterId, docId); + }); + } + + private Doc getDocByDocId(final DateTimeRange dateRange, final Long docId) { + return docIdToDocCache.putIfAbsent(docId, () -> { + + final List docIds = docIdToDoc.getValues(dateRange, docId); + if (docIds.size() == 1) { + return docIds.get(0); + } else if (docIds.size() > 1) { + throw new IllegalStateException( + "Found multiple documents for " + dateRange + " and docId " + docId + ": " + docIds); + } + throw new IllegalStateException("Found no documents for " + dateRange + " and docId " + docId); }); } @@ -316,13 +371,18 @@ public class DataStore implements AutoCloseable { return diskStorage; } - PdbWriter getWriter(final long dateAsEpochMilli, final Tags tags) throws ReadException, WriteException { + private PdbWriter getWriter(final ClusterId clusterId, final Tags tags) throws ReadException, WriteException { - return writerCache.putIfAbsent(tags, () -> getWriter(tags)); + return writerCache.putIfAbsent(tags, () -> getWriterInternal(clusterId, tags)); } - private PdbWriter getWriter(final Tags tags) { - final Optional docsForTags = getByTags(tags); + // visible for test + long sizeWriterCache() { + return writerCache.size(); + } + + private PdbWriter getWriterInternal(final ClusterId clusterId, final Tags tags) { + final Optional docsForTags = getByTags(clusterId, tags); PdbWriter writer; if (docsForTags.isPresent()) { try { @@ -333,15 +393,15 @@ public class DataStore implements AutoCloseable { throw new ReadException(e); } } else { - writer = newPdbWriter(tags); + writer = newPdbWriter(clusterId, tags); } return writer; } - private PdbWriter newPdbWriter(final Tags tags) { + private PdbWriter newPdbWriter(final ClusterId clusterId, final Tags tags) { final long start = System.nanoTime(); try { - final PdbFile pdbFile = createNewPdbFile(tags); + final PdbFile pdbFile = createNewPdbFile(clusterId, tags); final PdbWriter result = new PdbWriter(pdbFile, getDiskStorage()); METRICS_LOGGER_NEW_WRITER.debug("newPdbWriter took {}ms tags: {}", @@ -350,12 +410,11 @@ public class DataStore implements AutoCloseable { } catch (final IOException e) { throw new WriteException(e); } - } - private PdbFile createNewPdbFile(final Tags tags) throws IOException { + private PdbFile createNewPdbFile(final ClusterId clusterId, final Tags tags) throws IOException { - final long rootBlockNumber = createNewFile(tags); + final long rootBlockNumber = createNewFile(clusterId, tags); final PdbFile result = new PdbFile(rootBlockNumber, tags); return result; @@ -379,11 +438,7 @@ public class DataStore implements AutoCloseable { } catch (final IOException e) { throw new RuntimeIOException(e); } finally { - try { - tagToDocsId.close(); - } catch (final IOException e) { - throw new RuntimeIOException(e); - } + tagToDocsId.close(); } } } diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/DateIndexExtension.java b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DateIndexExtension.java similarity index 55% rename from data-store/src/main/java/org/lucares/pdb/datastore/lang/DateIndexExtension.java rename to data-store/src/main/java/org/lucares/pdb/datastore/internal/DateIndexExtension.java index c1c1dc4..f191bfb 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/DateIndexExtension.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/internal/DateIndexExtension.java @@ -1,17 +1,18 @@ -package org.lucares.pdb.datastore.lang; +package org.lucares.pdb.datastore.internal; import java.time.Instant; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; import java.util.Map.Entry; -import java.util.Objects; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentNavigableMap; import java.util.concurrent.ConcurrentSkipListMap; -import org.lucares.pdb.datastore.DateTimeRange; +import org.lucares.pdb.api.DateTimeRange; public class DateIndexExtension { @@ -25,18 +26,16 @@ public class DateIndexExtension { static Set toDateIndexPrefix(final DateTimeRange dateRange) { final Set result = new TreeSet<>(); - if (Objects.equals(dateRange, DateTimeRange.MAX)) { - result.add("*"); - } else { - OffsetDateTime current = dateRange.getStart(); - while (current.isBefore(dateRange.getEnd())) { - result.add(toDateIndexPrefix(current)); - current = current.plusMonths(1); + OffsetDateTime current = dateRange.getStart(); + while (current.isBefore(dateRange.getEnd())) { + + result.add(toDateIndexPrefix(current)); + current = current.plusMonths(1); - } - result.add(toDateIndexPrefix(dateRange.getEnd())); } + result.add(toDateIndexPrefix(dateRange.getEnd())); + return result; } @@ -44,6 +43,22 @@ public class DateIndexExtension { return time.format(DATE_PATTERN); } + public static ClusterId toClusterId(final long epochMilli) { + // TODO most calls will be for a similar date -> add a shortcut + final Entry value = DATE_PREFIX_CACHE.floorEntry(epochMilli); + + String result; + if (value == null || !value.getValue().contains(epochMilli)) { + final DatePrefixAndRange newValue = toDatePrefixAndRange(epochMilli); + DATE_PREFIX_CACHE.put(newValue.getMinEpochMilli(), newValue); + result = newValue.getDatePrefix(); + } else { + result = value.getValue().getDatePrefix(); + } + + return new ClusterId(result); + } + public static String toDateIndexPrefix(final long epochMilli) { final Entry value = DATE_PREFIX_CACHE.floorEntry(epochMilli); @@ -60,7 +75,25 @@ public class DateIndexExtension { return result; } - private static DatePrefixAndRange toDatePrefixAndRange(final long epochMilli) { + public static List toClusterIds(final DateTimeRange dateRange) { + final List result = new ArrayList<>(); + + OffsetDateTime current = dateRange.getStart(); + final OffsetDateTime end = dateRange.getEnd(); + current = current.withOffsetSameInstant(ZoneOffset.UTC).withDayOfMonth(1).withHour(0).withMinute(0) + .withSecond(0).withNano(0); + + while (!current.isAfter(end)) { + final String id = current.format(DATE_PATTERN); + final ClusterId clusterId = new ClusterId(id); + result.add(clusterId); + current = current.plusMonths(1); + } + + return result; + } + + public static DatePrefixAndRange toDatePrefixAndRange(final long epochMilli) { final OffsetDateTime date = Instant.ofEpochMilli(epochMilli).atOffset(ZoneOffset.UTC); final OffsetDateTime beginOfMonth = date.withDayOfMonth(1).withHour(0).withMinute(0).withSecond(0).withNano(0); final OffsetDateTime endOfMonth = beginOfMonth.plusMonths(1).minusNanos(1); @@ -72,6 +105,26 @@ public class DateIndexExtension { return new DatePrefixAndRange(datePrefix, minEpochMilli, maxEpochMilli); } + public static List toDateIndexEpochMillis(final DateTimeRange dateRange) { + final List result = new ArrayList<>(); + + OffsetDateTime current = dateRange.getStart(); + final OffsetDateTime end = dateRange.getEnd(); + current = current.withOffsetSameInstant(ZoneOffset.UTC).withDayOfMonth(1).withHour(0).withMinute(0) + .withSecond(0).withNano(0); + + while (!current.isAfter(end)) { + result.add(current.toInstant().toEpochMilli()); + current = current.plusMonths(1); + } + + return result; + } + + public static ClusterId now() { + return toClusterId(System.currentTimeMillis()); + } + } class DatePrefixAndRange { diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java index 1fbd915..9e876aa 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/CandidateGrouper.java @@ -15,7 +15,7 @@ public class CandidateGrouper { final int numDotsInValue = countDotsInValue(queryWithCaretMarker); for (final String value : values) { - // keep everyting up to the (numDotsInValue+1)-th + // keep everything up to the (numDotsInValue+1)-th final String[] token = value.split(Pattern.quote(".")); final List tokenlist = new ArrayList<>(Arrays.asList(token)); final List prefix = tokenlist.subList(0, numDotsInValue + 1); diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java index 0c28bc7..02a709d 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java @@ -11,13 +11,14 @@ import org.lucares.collections.LongList; import org.lucares.pdb.api.RuntimeIOException; import org.lucares.pdb.api.Tag; import org.lucares.pdb.blockstorage.LongStreamFile; +import org.lucares.pdb.datastore.internal.ClusterId; +import org.lucares.pdb.datastore.internal.ClusteredPersistentMap; import org.lucares.pdb.datastore.internal.DataStore; import org.lucares.pdb.datastore.lang.Expression.And; import org.lucares.pdb.datastore.lang.Expression.Not; import org.lucares.pdb.datastore.lang.Expression.Or; import org.lucares.pdb.datastore.lang.Expression.Parentheses; import org.lucares.pdb.diskstorage.DiskStorage; -import org.lucares.pdb.map.PersistentMap; import org.lucares.utils.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,10 +26,14 @@ import org.slf4j.LoggerFactory; public class ExpressionToDocIdVisitor extends ExpressionVisitor { private static final Logger LOGGER = LoggerFactory.getLogger(ExpressionToDocIdVisitor.class); - private final PersistentMap keyToValueToDocId; + private final ClusteredPersistentMap keyToValueToDocId; private final DiskStorage diskStorage; - public ExpressionToDocIdVisitor(final PersistentMap keyToValueToDocsId, final DiskStorage diskStorage) { + private final ClusterId clusterId; + + public ExpressionToDocIdVisitor(final ClusterId clusterId, + final ClusteredPersistentMap keyToValueToDocsId, final DiskStorage diskStorage) { + this.clusterId = clusterId; this.keyToValueToDocId = keyToValueToDocsId; this.diskStorage = diskStorage; } @@ -121,9 +126,17 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { private LongList getAllDocIds() { try { - final Long blockOffset = keyToValueToDocId.getValue(DataStore.TAG_ALL_DOCS); - final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffset, diskStorage); - return bsFile.asLongList(); + + final Long blockOffset = keyToValueToDocId.getValue(clusterId, DataStore.TAG_ALL_DOCS); + + if (blockOffset != null) { + final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffset, diskStorage); + final LongList longList = bsFile.asLongList(); + + return longList; + } else { + return new LongList(0); + } } catch (final IOException e) { throw new RuntimeIOException(e); } @@ -131,42 +144,36 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { private List filterByWildcard(final String propertyName, final Pattern valuePattern) { final List result = new ArrayList<>(); - try { - final long start = System.nanoTime(); - keyToValueToDocId.visitValues(new Tag(propertyName, ""), (tags, blockOffsetToDocIds) -> { - try { - if (valuePattern.matcher(tags.getValueAsString()).matches()) { - try (final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffsetToDocIds, - diskStorage)) { - // We know that all LongLists coming from a BSFile are sorted, non-overlapping - // and increasing, that means we can just concatenate them and get a sorted - // list. - final List longLists = bsFile.streamOfLongLists().collect(Collectors.toList()); - final LongList concatenatedLists = concatenateLists(longLists); + final long start = System.nanoTime(); + keyToValueToDocId.visitValues(clusterId, new Tag(propertyName, ""), (tags, blockOffsetToDocIds) -> { + try { + if (valuePattern.matcher(tags.getValueAsString()).matches()) { + try (final LongStreamFile bsFile = LongStreamFile.existingFile(blockOffsetToDocIds, diskStorage)) { - Preconditions.checkTrue(concatenatedLists.isSorted(), - "The LongLists containing document ids must be sorted, " - + "non-overlapping and increasing, so that the concatenation " - + "is sorted. This is guaranteed by the fact that document ids " - + "are generated in monotonically increasing order."); + // We know that all LongLists coming from a BSFile are sorted, non-overlapping + // and increasing, that means we can just concatenate them and get a sorted + // list. + final List longLists = bsFile.streamOfLongLists().collect(Collectors.toList()); + final LongList concatenatedLists = concatenateLists(longLists); - result.add(concatenatedLists); - } + Preconditions.checkTrue(concatenatedLists.isSorted(), + "The LongLists containing document ids must be sorted, " + + "non-overlapping and increasing, so that the concatenation " + + "is sorted. This is guaranteed by the fact that document ids " + + "are generated in monotonically increasing order."); + + result.add(concatenatedLists); } - } catch (final IOException e) { - throw new RuntimeIOException(e); } - }); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } + }); - LOGGER.trace("filterByWildcard: for key {} took {}ms", propertyName, - (System.nanoTime() - start) / 1_000_000.0); - - return result; - } catch (final IOException e) { - throw new RuntimeIOException(e); - } + LOGGER.trace("filterByWildcard: for key {} took {}ms", propertyName, (System.nanoTime() - start) / 1_000_000.0); + return result; } private LongList merge(final Collection lists) { diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java index 1984652..3ab1141 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DataStoreTest.java @@ -6,8 +6,7 @@ import java.awt.event.KeyEvent; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.time.OffsetDateTime; -import java.time.ZoneOffset; +import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -24,6 +23,8 @@ import javax.swing.JFrame; import javax.swing.JTextArea; import javax.swing.JTextField; +import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Tags; import org.lucares.pdb.blockstorage.BSFile; import org.lucares.pdb.datastore.Doc; @@ -59,6 +60,8 @@ public class DataStoreTest { public void testQuery() throws Exception { dataStore = new DataStore(dataDirectory); + final DateTimeRange dateRange = DateTimeRange.relativeHours(1); + final ClusterId clusterId = DateIndexExtension.toClusterIds(dateRange).get(0); final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim"); final Tags pigeonJennifer = Tags.createAndAddToDictionary("bird", "pigeon", "name", "Jennifer"); @@ -67,40 +70,40 @@ public class DataStoreTest { final Tags labradorTim = Tags.createAndAddToDictionary("dog", "labrador", "name", "Tim"); tagsToBlockStorageRootBlockNumber = new HashMap<>(); - tagsToBlockStorageRootBlockNumber.put(eagleTim, dataStore.createNewFile(eagleTim)); - tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(pigeonJennifer)); - tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(flamingoJennifer)); - tagsToBlockStorageRootBlockNumber.put(labradorJenny, dataStore.createNewFile(labradorJenny)); - tagsToBlockStorageRootBlockNumber.put(labradorTim, dataStore.createNewFile(labradorTim)); + tagsToBlockStorageRootBlockNumber.put(eagleTim, dataStore.createNewFile(clusterId, eagleTim)); + tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(clusterId, pigeonJennifer)); + tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(clusterId, flamingoJennifer)); + tagsToBlockStorageRootBlockNumber.put(labradorJenny, dataStore.createNewFile(clusterId, labradorJenny)); + tagsToBlockStorageRootBlockNumber.put(labradorTim, dataStore.createNewFile(clusterId, labradorTim)); - assertSearch("bird=eagle", eagleTim); - assertSearch("dog=labrador", labradorJenny, labradorTim); - assertSearch("name=Tim", eagleTim, labradorTim); - assertSearch("dog=labrador and name=Tim", labradorTim); - assertSearch("dog=labrador and !name=Tim", labradorJenny); - assertSearch("name=Jennifer or name=Jenny", pigeonJennifer, flamingoJennifer, labradorJenny); + assertSearch(dateRange, "bird=eagle", eagleTim); + assertSearch(dateRange, "dog=labrador", labradorJenny, labradorTim); + assertSearch(dateRange, "name=Tim", eagleTim, labradorTim); + assertSearch(dateRange, "dog=labrador and name=Tim", labradorTim); + assertSearch(dateRange, "dog=labrador and !name=Tim", labradorJenny); + assertSearch(dateRange, "name=Jennifer or name=Jenny", pigeonJennifer, flamingoJennifer, labradorJenny); // a͟n͟d binds stronger than o͟r - assertSearch("name=Tim and dog=labrador or bird=pigeon", pigeonJennifer, labradorTim); - assertSearch("bird=pigeon or name=Tim and dog=labrador", pigeonJennifer, labradorTim); + assertSearch(dateRange, "name=Tim and dog=labrador or bird=pigeon", pigeonJennifer, labradorTim); + assertSearch(dateRange, "bird=pigeon or name=Tim and dog=labrador", pigeonJennifer, labradorTim); // parenthesis override priority of a͟n͟d - assertSearch("name=Tim and (dog=labrador or bird=pigeon)", labradorTim); - assertSearch("(dog=labrador or bird=pigeon) and name=Tim", labradorTim); + assertSearch(dateRange, "name=Tim and (dog=labrador or bird=pigeon)", labradorTim); + assertSearch(dateRange, "(dog=labrador or bird=pigeon) and name=Tim", labradorTim); // wildcards - assertSearch("bird=*", eagleTim, pigeonJennifer, flamingoJennifer); - assertSearch("name=Jen*", pigeonJennifer, flamingoJennifer, labradorJenny); - assertSearch("dog=*dor", labradorJenny, labradorTim); - assertSearch("dog=lab*dor", labradorJenny, labradorTim); - assertSearch("dog=*lab*dor*", labradorJenny, labradorTim); + assertSearch(dateRange, "bird=*", eagleTim, pigeonJennifer, flamingoJennifer); + assertSearch(dateRange, "name=Jen*", pigeonJennifer, flamingoJennifer, labradorJenny); + assertSearch(dateRange, "dog=*dor", labradorJenny, labradorTim); + assertSearch(dateRange, "dog=lab*dor", labradorJenny, labradorTim); + assertSearch(dateRange, "dog=*lab*dor*", labradorJenny, labradorTim); // 'in' queries - assertSearch("bird=(eagle, pigeon, flamingo)", eagleTim, pigeonJennifer, flamingoJennifer); - assertSearch("dog = (labrador) and name =Tim,Jennifer", labradorTim); - assertSearch("name =Jenn*", pigeonJennifer, flamingoJennifer, labradorJenny); - assertSearch("name = (*) and dog=labrador", labradorJenny, labradorTim); - assertSearch("name =XYZ, * and dog=labrador", labradorJenny, labradorTim); + assertSearch(dateRange, "bird=(eagle, pigeon, flamingo)", eagleTim, pigeonJennifer, flamingoJennifer); + assertSearch(dateRange, "dog = (labrador) and name =Tim,Jennifer", labradorTim); + assertSearch(dateRange, "name =Jenn*", pigeonJennifer, flamingoJennifer, labradorJenny); + assertSearch(dateRange, "name = (*) and dog=labrador", labradorJenny, labradorTim); + assertSearch(dateRange, "name =XYZ, * and dog=labrador", labradorJenny, labradorTim); } @@ -111,10 +114,11 @@ public class DataStoreTest { final Tags pigeonJennifer = Tags.createAndAddToDictionary("bird", "pigeon", "name", "Jennifer"); final Tags flamingoJennifer = Tags.createAndAddToDictionary("bird", "flamingo", "name", "Jennifer"); - tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(pigeonJennifer)); - tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(flamingoJennifer)); + final ClusterId clusterId = new ClusterId("clusterA"); + tagsToBlockStorageRootBlockNumber.put(pigeonJennifer, dataStore.createNewFile(clusterId, pigeonJennifer)); + tagsToBlockStorageRootBlockNumber.put(flamingoJennifer, dataStore.createNewFile(clusterId, flamingoJennifer)); - final Optional docsFlamingoJennifer = dataStore.getByTags(flamingoJennifer); + final Optional docsFlamingoJennifer = dataStore.getByTags(clusterId, flamingoJennifer); Assert.assertTrue(docsFlamingoJennifer.isPresent(), "doc for docsFlamingoJennifer"); } @@ -122,7 +126,7 @@ public class DataStoreTest { dataStore = new DataStore(dataDirectory); final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim"); - final long eagleTimBlockOffset = dataStore.createNewFile(eagleTim); + final long eagleTimBlockOffset = dataStore.createNewFile(new ClusterId("clusterA"), eagleTim); Assert.assertEquals(eagleTimBlockOffset % BSFile.BLOCK_SIZE, 0); } @@ -156,6 +160,7 @@ public class DataStoreTest { // TODO should only match "Jenny", because Jenny is the only non-bird name // starting with 'Jen' result.add(new Object[] { "!(type=bird and name=Jen|)", "name", Arrays.asList("Jennifer", "Jenny") }); + result.add(new Object[] { "!(type=dog and name=|) and !type=cat", "name", Arrays.asList("Jennifer", "Jenny", "Tim") }); @@ -167,6 +172,8 @@ public class DataStoreTest { final List expectedProposedValues) throws Exception { dataStore = new DataStore(dataDirectory); + final ClusterId clusterId = DateIndexExtension.now(); + final DateTimeRange dateRange = DateTimeRange.relativeHours(1); final List tags = Arrays.asList( Tags.createAndAddToDictionary("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"), @@ -182,48 +189,9 @@ public class DataStoreTest { Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"), Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "John")); - tags.forEach(dataStore::createNewFile); + tags.forEach(t -> dataStore.createNewFile(clusterId, t)); - assertProposals(queryWithCaret, field, expectedProposedValues); - } - - public void test() throws Exception { - - try (final DataStore dataStore = new DataStore(dataDirectory)) { - - final OffsetDateTime date = OffsetDateTime.now(ZoneOffset.UTC); - final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue"); - - final PdbWriter newFileForTags = dataStore.getWriter(date.toInstant().toEpochMilli(), tags); - - final PdbWriter existingFileForTags = dataStore.getWriter(date.toInstant().toEpochMilli(), tags); - - Assert.assertSame(newFileForTags, existingFileForTags); - } - } - - public void testAppendingToSameFile() throws Exception { - - try (final DataStore dataStore = new DataStore(dataDirectory)) { - - // dayC is before dayA and dayB - final long dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1).toInstant().toEpochMilli(); - final long dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1).toInstant().toEpochMilli(); - final long dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1).toInstant().toEpochMilli(); - - final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue"); - - final PdbWriter writerForDayA = dataStore.getWriter(dayA, tags); - writerForDayA.write(dayA, 1); - final PdbWriter writerForDayB = dataStore.getWriter(dayB, tags); - writerForDayB.write(dayB, 2); - - final PdbWriter writerForDayC = dataStore.getWriter(dayC, tags); - writerForDayC.write(dayC, 3); - - Assert.assertSame(writerForDayA, writerForDayB); - Assert.assertSame(writerForDayA, writerForDayC); - } + assertProposals(dateRange, queryWithCaret, field, expectedProposedValues); } public void testIdenticalDatesGoIntoSameFile() throws Exception { @@ -234,19 +202,16 @@ public class DataStoreTest { final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue"); - final PdbWriter fileA = dataStore.getWriter(timestamp, tags); - fileA.write(timestamp, 1); + dataStore.write(timestamp, tags, 1); + dataStore.write(timestamp, tags, 2); - final PdbWriter fileB = dataStore.getWriter(timestamp, tags); - fileA.write(timestamp, 2); - - Assert.assertEquals(fileA, fileB); + Assert.assertEquals(dataStore.sizeWriterCache(), 1, "size of the writer cache"); } } public static void main(final String[] args) throws IOException, InterruptedException { final Path dir = Files.createTempDirectory("pdb"); - try (DataStore dataStore = new DataStore(dir)) { + try (final DataStore dataStore = new DataStore(dir)) { final List tags = Arrays.asList( Tags.createAndAddToDictionary("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"), @@ -265,7 +230,8 @@ public class DataStoreTest { Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"), Tags.createAndAddToDictionary("type", "cat", "subtype", "lion", "age", "four", "name", "John")); - tags.forEach(dataStore::createNewFile); + final ClusterId clusterId = DateIndexExtension.now(); + tags.forEach(t -> dataStore.createNewFile(clusterId, t)); final JFrame frame = new JFrame(); final JTextField input = new JTextField(); @@ -306,7 +272,7 @@ public class DataStoreTest { } }); - final List docs = dataStore.search(""); + final List docs = dataStore.search(Query.createQuery("", DateTimeRange.relative(1, ChronoUnit.DAYS))); final StringBuilder out = new StringBuilder(); out.append("info\n"); for (final Doc doc : docs) { @@ -321,7 +287,7 @@ public class DataStoreTest { } } - private void assertProposals(final String queryWithCaret, final String field, + private void assertProposals(final DateTimeRange dateRange, final String queryWithCaret, final String field, final List expectedProposedValues) { final String query = queryWithCaret.replace("|", ""); final int caretIndex = queryWithCaret.indexOf("|"); @@ -329,7 +295,7 @@ public class DataStoreTest { System.out.println( "proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList())); - proposals.forEach(p -> assertQueryFindsResults(p.getNewQuery())); + proposals.forEach(p -> assertQueryFindsResults(dateRange, p.getNewQuery())); final List proposedValues = CollectionUtils.map(proposals, Proposal::getProposedTag); Collections.sort(proposedValues); @@ -337,13 +303,13 @@ public class DataStoreTest { Assert.assertEquals(proposedValues.toString(), expectedProposedValues.toString(), "proposed values:"); } - private void assertQueryFindsResults(final String query) { - final List result = dataStore.search(query); + private void assertQueryFindsResults(final DateTimeRange dateRange, final String query) { + final List result = dataStore.search(new Query(query, dateRange)); Assert.assertFalse(result.isEmpty(), "The query '" + query + "' must return a result, but didn't."); } - private void assertSearch(final String query, final Tags... tags) { - final List actualDocs = dataStore.search(query); + private void assertSearch(final DateTimeRange dateRange, final String query, final Tags... tags) { + final List actualDocs = dataStore.search(new Query(query, dateRange)); final List actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber); final List expectedPaths = CollectionUtils.map(tags, tagsToBlockStorageRootBlockNumber::get); diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/lang/DateIndexExtensionTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DateIndexExtensionTest.java similarity index 52% rename from data-store/src/test/java/org/lucares/pdb/datastore/lang/DateIndexExtensionTest.java rename to data-store/src/test/java/org/lucares/pdb/datastore/internal/DateIndexExtensionTest.java index ce7e924..d744bda 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/lang/DateIndexExtensionTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/DateIndexExtensionTest.java @@ -1,12 +1,13 @@ -package org.lucares.pdb.datastore.lang; +package org.lucares.pdb.datastore.internal; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Set; -import org.lucares.pdb.datastore.DateTimeRange; +import org.lucares.pdb.api.DateTimeRange; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -64,6 +65,42 @@ public class DateIndexExtensionTest { Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(min_201801), "201801"); Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(max_201801), "201801"); Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(mid_201711), "201711"); - System.out.println(DateIndexExtension.DATE_PREFIX_CACHE); + } + + public void testDateRanges() { + final OffsetDateTime mid_201712 = OffsetDateTime.of(2017, 12, 7, 1, 1, 1, 0, ZoneOffset.UTC) + .withOffsetSameInstant(ZoneOffset.ofHours(-2)); + final OffsetDateTime min_201801 = OffsetDateTime.of(2018, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC) + .withOffsetSameInstant(ZoneOffset.ofHours(-8)); + final OffsetDateTime min_201802 = OffsetDateTime.of(2018, 2, 1, 0, 0, 0, 0, ZoneOffset.UTC) + .withOffsetSameInstant(ZoneOffset.ofHours(12)); + + final DateTimeRange range_201712_201802 = new DateTimeRange(mid_201712, min_201802); + final DateTimeRange range_201712_201801 = new DateTimeRange(mid_201712, min_201801); + final DateTimeRange range_201712_201712 = new DateTimeRange(mid_201712, mid_201712); + + final List dateIndexPrefixesWithEmptyCache = DateIndexExtension.toClusterIds(range_201712_201802); + Assert.assertEquals(dateIndexPrefixesWithEmptyCache, + Arrays.asList(new ClusterId("201712"), new ClusterId("201801"), new ClusterId("201802"))); + + final List dateIndexPrefixesWithFilledCache = DateIndexExtension.toClusterIds(range_201712_201801); + Assert.assertEquals(dateIndexPrefixesWithFilledCache, + Arrays.asList(new ClusterId("201712"), new ClusterId("201801"))); + + final List dateIndexPrefixesOneMonth = DateIndexExtension.toClusterIds(range_201712_201712); + Assert.assertEquals(dateIndexPrefixesOneMonth, Arrays.asList(new ClusterId("201712"))); + } + + public void testDateRangeToEpochMilli() { + final OffsetDateTime mid_201712 = OffsetDateTime.of(2017, 12, 7, 1, 1, 1, 0, ZoneOffset.ofHours(3)); + final OffsetDateTime min_201802 = OffsetDateTime.of(2018, 2, 15, 0, 0, 0, 0, ZoneOffset.ofHours(7)); + + final long exp_201712 = OffsetDateTime.of(2017, 12, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); + final long exp_201801 = OffsetDateTime.of(2018, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); + final long exp_201802 = OffsetDateTime.of(2018, 2, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); + + final List dateIndexEpochMillis = DateIndexExtension + .toDateIndexEpochMillis(new DateTimeRange(mid_201712, min_201802)); + Assert.assertEquals(dateIndexEpochMillis, Arrays.asList(exp_201712, exp_201801, exp_201802)); } } diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java index 4ad9a54..266e296 100644 --- a/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java +++ b/data-store/src/test/java/org/lucares/pdb/datastore/internal/ProposerTest.java @@ -38,6 +38,7 @@ public class ProposerTest { private void initDatabase() throws Exception { dataStore = new DataStore(dataDirectory); + final ClusterId now = DateIndexExtension.now(); final Tags eagleTim = Tags.createAndAddToDictionary("bird", "eagle", "name", "Tim"); final Tags eagleTimothy = Tags.createAndAddToDictionary("bird", "eagle", "name", "Timothy"); @@ -46,12 +47,12 @@ public class ProposerTest { final Tags labradorJenny = Tags.createAndAddToDictionary("dog", "labrador", "name", "Jenny"); final Tags labradorTim = Tags.createAndAddToDictionary("dog", "labrador", "name", "Tim"); - dataStore.createNewFile(eagleTim); - dataStore.createNewFile(eagleTimothy); - dataStore.createNewFile(pigeonJennifer); - dataStore.createNewFile(flamingoJennifer); - dataStore.createNewFile(labradorJenny); - dataStore.createNewFile(labradorTim); + dataStore.createNewFile(now, eagleTim); + dataStore.createNewFile(now, eagleTimothy); + dataStore.createNewFile(now, pigeonJennifer); + dataStore.createNewFile(now, flamingoJennifer); + dataStore.createNewFile(now, labradorJenny); + dataStore.createNewFile(now, labradorTim); } public void testEmptyQuery() throws Exception { diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 558870d..44e7c4d 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-5.1.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/DateTimeRange.java b/pdb-api/src/main/java/org/lucares/pdb/api/DateTimeRange.java new file mode 100644 index 0000000..72dbb4d --- /dev/null +++ b/pdb-api/src/main/java/org/lucares/pdb/api/DateTimeRange.java @@ -0,0 +1,100 @@ +package org.lucares.pdb.api; + +import java.time.Duration; +import java.time.OffsetDateTime; +import java.time.temporal.ChronoUnit; +import java.time.temporal.TemporalUnit; + +public class DateTimeRange { + + private final OffsetDateTime start; + private final OffsetDateTime end; + + public DateTimeRange(final OffsetDateTime start, final OffsetDateTime end) { + this.start = start; + this.end = end; + } + + public static DateTimeRange relative(final long amount, final TemporalUnit unit) { + return new DateTimeRange(OffsetDateTime.now().minus(amount, unit), OffsetDateTime.now()); + } + + public static DateTimeRange relativeMillis(final long amount) { + return relative(amount, ChronoUnit.MILLIS); + } + + public static DateTimeRange relativeSeconds(final long amount) { + return relative(amount, ChronoUnit.SECONDS); + } + + public static DateTimeRange relativeMinutes(final long amount) { + return relative(amount, ChronoUnit.MINUTES); + } + + public static DateTimeRange relativeHours(final long amount) { + return relative(amount, ChronoUnit.HOURS); + } + + public static DateTimeRange relativeDays(final long amount) { + return relative(amount, ChronoUnit.DAYS); + } + + public static DateTimeRange relativeMonths(final long amount) { + return relative(amount, ChronoUnit.MONTHS); + } + + public static DateTimeRange relativeYears(final long amount) { + return relative(amount, ChronoUnit.YEARS); + } + + public OffsetDateTime getStart() { + return start; + } + + public long getStartEpochMilli() { + return start.toInstant().toEpochMilli(); + } + + public OffsetDateTime getEnd() { + return end; + } + + public long getEndEpochMilli() { + return end.toInstant().toEpochMilli(); + } + + @Override + public String toString() { + return start + "-" + end; + } + + public static DateTimeRange ofDay(final OffsetDateTime day) { + final OffsetDateTime from = day.truncatedTo(ChronoUnit.DAYS); + final OffsetDateTime to = from.plusDays(1).minusNanos(1); + + return new DateTimeRange(from, to); + } + + public Duration duration() { + return Duration.between(start, end); + } + + public boolean inRange(final long epochMilli) { + final long fromEpochMilli = start.toInstant().toEpochMilli(); + final long toEpochMilli = end.toInstant().toEpochMilli(); + + return fromEpochMilli <= epochMilli && epochMilli <= toEpochMilli; + } + + public boolean inRange(final OffsetDateTime date) { + return start.compareTo(date) <= 0 && end.compareTo(date) >= 0; + } + + public boolean intersect(final DateTimeRange timeRange) { + return inRange(timeRange.start) // + || inRange(timeRange.end) // + || timeRange.inRange(start)// + || timeRange.inRange(end); + } + +} diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Query.java b/pdb-api/src/main/java/org/lucares/pdb/api/Query.java new file mode 100644 index 0000000..56d5c01 --- /dev/null +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Query.java @@ -0,0 +1,77 @@ +package org.lucares.pdb.api; + +import java.util.ArrayList; +import java.util.List; + +public class Query { + private final String query; + + private final DateTimeRange dateRange; + + public Query(final String query, final DateTimeRange dateRange) { + super(); + this.query = query; + this.dateRange = dateRange; + } + + public Query relativeMillis(final String query, final long amount) { + return new Query(query, DateTimeRange.relativeMillis(amount)); + } + + public Query relativeSeconds(final String query, final long amount) { + return new Query(query, DateTimeRange.relativeSeconds(amount)); + } + + public Query relativeMinutes(final String query, final long amount) { + return new Query(query, DateTimeRange.relativeMinutes(amount)); + } + + public Query relativeHours(final String query, final long amount) { + return new Query(query, DateTimeRange.relativeHours(amount)); + } + + public Query relativeDays(final String query, final long amount) { + return new Query(query, DateTimeRange.relativeDays(amount)); + } + + public Query relativeMonths(final String query, final long amount) { + return new Query(query, DateTimeRange.relativeMonths(amount)); + } + + public static Query createQuery(final String query, final DateTimeRange dateRange) { + return new Query(query, dateRange); + } + + public static Query createQuery(final Tags tags, final DateTimeRange dateRange) { + + final List terms = new ArrayList<>(); + + for (final String key : tags.getKeys()) { + final String value = tags.getValue(key); + + final StringBuilder term = new StringBuilder(); + term.append(key); + term.append("="); + term.append(value); + term.append(" "); + + terms.add(term.toString()); + } + + return new Query(String.join(" and ", terms), dateRange); + } + + public String getQuery() { + return query; + } + + public DateTimeRange getDateRange() { + return dateRange; + } + + @Override + public String toString() { + return "'" + query + "' [" + dateRange + "]"; + } + +} diff --git a/performanceDb/src/test/java/org/lucares/performance/db/TimeRangeTest.java b/pdb-api/src/test/java/org/lucares/pdb/api/DateTimeRangeTest.java similarity index 60% rename from performanceDb/src/test/java/org/lucares/performance/db/TimeRangeTest.java rename to pdb-api/src/test/java/org/lucares/pdb/api/DateTimeRangeTest.java index a7129d4..3095991 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/TimeRangeTest.java +++ b/pdb-api/src/test/java/org/lucares/pdb/api/DateTimeRangeTest.java @@ -1,4 +1,4 @@ -package org.lucares.performance.db; +package org.lucares.pdb.api; import java.time.Instant; import java.time.OffsetDateTime; @@ -10,9 +10,7 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -@Test -public class TimeRangeTest { - +public class DateTimeRangeTest { @DataProvider Object[][] providerIntersect() { final List result = new ArrayList<>(); @@ -22,19 +20,18 @@ public class TimeRangeTest { final OffsetDateTime c = Instant.ofEpochMilli(3000).atOffset(ZoneOffset.UTC); final OffsetDateTime d = Instant.ofEpochMilli(4000).atOffset(ZoneOffset.UTC); - result.add(new Object[] { new TimeRange(a, b), new TimeRange(c, d), false }); - result.add(new Object[] { new TimeRange(a, c), new TimeRange(b, d), true }); - result.add(new Object[] { new TimeRange(a, d), new TimeRange(b, d), true }); - result.add(new Object[] { new TimeRange(a, d), new TimeRange(b, d), true }); - result.add(new Object[] { new TimeRange(a, b), new TimeRange(b, d), true }); + result.add(new Object[] { new DateTimeRange(a, b), new DateTimeRange(c, d), false }); + result.add(new Object[] { new DateTimeRange(a, c), new DateTimeRange(b, d), true }); + result.add(new Object[] { new DateTimeRange(a, d), new DateTimeRange(b, d), true }); + result.add(new Object[] { new DateTimeRange(a, d), new DateTimeRange(b, d), true }); + result.add(new Object[] { new DateTimeRange(a, b), new DateTimeRange(b, d), true }); return result.toArray(new Object[result.size()][]); } @Test(dataProvider = "providerIntersect") - public void testIntersect(final TimeRange a, final TimeRange b, final boolean expected) throws Exception { + public void testIntersect(final DateTimeRange a, final DateTimeRange b, final boolean expected) throws Exception { Assert.assertEquals(a.intersect(b), expected, a + " intersects " + b); Assert.assertEquals(b.intersect(a), expected, a + " intersects " + b); } - } diff --git a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java b/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java index cab100a..646ed79 100644 --- a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java +++ b/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java @@ -7,7 +7,7 @@ import java.time.format.DateTimeFormatter; import java.util.List; import java.util.regex.Pattern; -import org.lucares.pdb.datastore.DateTimeRange; +import org.lucares.pdb.api.DateTimeRange; import com.google.common.base.Preconditions; diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java index e5d727b..7a677d8 100644 --- a/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java +++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java @@ -24,10 +24,11 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; import org.lucares.collections.LongList; +import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.GroupResult; +import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Tags; -import org.lucares.pdb.datastore.DateTimeRange; import org.lucares.pdb.plot.api.CustomAggregator; import org.lucares.pdb.plot.api.Limit; import org.lucares.pdb.plot.api.PlotSettings; @@ -83,7 +84,7 @@ public class ScatterPlot { final OffsetDateTime dateFrom = dateRange.getStart(); final OffsetDateTime dateTo = dateRange.getEnd(); - final Result result = db.get(query, groupBy); + final Result result = db.get(new Query(query, dateRange), groupBy); final long start = System.nanoTime(); final AtomicInteger idCounter = new AtomicInteger(0); diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java b/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java index a8c6cfe..94fe5ea 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/PdbController.java @@ -15,6 +15,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.lang3.StringUtils; +import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.Query; import org.lucares.pdb.datastore.Proposal; import org.lucares.pdb.plot.api.AxisScale; import org.lucares.pdb.plot.api.Limit; @@ -228,7 +230,10 @@ public class PdbController implements HardcodedValues, PropertyKeys { ) @ResponseBody List fields() { - final List fields = db.getFields(); + // TODO get date range from UI + // TODO time range must not be static + final DateTimeRange dateTimeRange = DateTimeRange.relativeYears(5); + final List fields = db.getFields(dateTimeRange); fields.sort(Collator.getInstance(Locale.ENGLISH)); @@ -244,7 +249,11 @@ public class PdbController implements HardcodedValues, PropertyKeys { SortedSet fields(@PathVariable(name = "fieldName") final String fieldName, @RequestParam(name = "query") final String query) { - final SortedSet fields = db.getFieldsValues(query, fieldName); + // TODO get date range from UI + // TODO time range must not be static + final DateTimeRange dateRange = DateTimeRange.relativeYears(5); + final Query q = new Query(query, dateRange); + final SortedSet fields = db.getFieldsValues(q, fieldName); return fields; } diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/domain/DateRange.java b/pdb-ui/src/main/java/org/lucares/pdbui/domain/DateRange.java new file mode 100644 index 0000000..04cb364 --- /dev/null +++ b/pdb-ui/src/main/java/org/lucares/pdbui/domain/DateRange.java @@ -0,0 +1,76 @@ +package org.lucares.pdbui.domain; + +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +import org.lucares.pdb.api.DateTimeRange; + +public class DateRange { + + private static final DateTimeFormatter DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + private String startDate; + private String endDate; + + DateRange() { + super(); + } + + /** + * + * @param startDate date in format 'yyyy-MM-dd HH:mm:ss' + * @param endDate date in format 'yyyy-MM-dd HH:mm:ss' + */ + public DateRange(final String startDate, final String endDate) { + this.startDate = startDate; + this.endDate = endDate; + } + + /** + * + * @return date in format 'yyyy-MM-dd HH:mm:ss' + */ + public String getStartDate() { + return startDate; + } + + /** + * + * @param startDate date in format 'yyyy-MM-dd HH:mm:ss' + */ + public void setStartDate(final String startDate) { + this.startDate = startDate; + } + + /** + * + * @return date in format 'yyyy-MM-dd HH:mm:ss' + */ + public String getEndDate() { + return endDate; + } + + /** + * + * @param endDate date in format 'yyyy-MM-dd HH:mm:ss' + */ + public void setEndDate(final String endDate) { + this.endDate = endDate; + } + + @Override + public String toString() { + return startDate + " - " + endDate; + } + + public DateTimeRange toDateTimeRange() { + + final OffsetDateTime start = LocalDateTime.parse(startDate, DATE_FORMAT).atOffset(ZoneOffset.UTC); + final OffsetDateTime end = LocalDateTime.parse(endDate, DATE_FORMAT).atOffset(ZoneOffset.UTC); + + return new DateTimeRange(start, end); + } + +} diff --git a/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java b/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java index 2cfa305..5bb1e31 100644 --- a/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java +++ b/pdb-ui/src/test/java/org/lucares/performance/db/ingestor/TcpIngestorTest.java @@ -18,6 +18,8 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.ThreadLocalRandom; import org.lucares.collections.LongList; +import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.Query; import org.lucares.pdb.datastore.internal.DataStore; import org.lucares.pdbui.TcpIngestor; import org.lucares.performance.db.PdbExport; @@ -79,10 +81,11 @@ public class TcpIngestorTest { } try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final LongList result = db.get("host=" + host).singleGroup().flatMap(); + final LongList result = db.get(new Query("host=" + host, DateTimeRange.ofDay(dateA))).singleGroup() + .flatMap(); Assert.assertEquals(result.size(), 4); - Assert.assertEquals(result.get(0), dateA.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli()); + Assert.assertEquals(result.get(0), dateA.toInstant().toEpochMilli()); Assert.assertEquals(result.get(1), 1); Assert.assertEquals(result.get(2), dateB.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli()); @@ -95,6 +98,7 @@ public class TcpIngestorTest { final long dateA = Instant.now().toEpochMilli(); final long dateB = Instant.now().toEpochMilli() + 1; final long dateC = Instant.now().toEpochMilli() - 1; + final DateTimeRange dateRange = DateTimeRange.relativeMinutes(1); final String host = "someHost"; // 1. insert some data @@ -133,7 +137,7 @@ public class TcpIngestorTest { // 5. check that the data is correctly inserted try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final LongList result = db.get("host=" + host).singleGroup().flatMap(); + final LongList result = db.get(new Query("host=" + host, dateRange)).singleGroup().flatMap(); Assert.assertEquals(result.size(), 6); Assert.assertEquals(result.get(0), dateA); @@ -151,6 +155,7 @@ public class TcpIngestorTest { public void testIngestionThreadDoesNotDieOnErrors() throws Exception { final OffsetDateTime dateA = OffsetDateTime.ofInstant(Instant.ofEpochMilli(-1), ZoneOffset.UTC); final OffsetDateTime dateB = OffsetDateTime.now(); + final DateTimeRange dateRange = new DateTimeRange(dateA, dateB); final String host = "someHost"; try (TcpIngestor tcpIngestor = new TcpIngestor(dataDirectory)) { @@ -185,7 +190,7 @@ public class TcpIngestorTest { } try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final LongList result = db.get("host=" + host).singleGroup().flatMap(); + final LongList result = db.get(new Query("host=" + host, dateRange)).singleGroup().flatMap(); Assert.assertEquals(result.size(), 4); Assert.assertEquals(result.get(0), dateA.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli()); @@ -212,6 +217,8 @@ public class TcpIngestorTest { final ThreadLocalRandom rnd = ThreadLocalRandom.current(); final String host = "someHost"; final List additionalTagValues = Arrays.asList("foo", "bar", "baz"); + final DateTimeRange dateRange = new DateTimeRange(Instant.ofEpochMilli(-100000L).atOffset(ZoneOffset.UTC), + Instant.ofEpochMilli(10000000L).atOffset(ZoneOffset.UTC)); final LongList expected = new LongList(); @@ -245,7 +252,7 @@ public class TcpIngestorTest { } try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final LongList result = db.get("host=" + host).singleGroup().flatMap(); + final LongList result = db.get(new Query("host=" + host, dateRange)).singleGroup().flatMap(); Assert.assertEquals(LongPair.fromLongList(result), LongPair.fromLongList(expected)); } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbExport.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbExport.java index 754dc2c..d254f86 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbExport.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbExport.java @@ -22,6 +22,8 @@ import java.util.zip.GZIPOutputStream; import org.apache.logging.log4j.Level; import org.apache.logging.log4j.core.config.Configurator; import org.lucares.collections.LongList; +import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Tags; import org.lucares.pdb.datastore.PdbFile; import org.slf4j.Logger; @@ -75,7 +77,8 @@ public class PdbExport { try (final PerformanceDb db = new PerformanceDb(dataDirectory);) { LOGGER.info("Searching for all files. This may take a while ..."); - final List pdbFiles = db.getFilesForQuery(""); + // TODO time range should not be static, but include everything + final List pdbFiles = db.getFilesForQuery(new Query("", DateTimeRange.relativeYears(5))); long count = 0; long lastEpochMilli = 0; diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java index b86af8e..c932500 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java @@ -12,9 +12,11 @@ import java.util.SortedSet; import java.util.stream.Stream; import org.lucares.collections.LongList; +import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.Entries; import org.lucares.pdb.api.Entry; import org.lucares.pdb.api.GroupResult; +import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Tags; import org.lucares.pdb.datastore.InvalidValueException; @@ -119,11 +121,11 @@ public class PerformanceDb implements AutoCloseable { * @param query * @return */ - public Result get(final String query) { + public Result get(final Query query) { return get(query, Grouping.NO_GROUPING); } - public List getFilesForQuery(final String query) { + public List getFilesForQuery(final Query query) { return dataStore.getFilesForQuery(query); } @@ -134,7 +136,7 @@ public class PerformanceDb implements AutoCloseable { * @param groupBy the tag to group by * @return {@link Result} */ - public Result get(final String query, final List groupBy) { + public Result get(final Query query, final List groupBy) { final long start = System.nanoTime(); final List pdbFiles = dataStore.getFilesForQuery(query); @@ -171,14 +173,14 @@ public class PerformanceDb implements AutoCloseable { return dataStore.propose(query, caretIndex); } - public List getFields() { + public List getFields(final DateTimeRange dateRange) { - final List fields = dataStore.getAvailableFields(); + final List fields = dataStore.getAvailableFields(dateRange); return fields; } - public SortedSet getFieldsValues(final String query, final String fieldName) { + public SortedSet getFieldsValues(final Query query, final String fieldName) { return dataStore.getAvailableValuesForKey(query, fieldName); } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/TimeRange.java b/performanceDb/src/main/java/org/lucares/performance/db/TimeRange.java deleted file mode 100644 index ebf4806..0000000 --- a/performanceDb/src/main/java/org/lucares/performance/db/TimeRange.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.lucares.performance.db; - -import java.time.Duration; -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; -import java.time.temporal.ChronoUnit; - -public class TimeRange { - private final OffsetDateTime from; - private final OffsetDateTime to; - - public TimeRange(final OffsetDateTime from, final OffsetDateTime to) { - if (from.isAfter(to)) { - throw new IllegalArgumentException("from date must be before to date. from: " + from + " to: " + to); - } - - this.from = from; - this.to = to; - } - - public OffsetDateTime getFrom() { - return from; - } - - public OffsetDateTime getTo() { - return to; - } - - public Duration duration() { - return Duration.between(from, to); - } - - public boolean inRange(final long epochMilli) { - final long fromEpochMilli = from.toInstant().toEpochMilli(); - final long toEpochMilli = to.toInstant().toEpochMilli(); - - return fromEpochMilli <= epochMilli && epochMilli <= toEpochMilli; - } - - public boolean inRange(final OffsetDateTime date) { - return from.compareTo(date) <= 0 && to.compareTo(date) >= 0; - } - - public boolean intersect(final TimeRange timeRange) { - return inRange(timeRange.from) // - || inRange(timeRange.to) // - || timeRange.inRange(from)// - || timeRange.inRange(to); - } - - @Override - public String toString() { - - final DateTimeFormatter formatter = DateTimeFormatter.ISO_ZONED_DATE_TIME.withZone(ZoneOffset.UTC); - final String fromUtc = from.format(formatter); - final String totc = from.format(formatter); - - return "[" + fromUtc + ":" + totc + "]"; - } - - public static TimeRange ofDay(final OffsetDateTime day) { - final OffsetDateTime from = day.truncatedTo(ChronoUnit.DAYS); - final OffsetDateTime to = from.plusDays(1).minusNanos(1); - - return new TimeRange(from, to); - } -} diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java index 633c2e1..7059e5c 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java @@ -13,8 +13,10 @@ import java.util.concurrent.ThreadLocalRandom; import org.apache.commons.collections4.CollectionUtils; import org.lucares.collections.LongList; +import org.lucares.pdb.api.DateTimeRange; import org.lucares.pdb.api.Entry; import org.lucares.pdb.api.GroupResult; +import org.lucares.pdb.api.Query; import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Tags; import org.lucares.utils.DateUtils; @@ -42,12 +44,13 @@ public class PerformanceDbTest { public void testInsertRead() throws Exception { try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final long date = DateUtils.nowInUtc().toInstant().toEpochMilli(); + final OffsetDateTime nowInUtc = DateUtils.nowInUtc(); + final long date = nowInUtc.toInstant().toEpochMilli(); final long value = 1; final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue"); db.putEntry(new Entry(date, value, tags)); - final Result result = db.get(Query.createQuery(tags)); + final Result result = db.get(Query.createQuery(tags, DateTimeRange.ofDay(nowInUtc))); final LongList stream = result.singleGroup().flatMap(); Assert.assertEquals(stream.size(), 2); @@ -60,8 +63,10 @@ public class PerformanceDbTest { public void testInsertIntoMultipleFilesRead() throws Exception { try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final long dayOne = DateUtils.getDate(2016, 11, 1, 10, 0, 0).toInstant().toEpochMilli(); - final long dayTwo = DateUtils.getDate(2016, 11, 2, 12, 34, 56).toInstant().toEpochMilli(); + final DateTimeRange dateRange = new DateTimeRange(DateUtils.getDate(2016, 11, 1, 10, 0, 0), + DateUtils.getDate(2016, 11, 2, 12, 34, 56)); + final long dayOne = dateRange.getStartEpochMilli(); + final long dayTwo = dateRange.getEndEpochMilli(); final long valueOne = 1; final long valueTwo = 2; final Tags tags = Tags.createAndAddToDictionary("myKey", "myValue"); @@ -69,7 +74,7 @@ public class PerformanceDbTest { db.putEntry(new Entry(dayOne, valueOne, tags)); db.putEntry(new Entry(dayTwo, valueTwo, tags)); - final LongList stream = db.get(Query.createQuery(tags)).singleGroup().flatMap(); + final LongList stream = db.get(Query.createQuery(tags, dateRange)).singleGroup().flatMap(); Assert.assertEquals(stream.size(), 4); @@ -80,10 +85,11 @@ public class PerformanceDbTest { } } - private List generateEntries(final TimeRange timeRange, final long n, final int addToDate, final Tags tags) { + private List generateEntries(final DateTimeRange dateRange, final long n, final int addToDate, + final Tags tags) { final List result = new ArrayList<>(); - final long differenceInMs = timeRange.duration().toMillis() / n; - long currentTime = timeRange.getFrom().toInstant().toEpochMilli(); + final long differenceInMs = dateRange.duration().toMillis() / n; + long currentTime = dateRange.getStartEpochMilli(); for (long i = 0; i < n; i++) { final long value = ThreadLocalRandom.current().nextInt(0, Integer.MAX_VALUE); @@ -114,7 +120,7 @@ public class PerformanceDbTest { final int month = 1; final int day = 2; - final TimeRange timeRange = TimeRange.ofDay(DateUtils.getDate(year, month, day, 1, 1, 1)); + final DateTimeRange timeRange = DateTimeRange.ofDay(DateUtils.getDate(year, month, day, 1, 1, 1)); final Tags tags = Tags.createAndAddToDictionary("myKey", "one"); final List entries = generateEntries(timeRange, numberOfEntries, 0, tags); @@ -123,7 +129,7 @@ public class PerformanceDbTest { db.putEntries(entries); - final LongList actualEntries = db.get(Query.createQuery(tags)).singleGroup().flatMap(); + final LongList actualEntries = db.get(Query.createQuery(tags, timeRange)).singleGroup().flatMap(); Assert.assertEquals(actualEntries.size(), entries.size() * 2); for (int i = 0; i < entries.size(); i++) { @@ -158,7 +164,7 @@ public class PerformanceDbTest { final int day = 2; tags = Tags.createAndAddToDictionary("myKey", "one"); - final TimeRange timeRange = TimeRange.ofDay(DateUtils.getDate(year, month, day, 1, 1, 1)); + final DateTimeRange timeRange = DateTimeRange.ofDay(DateUtils.getDate(year, month, day, 1, 1, 1)); final List entries = generateEntries(timeRange, numberOfEntries, 0, tags); db.putEntries(entries); @@ -170,13 +176,12 @@ public class PerformanceDbTest { final int month = 1; final int day = 3; - final TimeRange timeRange = TimeRange.ofDay(DateUtils.getDate(year, month, day, 1, 1, 1)); - + final DateTimeRange timeRange = DateTimeRange.ofDay(DateUtils.getDate(year, month, day, 1, 1, 1)); final List entries = generateEntries(timeRange, numberOfEntries, 0, tags); db.putEntries(entries); expected.addAll(entries); - final LongList actualEntries = db.get(Query.createQuery(tags)).singleGroup().flatMap(); + final LongList actualEntries = db.get(Query.createQuery(tags, timeRange)).singleGroup().flatMap(); Assert.assertEquals(actualEntries.size(), expected.size() * 2); Assert.assertEquals(actualEntries, toExpectedValues(expected)); @@ -189,7 +194,8 @@ public class PerformanceDbTest { final OffsetDateTime from = DateUtils.getDate(2016, 1, 1, 00, 00, 00); final OffsetDateTime to = DateUtils.getDate(2016, 1, 1, 23, 59, 50); - final TimeRange timeRange = new TimeRange(from, to); + final DateTimeRange timeRange = new DateTimeRange(from, to); + final DateTimeRange dateRange = new DateTimeRange(from, to); final long numberOfEntries = timeRange.duration().toHours(); final Tags tagsCommon = Tags.createAndAddToDictionary("commonKey", "commonValue"); @@ -208,16 +214,16 @@ public class PerformanceDbTest { printEntries(entriesThree, "three"); db.putEntries(entriesThree); - final LongList actualEntriesOne = db.get(Query.createQuery(tagsOne)).singleGroup().flatMap(); + final LongList actualEntriesOne = db.get(Query.createQuery(tagsOne, dateRange)).singleGroup().flatMap(); Assert.assertEquals(actualEntriesOne, toExpectedValues(entriesOne)); - final LongList actualEntriesTwo = db.get(Query.createQuery(tagsTwo)).singleGroup().flatMap(); + final LongList actualEntriesTwo = db.get(Query.createQuery(tagsTwo, dateRange)).singleGroup().flatMap(); Assert.assertEquals(actualEntriesTwo, toExpectedValues(entriesTwo)); - final LongList actualEntriesThree = db.get(Query.createQuery(tagsThree)).singleGroup().flatMap(); + final LongList actualEntriesThree = db.get(Query.createQuery(tagsThree, dateRange)).singleGroup().flatMap(); Assert.assertEquals(actualEntriesThree, toExpectedValues(entriesThree)); - final LongList actualEntriesAll = db.get(Query.createQuery(tagsCommon)).singleGroup().flatMap(); + final LongList actualEntriesAll = db.get(Query.createQuery(tagsCommon, dateRange)).singleGroup().flatMap(); final List expectedAll = CollectionUtils.collate(entriesOne, CollectionUtils.collate(entriesTwo, entriesThree, EntryByDateComparator.INSTANCE), EntryByDateComparator.INSTANCE); @@ -235,7 +241,7 @@ public class PerformanceDbTest { final OffsetDateTime from = DateUtils.getDate(2016, 1, 1, 00, 00, 00); final OffsetDateTime to = DateUtils.getDate(2016, 1, 1, 23, 59, 50); - final TimeRange timeRange = new TimeRange(from, to); + final DateTimeRange timeRange = new DateTimeRange(from, to); final long numberOfEntries = timeRange.duration().toHours(); final String key = "myKey"; @@ -246,7 +252,7 @@ public class PerformanceDbTest { final LongList entriesTwo = storeEntries(db, timeRange, numberOfEntries, tagsTwo, 2); final LongList entriesThree = storeEntries(db, timeRange, numberOfEntries, tagsThree, 3); - final Result result = db.get("commonKey=commonValue", Arrays.asList(key)); + final Result result = db.get(Query.createQuery("commonKey=commonValue", timeRange), Arrays.asList(key)); final List groups = result.getGroups(); @@ -272,7 +278,7 @@ public class PerformanceDbTest { final OffsetDateTime from = DateUtils.getDate(2016, 1, 1, 00, 00, 00); final OffsetDateTime to = DateUtils.getDate(2016, 1, 1, 23, 59, 50); - final TimeRange timeRange = new TimeRange(from, to); + final DateTimeRange timeRange = new DateTimeRange(from, to); final long numberOfEntries = timeRange.duration().toHours(); final String key1 = "myKey1"; @@ -287,7 +293,8 @@ public class PerformanceDbTest { entriesTwo.addAll(storeEntries(db, timeRange, numberOfEntries, tagsTwoB, 3)); final LongList entriesThree = storeEntries(db, timeRange, numberOfEntries, tagsThree, 4); - final Result result = db.get("commonKey=commonValue", Arrays.asList(key1, key2)); + final Result result = db.get(Query.createQuery("commonKey=commonValue", timeRange), + Arrays.asList(key1, key2)); final List groups = result.getGroups(); @@ -315,7 +322,7 @@ public class PerformanceDbTest { } } - private LongList storeEntries(final PerformanceDb performanceDb, final TimeRange timeRange, + private LongList storeEntries(final PerformanceDb performanceDb, final DateTimeRange timeRange, final long numberOfEntries, final Tags tags, final int addToDate) { final List entries = generateEntries(timeRange, numberOfEntries, addToDate, tags); performanceDb.putEntries(entries); diff --git a/performanceDb/src/test/java/org/lucares/performance/db/Query.java b/performanceDb/src/test/java/org/lucares/performance/db/Query.java deleted file mode 100644 index df0a57d..0000000 --- a/performanceDb/src/test/java/org/lucares/performance/db/Query.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.lucares.performance.db; - -import java.util.ArrayList; -import java.util.List; - -import org.lucares.pdb.api.Tags; - -final class Query { - static String createQuery(final Tags tags) { - - final List terms = new ArrayList<>(); - - for (final String key : tags.getKeys()) { - final String value = tags.getValue(key); - - final StringBuilder term = new StringBuilder(); - term.append(key); - term.append("="); - term.append(value); - term.append(" "); - - terms.add(term.toString()); - } - - return String.join(" and ", terms); - } -}