From 24fcfd776369195a050037024b3d6a79b37727c0 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 28 Sep 2018 19:07:01 +0200 Subject: [PATCH] prepare the addition of a date index --- .../lucares/pdb/datastore}/DateTimeRange.java | 5 +- .../datastore/lang/DateIndexExtension.java | 109 ++++++++++++++++++ .../pdb/datastore/lang/Expression.java | 18 +++ .../lang/ExpressionToDocIdVisitor.java | 1 - .../lang/DateIndexExtensionTest.java | 69 +++++++++++ .../main/java/org/lucares/pdb/api/Entry.java | 39 ++----- .../main/java/org/lucares/pdb/api/Tags.java | 11 ++ .../lucares/pdb/plot/api/PlotSettings.java | 2 + .../lucares/recommind/logs/ScatterPlot.java | 2 +- .../org/lucares/performance/db/PdbWriter.java | 6 +- .../performance/db/PerformanceDbTest.java | 6 +- 11 files changed, 232 insertions(+), 36 deletions(-) rename {pdb-plotting/src/main/java/org/lucares/pdb/plot/api => data-store/src/main/java/org/lucares/pdb/datastore}/DateTimeRange.java (76%) create mode 100644 data-store/src/main/java/org/lucares/pdb/datastore/lang/DateIndexExtension.java create mode 100644 data-store/src/test/java/org/lucares/pdb/datastore/lang/DateIndexExtensionTest.java diff --git a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/DateTimeRange.java b/data-store/src/main/java/org/lucares/pdb/datastore/DateTimeRange.java similarity index 76% rename from pdb-plotting/src/main/java/org/lucares/pdb/plot/api/DateTimeRange.java rename to data-store/src/main/java/org/lucares/pdb/datastore/DateTimeRange.java index f880888..08bfe23 100644 --- a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/DateTimeRange.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/DateTimeRange.java @@ -1,8 +1,11 @@ -package org.lucares.pdb.plot.api; +package org.lucares.pdb.datastore; import java.time.OffsetDateTime; public class DateTimeRange { + + public static final DateTimeRange MAX = new DateTimeRange(OffsetDateTime.MIN, OffsetDateTime.MAX); + private final OffsetDateTime start; private final OffsetDateTime end; diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/DateIndexExtension.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/DateIndexExtension.java new file mode 100644 index 0000000..c1c1dc4 --- /dev/null +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/DateIndexExtension.java @@ -0,0 +1,109 @@ +package org.lucares.pdb.datastore.lang; + +import java.time.Instant; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; + +import org.lucares.pdb.datastore.DateTimeRange; + +public class DateIndexExtension { + + /** + * This date pattern defines the resolution of the date index + */ + private static final DateTimeFormatter DATE_PATTERN = DateTimeFormatter.ofPattern("yyyyMM"); + + // visible for test + static final ConcurrentNavigableMap DATE_PREFIX_CACHE = new ConcurrentSkipListMap<>(); + + static Set toDateIndexPrefix(final DateTimeRange dateRange) { + final Set result = new TreeSet<>(); + if (Objects.equals(dateRange, DateTimeRange.MAX)) { + result.add("*"); + } else { + OffsetDateTime current = dateRange.getStart(); + while (current.isBefore(dateRange.getEnd())) { + + result.add(toDateIndexPrefix(current)); + current = current.plusMonths(1); + + } + result.add(toDateIndexPrefix(dateRange.getEnd())); + } + return result; + } + + static String toDateIndexPrefix(final OffsetDateTime time) { + return time.format(DATE_PATTERN); + } + + public static String toDateIndexPrefix(final long epochMilli) { + + final Entry value = DATE_PREFIX_CACHE.floorEntry(epochMilli); + + String result; + if (value == null || !value.getValue().contains(epochMilli)) { + final DatePrefixAndRange newValue = toDatePrefixAndRange(epochMilli); + DATE_PREFIX_CACHE.put(newValue.getMinEpochMilli(), newValue); + result = newValue.getDatePrefix(); + } else { + result = value.getValue().getDatePrefix(); + } + + return result; + } + + private static DatePrefixAndRange toDatePrefixAndRange(final long epochMilli) { + final OffsetDateTime date = Instant.ofEpochMilli(epochMilli).atOffset(ZoneOffset.UTC); + final OffsetDateTime beginOfMonth = date.withDayOfMonth(1).withHour(0).withMinute(0).withSecond(0).withNano(0); + final OffsetDateTime endOfMonth = beginOfMonth.plusMonths(1).minusNanos(1); + + final String datePrefix = date.format(DATE_PATTERN); + final long minEpochMilli = beginOfMonth.toInstant().toEpochMilli(); + final long maxEpochMilli = endOfMonth.toInstant().toEpochMilli(); + + return new DatePrefixAndRange(datePrefix, minEpochMilli, maxEpochMilli); + } + +} + +class DatePrefixAndRange { + private final String datePrefix; + private final long minEpochMilli; + private final long maxEpochMilli; + + public DatePrefixAndRange(final String datePrefix, final long minEpochMilli, final long maxEpochMilli) { + super(); + this.datePrefix = datePrefix; + this.minEpochMilli = minEpochMilli; + this.maxEpochMilli = maxEpochMilli; + } + + public String getDatePrefix() { + return datePrefix; + } + + public long getMinEpochMilli() { + return minEpochMilli; + } + + public long getMaxEpochMilli() { + return maxEpochMilli; + } + + public boolean contains(final long epochMilli) { + return minEpochMilli <= epochMilli && epochMilli <= maxEpochMilli; + } + + @Override + public String toString() { + return datePrefix + " (" + minEpochMilli + " - " + maxEpochMilli + ")"; + } +} \ No newline at end of file diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java index 55f3551..9fdb0c1 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/Expression.java @@ -1,6 +1,7 @@ package org.lucares.pdb.datastore.lang; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.lucares.utils.CollectionUtils; @@ -190,6 +191,19 @@ abstract public class Expression { return true; } + public static Expression create(final List or) { + + if (or.size() == 1) { + return or.get(0); + } else { + Or result = new Or(or.get(0), or.get(1)); + for (int i = 2; i < or.size(); i++) { + result = new Or(result, or.get(i)); + } + return result; + } + } + } static class And extends Expression { @@ -465,6 +479,10 @@ abstract public class Expression { private final String property; private final List values; + public InExpression(final String property, final String value) { + this(property, Arrays.asList(value)); + } + public InExpression(final String property, final List values) { this.property = property; this.values = values; diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java index 70039d1..f062d99 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java @@ -49,7 +49,6 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { } private static final Map EMPTY_VALUES = Collections.emptyMap(); - private static final IntList EMPTY_DOC_IDS = new IntList(); private final Map> keyToValueToDocId; private final AllDocIds allDocIds; diff --git a/data-store/src/test/java/org/lucares/pdb/datastore/lang/DateIndexExtensionTest.java b/data-store/src/test/java/org/lucares/pdb/datastore/lang/DateIndexExtensionTest.java new file mode 100644 index 0000000..ce7e924 --- /dev/null +++ b/data-store/src/test/java/org/lucares/pdb/datastore/lang/DateIndexExtensionTest.java @@ -0,0 +1,69 @@ +package org.lucares.pdb.datastore.lang; + +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.lucares.pdb.datastore.DateTimeRange; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +@Test +public class DateIndexExtensionTest { + + @DataProvider + public Object[][] provider() { + + final List result = new ArrayList<>(); + + { + final OffsetDateTime start = OffsetDateTime.of(2018, 1, 31, 0, 0, 0, 0, ZoneOffset.UTC); + final OffsetDateTime end = OffsetDateTime.of(2018, 1, 31, 0, 0, 0, 0, ZoneOffset.UTC); + final Set expected = Set.of("201801"); + result.add(new Object[] { start, end, expected }); + } + { + final OffsetDateTime start = OffsetDateTime.of(2017, 11, 1, 0, 0, 0, 0, ZoneOffset.UTC); + final OffsetDateTime end = OffsetDateTime.of(2018, 02, 1, 0, 0, 0, 0, ZoneOffset.UTC); + final Set expected = Set.of("201711", "201712", "201801", "201802"); + result.add(new Object[] { start, end, expected }); + } + { + // check that adding one month to Jan 31 does not skip the February + final OffsetDateTime start = OffsetDateTime.of(2018, 1, 31, 0, 0, 0, 0, ZoneOffset.UTC); + final OffsetDateTime end = OffsetDateTime.of(2018, 3, 31, 0, 0, 0, 0, ZoneOffset.UTC); + final Set expected = Set.of("201801", "201802", "201803"); + result.add(new Object[] { start, end, expected }); + } + + return result.toArray(new Object[0][]); + } + + @Test(dataProvider = "provider") + public void test(final OffsetDateTime start, final OffsetDateTime end, final Set expected) { + + final DateTimeRange dateRange = new DateTimeRange(start, end); + + final Set actual = DateIndexExtension.toDateIndexPrefix(dateRange); + + Assert.assertEquals(actual, expected); + } + + public void testDateToDateIndexPrefix() { + + final long mid_201711 = OffsetDateTime.of(2017, 11, 23, 2, 2, 2, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); + final long mid_201712 = OffsetDateTime.of(2017, 12, 7, 1, 1, 1, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); + final long min_201801 = OffsetDateTime.of(2018, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); + final long max_201801 = OffsetDateTime.of(2018, 1, 31, 23, 59, 59, 999_999_999, ZoneOffset.UTC).toInstant() + .toEpochMilli(); + + Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(mid_201712), "201712"); + Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(min_201801), "201801"); + Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(max_201801), "201801"); + Assert.assertEquals(DateIndexExtension.toDateIndexPrefix(mid_201711), "201711"); + System.out.println(DateIndexExtension.DATE_PREFIX_CACHE); + } +} diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java b/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java index eb32425..8bcc424 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java @@ -1,8 +1,6 @@ package org.lucares.pdb.api; -import java.time.Instant; import java.time.OffsetDateTime; -import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; public class Entry { @@ -11,39 +9,22 @@ public class Entry { * A special {@link Entry} that can be used as poison object for * {@link BlockingQueueIterator}. */ - public static final Entry POISON = new Entry(0, -1); - - private final long epochMilli; + public static final Entry POISON = new Entry(OffsetDateTime.MIN, -1, null); private final long value; private final Tags tags; + private final OffsetDateTime date; + public Entry(final OffsetDateTime date, final long value, final Tags tags) { + this.date = date; this.tags = tags; - this.epochMilli = date.toInstant().toEpochMilli(); this.value = value; } - public Entry(final long epochMilli, final long value, final Tags tags) { - if (value < 0) { - throw new IllegalArgumentException("value must be between 0 and " + Long.MAX_VALUE + ", but was " + value); - } - - this.epochMilli = epochMilli; - this.value = value; - this.tags = tags; - } - - private Entry(final long epochMilli, final long value) { - this.epochMilli = epochMilli; - this.value = value; - this.tags = null; - } - public OffsetDateTime getDate() { - final Instant instant = Instant.ofEpochMilli(epochMilli); - return OffsetDateTime.ofInstant(instant, ZoneOffset.UTC); + return date; } public long getValue() { @@ -51,7 +32,7 @@ public class Entry { } public long getEpochMilli() { - return epochMilli; + return date.toInstant().toEpochMilli(); } public Tags getTags() { @@ -72,7 +53,7 @@ public class Entry { public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + (int) (epochMilli ^ (epochMilli >>> 32)); + result = prime * result + ((date == null) ? 0 : date.hashCode()); result = prime * result + ((tags == null) ? 0 : tags.hashCode()); result = prime * result + (int) (value ^ (value >>> 32)); return result; @@ -87,7 +68,10 @@ public class Entry { if (getClass() != obj.getClass()) return false; final Entry other = (Entry) obj; - if (epochMilli != other.epochMilli) + if (date == null) { + if (other.date != null) + return false; + } else if (!date.equals(other.date)) return false; if (tags == null) { if (other.tags != null) @@ -98,4 +82,5 @@ public class Entry { return false; return true; } + } diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java index f508d31..c1eb330 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java @@ -1,6 +1,7 @@ package org.lucares.pdb.api; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -9,6 +10,7 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.BiConsumer; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -162,6 +164,15 @@ public class Tags { } } + public Tags mapTags(final Function tagMapFuntion) { + final Set tags = toTags(); + final Collection mappedTags = new ArrayList<>(tags.size()); + for (final Tag tag : tags) { + mappedTags.add(tagMapFuntion.apply(tag)); + } + return Tags.create(mappedTags); + } + @Override public String toString() { return "Tags [filename=" + serialize() + ", tags=" + toTags() + "]"; diff --git a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java b/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java index d5bb762..cab100a 100644 --- a/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java +++ b/pdb-plotting/src/main/java/org/lucares/pdb/plot/api/PlotSettings.java @@ -7,6 +7,8 @@ import java.time.format.DateTimeFormatter; import java.util.List; import java.util.regex.Pattern; +import org.lucares.pdb.datastore.DateTimeRange; + import com.google.common.base.Preconditions; public class PlotSettings { diff --git a/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java index 4450f80..e5d727b 100644 --- a/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java +++ b/pdb-plotting/src/main/java/org/lucares/recommind/logs/ScatterPlot.java @@ -27,8 +27,8 @@ import org.lucares.collections.LongList; import org.lucares.pdb.api.GroupResult; import org.lucares.pdb.api.Result; import org.lucares.pdb.api.Tags; +import org.lucares.pdb.datastore.DateTimeRange; import org.lucares.pdb.plot.api.CustomAggregator; -import org.lucares.pdb.plot.api.DateTimeRange; import org.lucares.pdb.plot.api.Limit; import org.lucares.pdb.plot.api.PlotSettings; import org.lucares.pdb.plot.api.TimeRangeUnitInternal; diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java index 752fed0..efcf321 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java @@ -24,11 +24,7 @@ class PdbWriter implements AutoCloseable, Flushable { bsFile = BSFile.existingFile(pdbFile.getRootBlockNumber(), diskStorage); final Optional optionalLastValue = bsFile.getLastValue(); - if (optionalLastValue.isPresent()) { - lastEpochMilli = optionalLastValue.get(); - } else { - lastEpochMilli = 0; - } + lastEpochMilli = optionalLastValue.orElse(0L); } public PdbFile getPdbFile() { diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java index ef74085..4075174 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java @@ -3,7 +3,9 @@ package org.lucares.performance.db; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.time.Instant; import java.time.OffsetDateTime; +import java.time.ZoneOffset; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -84,7 +86,9 @@ public class PerformanceDbTest { for (long i = 0; i < n; i++) { final long value = ThreadLocalRandom.current().nextInt(0, Integer.MAX_VALUE); - result.add(new Entry(currentTime + addToDate, value, tags)); + final OffsetDateTime date = OffsetDateTime.ofInstant(Instant.ofEpochMilli(currentTime + addToDate), + ZoneOffset.UTC); + result.add(new Entry(date, value, tags)); currentTime += differenceInMs; }