From 40f4506e13d7251f61234286d0623c5284c80b73 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sun, 16 Dec 2018 19:24:47 +0100 Subject: [PATCH] use FastISODateParser.parseAsEpochMilli Compared to FastISODateParser.parse, which returns an OffsetDateTime object, parseAsEpochMilli returns the epoch time millis. The performance improvement for date parsing alone is roughly 100% (8m dates/s to 18m dates/s). Insertion speed improved from 13-14s for 1.6m entries to 11.5-12.5s. --- .../main/java/org/lucares/pdb/api/Entry.java | 26 ++++---- .../lucares/pdbui/CsvToEntryTransformer.java | 8 +-- .../lucares/pdbui/JsonToEntryTransformer.java | 10 ++-- .../java/org/lucares/pdbui/TcpIngestor.java | 60 ------------------- .../lucares/pdbui/date/FastISODateParser.java | 2 +- .../pdbui/date/FastISODateParserTest.java | 4 +- .../lucares/performance/db/PerformanceDb.java | 2 +- .../performance/db/PerformanceDbTest.java | 18 +++--- .../performance/db/TagsToFilesTest.java | 18 +++--- 9 files changed, 40 insertions(+), 108 deletions(-) diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java b/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java index 8bcc424..1f41c7f 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java @@ -1,6 +1,8 @@ package org.lucares.pdb.api; +import java.time.Instant; import java.time.OffsetDateTime; +import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; public class Entry { @@ -9,30 +11,26 @@ public class Entry { * A special {@link Entry} that can be used as poison object for * {@link BlockingQueueIterator}. */ - public static final Entry POISON = new Entry(OffsetDateTime.MIN, -1, null); + public static final Entry POISON = new Entry(Long.MIN_VALUE, -1, null); private final long value; private final Tags tags; - private final OffsetDateTime date; + private final long epochMilli; - public Entry(final OffsetDateTime date, final long value, final Tags tags) { - this.date = date; + public Entry(final long epochMilli, final long value, final Tags tags) { + this.epochMilli = epochMilli; this.tags = tags; this.value = value; } - public OffsetDateTime getDate() { - return date; - } - public long getValue() { return value; } public long getEpochMilli() { - return date.toInstant().toEpochMilli(); + return epochMilli; } public Tags getTags() { @@ -45,7 +43,7 @@ public class Entry { return "POISON ENTRY"; } - final OffsetDateTime date = getDate(); + final OffsetDateTime date = Instant.ofEpochMilli(epochMilli).atOffset(ZoneOffset.UTC); return date.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + " = " + value + " (" + tags.asString() + ")"; } @@ -53,7 +51,7 @@ public class Entry { public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((date == null) ? 0 : date.hashCode()); + result = prime * result + (int) (epochMilli ^ (epochMilli >>> 32)); result = prime * result + ((tags == null) ? 0 : tags.hashCode()); result = prime * result + (int) (value ^ (value >>> 32)); return result; @@ -68,10 +66,7 @@ public class Entry { if (getClass() != obj.getClass()) return false; final Entry other = (Entry) obj; - if (date == null) { - if (other.date != null) - return false; - } else if (!date.equals(other.date)) + if (epochMilli != other.epochMilli) return false; if (tags == null) { if (other.tags != null) @@ -82,5 +77,4 @@ public class Entry { return false; return true; } - } diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java index 522ccfe..9b56476 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java @@ -1,7 +1,6 @@ package org.lucares.pdbui; import java.io.IOException; -import java.time.OffsetDateTime; import java.util.Optional; import java.util.regex.Pattern; @@ -45,14 +44,15 @@ public class CsvToEntryTransformer implements LineToEntryTransformer { private Optional createEntry(final String[] columns) { - OffsetDateTime date = null; + long epochMilli = 0; long duration = Long.MIN_VALUE; final TagsBuilder tagsBuilder = TagsBuilder.create(); for (int i = 0; i < columns.length; i++) { switch (headers[i]) { case "@timestamp": - date = fastISODateParser.parse(columns[i]); + epochMilli = fastISODateParser.parseAsEpochMilli(columns[i]); + ; break; case "duration": duration = Long.parseLong(columns[i]); @@ -65,7 +65,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer { } final Tags tags = tagsBuilder.build(); - final Entry entry = new Entry(date, duration, tags); + final Entry entry = new Entry(epochMilli, duration, tags); return Optional.of(entry); } } diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java b/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java index 5990575..2a84f96 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java @@ -1,7 +1,6 @@ package org.lucares.pdbui; import java.io.IOException; -import java.time.OffsetDateTime; import java.util.Map; import java.util.Optional; @@ -40,12 +39,12 @@ public class JsonToEntryTransformer implements LineToEntryTransformer { try { if (map.containsKey("duration") && map.containsKey("@timestamp")) { - final OffsetDateTime date = getDate(map); + final long epochMilli = getDate(map); final long duration = (int) map.get("duration"); final Tags tags = createTags(map); - final Entry entry = new Entry(date, duration, tags); + final Entry entry = new Entry(epochMilli, duration, tags); return Optional.of(entry); } else { LOGGER.info("Skipping invalid entry: " + map); @@ -84,11 +83,10 @@ public class JsonToEntryTransformer implements LineToEntryTransformer { return tags.build(); } - private OffsetDateTime getDate(final Map map) { + private long getDate(final Map map) { final String timestamp = (String) map.get("@timestamp"); - final OffsetDateTime date = fastISODateParser.parse(timestamp); - return date; + return fastISODateParser.parseAsEpochMilli(timestamp); } } diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java b/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java index 2b67da1..082e2c1 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/TcpIngestor.java @@ -9,9 +9,6 @@ import java.net.Socket; import java.net.SocketAddress; import java.net.SocketTimeoutException; import java.nio.file.Path; -import java.time.OffsetDateTime; -import java.time.format.DateTimeFormatter; -import java.util.Map; import java.util.Optional; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.Callable; @@ -24,8 +21,6 @@ import java.util.regex.Pattern; import javax.annotation.PreDestroy; import org.lucares.pdb.api.Entry; -import org.lucares.pdb.api.Tags; -import org.lucares.pdb.api.TagsBuilder; import org.lucares.performance.db.BlockingQueueIterator; import org.lucares.performance.db.PerformanceDb; import org.lucares.recommind.logs.Config; @@ -109,61 +104,6 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean { return null; } - - public Optional createEntry(final Map map) { - try { - - if (map.containsKey("duration") && map.containsKey("@timestamp")) { - final OffsetDateTime date = getDate(map); - final long duration = (int) map.get("duration"); - - final Tags tags = createTags(map); - - final Entry entry = new Entry(date, duration, tags); - return Optional.of(entry); - } else { - LOGGER.info("Skipping invalid entry: " + map); - return Optional.empty(); - } - } catch (final Exception e) { - LOGGER.error("Failed to create entry from map: " + map, e); - return Optional.empty(); - } - } - - private Tags createTags(final Map map) { - final TagsBuilder tags = TagsBuilder.create(); - for (final java.util.Map.Entry e : map.entrySet()) { - - final String key = e.getKey(); - final Object value = e.getValue(); - - switch (key) { - case "@timestamp": - case "duration": - // these fields are not tags - break; - case "tags": - // ignore: we only support key/value tags - break; - default: - if (value instanceof String) { - tags.add(key, (String) value); - } else if (value != null) { - tags.add(key, String.valueOf(value)); - } - break; - } - } - return tags.build(); - } - - private OffsetDateTime getDate(final Map map) { - final String timestamp = (String) map.get("@timestamp"); - - final OffsetDateTime date = OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_ZONED_DATE_TIME); - return date; - } } public TcpIngestor(final Path dataDirectory) throws IOException { diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java b/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java index eb69a65..e72b473 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java @@ -47,7 +47,7 @@ public class FastISODateParser { } } - public long parseAsTimestamp(final String date) { + public long parseAsEpochMilli(final String date) { try { // final long year = Integer.parseInt(date, 0, 4, 10); // final long month = Integer.parseInt(date, 5, 7, 10); diff --git a/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java b/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java index 37958d8..b32a89f 100644 --- a/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java +++ b/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java @@ -126,7 +126,7 @@ public class FastISODateParserTest { @Test(dataProvider = "providerDateToTimestamp") public void testDateToTimestamp(final String date) { - final long actualEpochMilli = new FastISODateParser().parseAsTimestamp(date); + final long actualEpochMilli = new FastISODateParser().parseAsEpochMilli(date); final OffsetDateTime expectedDate = OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date)); final long expectedEpochMilli = expectedDate.toInstant().toEpochMilli(); @@ -178,7 +178,7 @@ public class FastISODateParserTest { final FastISODateParser fastISODateParser = new FastISODateParser(); for (final String date : dates) { - fastISODateParser.parseAsTimestamp(date); + fastISODateParser.parseAsEpochMilli(date); // final long timestamp = // fastISODateParser.parse(date).toInstant().toEpochMilli(); // final long timestamp = OffsetDateTime.parse(date, DateTimeFormatter.ISO_OFFSET_DATE_TIME) diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java index 0ac3bd0..99ea8ae 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PerformanceDb.java @@ -53,7 +53,7 @@ public class PerformanceDb implements AutoCloseable { public void putEntries(final BlockingIterator entries) throws WriteException { - final Duration timeBetweenSyncs = Duration.ofSeconds(10); + final Duration timeBetweenSyncs = Duration.ofSeconds(1); long count = 0; long insertionsSinceLastSync = 0; diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java index ec5fce0..0251943 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java @@ -41,7 +41,7 @@ public class PerformanceDbTest { public void testInsertRead() throws Exception { try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final OffsetDateTime date = DateUtils.nowInUtc(); + final long date = DateUtils.nowInUtc().toInstant().toEpochMilli(); final long value = 1; final Tags tags = Tags.create("myKey", "myValue"); db.putEntry(new Entry(date, value, tags)); @@ -51,7 +51,7 @@ public class PerformanceDbTest { Assert.assertEquals(stream.size(), 2); - Assert.assertEquals(stream.get(0), date.toInstant().toEpochMilli()); + Assert.assertEquals(stream.get(0), date); Assert.assertEquals(stream.get(1), value); } } @@ -59,8 +59,8 @@ public class PerformanceDbTest { public void testInsertIntoMultipleFilesRead() throws Exception { try (PerformanceDb db = new PerformanceDb(dataDirectory)) { - final OffsetDateTime dayOne = DateUtils.getDate(2016, 11, 1, 10, 0, 0); - final OffsetDateTime dayTwo = DateUtils.getDate(2016, 11, 2, 12, 34, 56); + final long dayOne = DateUtils.getDate(2016, 11, 1, 10, 0, 0).toInstant().toEpochMilli(); + final long dayTwo = DateUtils.getDate(2016, 11, 2, 12, 34, 56).toInstant().toEpochMilli(); final long valueOne = 1; final long valueTwo = 2; final Tags tags = Tags.create("myKey", "myValue"); @@ -72,9 +72,9 @@ public class PerformanceDbTest { Assert.assertEquals(stream.size(), 4); - Assert.assertEquals(stream.get(0), dayOne.toInstant().toEpochMilli()); + Assert.assertEquals(stream.get(0), dayOne); Assert.assertEquals(stream.get(1), valueOne); - Assert.assertEquals(stream.get(2), dayTwo.toInstant().toEpochMilli()); + Assert.assertEquals(stream.get(2), dayTwo); Assert.assertEquals(stream.get(3), valueTwo); } } @@ -86,8 +86,8 @@ public class PerformanceDbTest { for (long i = 0; i < n; i++) { final long value = ThreadLocalRandom.current().nextInt(0, Integer.MAX_VALUE); - final OffsetDateTime date = OffsetDateTime.ofInstant(Instant.ofEpochMilli(currentTime + addToDate), - ZoneOffset.UTC); + final long date = OffsetDateTime.ofInstant(Instant.ofEpochMilli(currentTime + addToDate), ZoneOffset.UTC) + .toInstant().toEpochMilli(); result.add(new Entry(date, value, tags)); currentTime += differenceInMs; @@ -333,7 +333,7 @@ public class PerformanceDbTest { int index = 0; for (final Entry entry : entriesOne) { - System.out.printf("%4d %s %d (%s)\n", index, entry.getDate(), entry.getValue(), label); + System.out.printf("%4d %s %d (%s)\n", index, entry.getEpochMilli(), entry.getValue(), label); index++; } } diff --git a/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java b/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java index 271645b..e07891d 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java @@ -51,18 +51,18 @@ public class TagsToFilesTest { final TagsToFile tagsToFile = new TagsToFile(dataStore);) { // dayC is before dayA and dayB - final OffsetDateTime dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1); - final OffsetDateTime dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1); - final OffsetDateTime dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1); + final long dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1).toInstant().toEpochMilli(); + final long dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1).toInstant().toEpochMilli(); + final long dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1).toInstant().toEpochMilli(); final Tags tags = Tags.create("myKey", "myValue"); - final PdbWriter writerForDayA = tagsToFile.getWriter(dayA.toInstant().toEpochMilli(), tags); + final PdbWriter writerForDayA = tagsToFile.getWriter(dayA, tags); writerForDayA.write(new Entry(dayA, 1, tags)); - final PdbWriter writerForDayB = tagsToFile.getWriter(dayB.toInstant().toEpochMilli(), tags); + final PdbWriter writerForDayB = tagsToFile.getWriter(dayB, tags); writerForDayB.write(new Entry(dayB, 2, tags)); - final PdbWriter writerForDayC = tagsToFile.getWriter(dayC.toInstant().toEpochMilli(), tags); + final PdbWriter writerForDayC = tagsToFile.getWriter(dayC, tags); writerForDayC.write(new Entry(dayC, 3, tags)); Assert.assertSame(writerForDayA, writerForDayB); @@ -75,14 +75,14 @@ public class TagsToFilesTest { try (final DataStore dataStore = new DataStore(dataDirectory); // final TagsToFile tagsToFile = new TagsToFile(dataStore)) { - final OffsetDateTime timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1); + final long timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1).toInstant().toEpochMilli(); final Tags tags = Tags.create("myKey", "myValue"); - final PdbWriter fileA = tagsToFile.getWriter(timestamp.toInstant().toEpochMilli(), tags); + final PdbWriter fileA = tagsToFile.getWriter(timestamp, tags); fileA.write(new Entry(timestamp, 1, tags)); - final PdbWriter fileB = tagsToFile.getWriter(timestamp.toInstant().toEpochMilli(), tags); + final PdbWriter fileB = tagsToFile.getWriter(timestamp, tags); fileA.write(new Entry(timestamp, 2, tags)); Assert.assertEquals(fileA, fileB);