From 23f800a441bf4764cd77c5013468f52b5b77179b Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Sun, 16 Dec 2018 15:38:26 +0100
Subject: [PATCH] add date parsing method that returns epochMillis instead of date object

---
 .../lucares/pdbui/CsvToEntryTransformer.java  |   3 +-
 .../lucares/pdbui/JsonToEntryTransformer.java |   3 +-
 .../lucares/pdbui/date/FastISODateParser.java | 130 +++++++++++++++-
 .../pdbui/date/FastISODateParserTest.java     | 123 +++++++++++++++++-
 4 files changed, 251 insertions(+), 8 deletions(-)

diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java
index eabc4fb..522ccfe 100644
--- a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java
+++ b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java
@@ -17,6 +17,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
 
 	private final String[] headers;
 	private final Pattern splitPattern = Pattern.compile(",");
+	private final FastISODateParser fastISODateParser = new FastISODateParser();
 
 	public CsvToEntryTransformer(final String[] headers) {
 		this.headers = headers;
@@ -51,7 +52,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
 
 			switch (headers[i]) {
 			case "@timestamp":
-				date = FastISODateParser.parse(columns[i]);
+				date = fastISODateParser.parse(columns[i]);
 				break;
 			case "duration":
 				duration = Long.parseLong(columns[i]);
diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java b/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java
index 874e245..5990575 100644
--- a/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java
+++ b/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java
@@ -24,6 +24,7 @@ public class JsonToEntryTransformer implements LineToEntryTransformer {
 
 	private final ObjectMapper objectMapper = new ObjectMapper();
 	private final ObjectReader objectReader = objectMapper.readerFor(typeReferenceForMap);
+	private final FastISODateParser fastISODateParser = new FastISODateParser();
 
 	@Override
 	public Optional toEntry(final String line) throws IOException {
@@ -86,7 +87,7 @@ public class JsonToEntryTransformer implements LineToEntryTransformer {
 
 	private OffsetDateTime getDate(final Map map) {
 		final String timestamp = (String) map.get("@timestamp");
-		final OffsetDateTime date = FastISODateParser.parse(timestamp);
+		final OffsetDateTime date = fastISODateParser.parse(timestamp);
 		return date;
 	}
 
diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java b/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java
index 0453fca..eb69a65 100644
--- a/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java
+++ b/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java
@@ -3,6 +3,7 @@ package org.lucares.pdbui.date;
 import java.time.OffsetDateTime;
 import java.time.ZoneOffset;
 import java.time.format.DateTimeFormatter;
+import java.util.concurrent.ConcurrentHashMap;
 
 /**
  * A specialized date parser that can only handle ISO-8601 like dates
@@ -12,6 +13,15 @@ import java.time.format.DateTimeFormatter;
  */
 public class FastISODateParser {
 
+	/** Epoch millis of the start (UTC) of each month, keyed by year * 12 + month - 1. */
+	private static final ConcurrentHashMap<Integer, Long> EPOCH_MILLI_MONTH_OFFSETS = new ConcurrentHashMap<>();
+
+	// Last month looked up by this instance. Both fields are written without
+	// synchronization, so parseAsTimestamp must not be called concurrently on one instance.
+	// The key starts at -1 because 0 is the valid key of 0000-01.
+	private int cached_epochMilliMonthOffsetKey = -1;
+	private long cached_epochMilliMonthOffset = 0;
+
 	/**
 	 * Parsing ISO-8601 like dates, e.g. 2011-12-03T10:15:30.123Z or
 	 * 2011-12-03T10:15:30+01:00.
@@ -19,7 +29,7 @@ public class FastISODateParser {
 	 * @param date in ISO-8601 format
 	 * @return {@link OffsetDateTime}
 	 */
-	public static OffsetDateTime parse(final String date) {
+	public OffsetDateTime parse(final String date) {
 		try {
 			final int year = Integer.parseInt(date, 0, 4, 10);
 			final int month = Integer.parseInt(date, 5, 7, 10);
@@ -41,7 +51,104 @@ public class FastISODateParser {
 		}
 	}
 
-	private static int[] parseMilliseconds(final String date, final int start) {
+	/**
+	 * Parses ISO-8601 like dates, e.g. 2018-11-18T14:42:49.123Z or
+	 * 2018-11-18T14:42:49.123+12:34, into milliseconds since the epoch. The
+	 * start of each month is looked up in a cache and the rest is plain arithmetic.
+	 * <p>
+	 * Field separators are not checked and the day of month is only checked
+	 * against the range 1-31, not against the length of the month.
+	 *
+	 * @param date in ISO-8601 format
+	 * @return milliseconds since 1970-01-01T00:00:00Z
+	 * @throws IllegalArgumentException if the date cannot be parsed
+	 */
+	public long parseAsTimestamp(final String date) {
+		try {
+			final long year = parseLong(date, 0, 4);
+			final long month = checkRange(parseLong(date, 5, 7), 1, 12, "month");
+			final long dayOfMonth = checkRange(parseLong(date, 8, 10), 1, 31, "day of month");
+			final long hour = checkRange(parseLong(date, 11, 13), 0, 23, "hour");
+			final long minute = checkRange(parseLong(date, 14, 16), 0, 59, "minute");
+			final long second = checkRange(parseLong(date, 17, 19), 0, 59, "second");
+
+			final int[] nanosAndCharsRead = parseMilliseconds(date, 19);
+			final long nanos = nanosAndCharsRead[0];
+			final int offsetTimezone = 19 + nanosAndCharsRead[1];
+
+			final long zoneOffsetMillis = date.charAt(offsetTimezone) == 'Z' ? 0
+					: parseZoneToMillis(date.subSequence(offsetTimezone, date.length()));
+
+			final int epochMilliMonthOffsetKey = (int) (year * 12 + month - 1);
+			final long epochMilliMonthOffset;
+
+			if (cached_epochMilliMonthOffsetKey == epochMilliMonthOffsetKey) {
+				epochMilliMonthOffset = cached_epochMilliMonthOffset;
+			} else {
+				epochMilliMonthOffset = EPOCH_MILLI_MONTH_OFFSETS.computeIfAbsent(epochMilliMonthOffsetKey,
+						FastISODateParser::computeEpochMilliMonthOffset);
+				cached_epochMilliMonthOffsetKey = epochMilliMonthOffsetKey;
+				cached_epochMilliMonthOffset = epochMilliMonthOffset;
+			}
+
+			final long epochMilli = epochMilliMonthOffset //
+					+ (dayOfMonth - 1) * 86_400_000 //
+					+ hour * 3_600_000 //
+					+ minute * 60_000 //
+					+ second * 1_000 //
+					+ nanos / 1_000_000//
+					- zoneOffsetMillis;
+			return epochMilli;
+
+		} catch (final RuntimeException e) {
+			throw new IllegalArgumentException("'" + date + "' is not an ISO-8601 that can be parsed with "
+					+ FastISODateParser.class.getCanonicalName(), e);
+		}
+	}
+
+	/**
+	 * Returns value unchanged if it is within [min, max], otherwise throws.
+	 */
+	private static long checkRange(final long value, final long min, final long max, final String field) {
+		if (value < min || value > max) {
+			throw new IllegalArgumentException(field + " must be in " + min + ".." + max + " but was " + value);
+		}
+		return value;
+	}
+
+	/**
+	 * Computes the epoch millis of the first instant (UTC) of a month.
+	 *
+	 * @param key year * 12 + (month - 1)
+	 * @return epoch millis of the first day of that month at 00:00 UTC
+	 */
+	private static Long computeEpochMilliMonthOffset(final int key) {
+
+		final int year = key / 12;
+		final int month = key % 12 + 1;
+
+		final OffsetDateTime date = OffsetDateTime.of(year, month, 1, 0, 0, 0, 0, ZoneOffset.UTC);
+
+		return date.toInstant().toEpochMilli();
+	}
+
+	/**
+	 * Parses the characters in [start, end) as an unsigned decimal number.
+	 */
+	private long parseLong(final String string, final int start, final int end) {
+		long result = 0;
+		for (int i = start; i < end; i++) {
+			final char c = string.charAt(i);
+			if (c < '0' || c > '9') {
+				// c is a char so that the message shows the character, not its code point
+				throw new NumberFormatException("'" + c + "' is not a number at offset " + i);
+			}
+			result = result * 10 + (c - '0');
+		}
+		return result;
+	}
+
+	private int[] parseMilliseconds(final String date, final int start) {
 		int result = 0;
 		int i = start;
 		while (i < date.length()) {
@@ -65,7 +172,7 @@ public class FastISODateParser {
 		return new int[] { result, readChars };
 	}
 
-	private static ZoneOffset parseZone(final CharSequence zoneString) {
+	private ZoneOffset parseZone(final CharSequence zoneString) {
 
 		final int hours = Integer.parseInt(zoneString, 0, 3, 10);
 		int minutes = Integer.parseInt(zoneString, 4, 6, 10);
@@ -74,4 +181,19 @@ public class FastISODateParser {
-		minutes = (hours < 0 ? -1 : 1) * minutes;
+		minutes = (zoneString.charAt(0) == '-' ? -1 : 1) * minutes;
 		return ZoneOffset.ofHoursMinutes(hours, minutes);
 	}
+
+	/**
+	 * Converts a zone offset like +01:00 or -00:30 into milliseconds.
+	 */
+	private long parseZoneToMillis(final CharSequence zoneString) {
+
+		final int hours = Integer.parseInt(zoneString, 0, 3, 10);
+		final int minutes = Integer.parseInt(zoneString, 4, 6, 10);
+
+		// Take the sign from the text: for offsets like -00:30 hours is 0 and
+		// cannot tell us that the minutes must be negative.
+		final int sign = zoneString.charAt(0) == '-' ? -1 : 1;
+		return hours * 3_600_000L + sign * minutes * 60_000L;
+	}
+
 }
diff --git a/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java b/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java
index 2251fad..37958d8 100644
--- a/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java
+++ b/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java
@@ -1,7 +1,17 @@
 package org.lucares.pdbui.date;
 
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.time.Instant;
 import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
 import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
 
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
@@ -43,7 +53,7 @@ public class FastISODateParserTest {
 	@Test(dataProvider = "providerValidDate")
 	public void testParseValidDate(final String date) {
 
-		final OffsetDateTime actualDate = FastISODateParser.parse(date);
+		final OffsetDateTime actualDate = new FastISODateParser().parse(date);
 
 		final OffsetDateTime expectedDate = OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date));
 		Assert.assertEquals(actualDate, expectedDate);
@@ -81,6 +91,115 @@ public class FastISODateParserTest {
 	@Test(expectedExceptions = IllegalArgumentException.class, dataProvider = "providerParseInvalidDate")
 	public void testParseInvalidDate(final String invalidDate) {
 
-		FastISODateParser.parse(invalidDate);
+		new FastISODateParser().parse(invalidDate);
+	}
+
+	@DataProvider(name = "providerDateToTimestamp")
+	public Object[][] providerDateToTimestamp() {
+		return new Object[][] { //
+				{ "2018-11-18T14:42:49.123Z" }, //
+				// There are no leap seconds in java-time:
+				// In reality, UTC has a leap second 2016-12-31T23:59:60Z, but java handles
+				// this differently. This makes it a little bit easier for us, because we do not
+				// have to handle this.
+				{ "2016-12-31T23:59:59.999Z" }, // before leap second
+				{ "2017-01-01T00:00:00.000Z" }, // after leap second
+
+				// normal leap days exist
+				{ "2016-02-28T23:59:59.999Z" }, // before leap day
+				{ "2016-02-29T00:00:00.000Z" }, // leap day
+				{ "2016-02-29T23:59:59.999Z" }, // leap day
+				{ "2016-03-01T00:00:00.000Z" }, // after leap day
+
+				// dates with non-UTC timezones
+				{ "2018-11-18T14:42:49.123+12:34" }, //
+				{ "2018-11-18T02:34:56.123+12:34" }, //
+
+				// dates with non-UTC timezones and leap days
+				{ "2016-02-29T00:59:59.999+01:00" }, // before leap day
+				{ "2016-02-29T01:00:00.000+01:00" }, // leap day
+				{ "2016-03-01T00:59:59.999+01:00" }, // leap day
+				{ "2016-03-01T01:00:00.000+01:00" }, // after leap day
+
+				// negative offset below one hour: hours parse as 0, the sign must come from the text
+				{ "2018-11-18T14:42:49.123-00:30" }, //
+
+				// first month of year 0000, i.e. month key 0
+				{ "0000-01-01T00:00:00.000Z" }, //
+		};
+	}
+
+	@Test(dataProvider = "providerDateToTimestamp")
+	public void testDateToTimestamp(final String date) {
+
+		final long actualEpochMilli = new FastISODateParser().parseAsTimestamp(date);
+
+		final OffsetDateTime expectedDate = OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date));
+		final long expectedEpochMilli = expectedDate.toInstant().toEpochMilli();
+		Assert.assertEquals(actualEpochMilli, expectedEpochMilli);
+	}
+
+	@Test(expectedExceptions = IllegalArgumentException.class)
+	public void testDateToTimestampRejectsInvalidMonth() {
+		new FastISODateParser().parseAsTimestamp("2018-13-01T00:00:00.000Z");
+	}
+
+	@Test(enabled = false)
+	public void test() {
+
+		final OffsetDateTime expectedDate = OffsetDateTime
+				.from(DateTimeFormatter.ISO_DATE_TIME.parse("2016-12-31T23:00:00.000Z"));
+
+		final long epochMilli = expectedDate.toInstant().toEpochMilli();
+
+		for (int i = 0; i < 1000; i++) {
+
+			final long timestamp = epochMilli + i * 10000;
+			final OffsetDateTime date = Instant.ofEpochMilli(timestamp).atOffset(ZoneOffset.UTC);
+
+			System.out.println(timestamp + " " + date.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME));
+		}
+	}
+
+	public static void main(final String[] args) throws IOException, InterruptedException {
+		final Path path = Path.of("/home/andi/ws/performanceDb/data/production/dates2.csv");
+
+		for (int i = 0; i < 15; i++) {
+			final List<String> dates = new ArrayList<>();
+
+			try (final BufferedReader reader = new BufferedReader(
+					new FileReader(path.toFile(), StandardCharsets.UTF_8))) {
+				String line;
+				while ((line = reader.readLine()) != null) {
+					dates.add(line);
+				}
+			}
+
+			System.gc();
+			TimeUnit.MILLISECONDS.sleep(100);
+			System.gc();
+			TimeUnit.MILLISECONDS.sleep(100);
+			System.gc();
+			TimeUnit.MILLISECONDS.sleep(100);
+			System.gc();
+
+			TimeUnit.SECONDS.sleep(1);
+
+			final long start = System.nanoTime();
+			final FastISODateParser fastISODateParser = new FastISODateParser();
+
+			for (final String date : dates) {
+				fastISODateParser.parseAsTimestamp(date);
+				// final long timestamp =
+				// fastISODateParser.parse(date).toInstant().toEpochMilli();
+//				final long timestamp = OffsetDateTime.parse(date, DateTimeFormatter.ISO_OFFSET_DATE_TIME)
+//						.toInstant().toEpochMilli();
+				// sum += timestamp;
+			}
+
+			final double millis = (System.nanoTime() - start) / 1_000_000.0;
+			final long datesPerSecond = (long) (dates.size() / (millis / 1000.0));
+			System.out.println("duration: " + millis + "ms ; speed: " + datesPerSecond + " dates/s");
+		}
 	}
 }