From 218ea9ed68433e0c4778a4282b9d4018771653da Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Mon, 19 Nov 2018 19:23:57 +0100 Subject: [PATCH] use custom date parser A specialized date parser that can only handle ISO-8601 like dates (2011-12-03T10:15:30.123Z or 2011-12-03T10:15:30+01:00) but does this roughly 10 times faster than DateTimeFormatter and 5 times faster than the FastDateParser of commons-lang3. --- .../pdb/map/PersistentMapDiskNode.java | 2 +- .../lucares/pdbui/CsvToEntryTransformer.java | 6 +- .../lucares/pdbui/JsonToEntryTransformer.java | 4 +- .../lucares/pdbui/date/FastISODateParser.java | 77 +++++++++++++++++ .../pdbui/date/FastISODateParserTest.java | 86 +++++++++++++++++++ 5 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java create mode 100644 pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java diff --git a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java index 597a3b4..db1b827 100644 --- a/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java +++ b/block-storage/src/main/java/org/lucares/pdb/map/PersistentMapDiskNode.java @@ -7,9 +7,9 @@ import java.util.List; import java.util.stream.Collectors; import org.lucares.collections.LongList; -import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder; import org.lucares.pdb.map.NodeEntry.ValueType; import org.lucares.utils.Preconditions; +import org.lucares.utils.byteencoder.VariableByteEncoder; /** *
diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java
index 4987d64..eabc4fb 100644
--- a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java
+++ b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java
@@ -2,13 +2,13 @@ package org.lucares.pdbui;
 
 import java.io.IOException;
 import java.time.OffsetDateTime;
-import java.time.format.DateTimeFormatter;
 import java.util.Optional;
 import java.util.regex.Pattern;
 
 import org.lucares.pdb.api.Entry;
 import org.lucares.pdb.api.Tags;
 import org.lucares.pdb.api.TagsBuilder;
+import org.lucares.pdbui.date.FastISODateParser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -28,7 +28,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
 		try {
 
 			final String[] columns = splitPattern.split(line);
-			if (columns.length == headers.length) {
+			if (columns.length == headers.length && !columns[0].startsWith("@")) {
 
 				result = createEntry(columns);
 
@@ -51,7 +51,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
 
 			switch (headers[i]) {
 			case "@timestamp":
-				date = OffsetDateTime.parse(columns[i], DateTimeFormatter.ISO_ZONED_DATE_TIME);
+				date = FastISODateParser.parse(columns[i]);
 				break;
 			case "duration":
 				duration = Long.parseLong(columns[i]);
diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java b/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java
index 58b5e01..874e245 100644
--- a/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java
+++ b/pdb-ui/src/main/java/org/lucares/pdbui/JsonToEntryTransformer.java
@@ -2,13 +2,13 @@ package org.lucares.pdbui;
 
 import java.io.IOException;
 import java.time.OffsetDateTime;
-import java.time.format.DateTimeFormatter;
 import java.util.Map;
 import java.util.Optional;
 
 import org.lucares.pdb.api.Entry;
 import org.lucares.pdb.api.Tags;
 import org.lucares.pdb.api.TagsBuilder;
+import org.lucares.pdbui.date.FastISODateParser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -86,7 +86,7 @@ public class JsonToEntryTransformer implements LineToEntryTransformer {
 	private OffsetDateTime getDate(final Map map) {
 		final String timestamp = (String) map.get("@timestamp");
 
-		final OffsetDateTime date = OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_ZONED_DATE_TIME);
+		final OffsetDateTime date = FastISODateParser.parse(timestamp);
 		return date;
 	}
 
diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java b/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java
new file mode 100644
index 0000000..0453fca
--- /dev/null
+++ b/pdb-ui/src/main/java/org/lucares/pdbui/date/FastISODateParser.java
@@ -0,0 +1,77 @@
+package org.lucares.pdbui.date;
+
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+/**
+ * A specialized date parser that can only handle ISO-8601 like dates
+ * (2011-12-03T10:15:30.123Z or 2011-12-03T10:15:30+01:00) but does this roughly
+ * 10 times faster than {@link DateTimeFormatter} and 5 times faster than the
+ * FastDateParser of commons-lang3.
+ */
+public class FastISODateParser {
+
+	/**
+	 * Parses ISO-8601 like dates, e.g. 2011-12-03T10:15:30.123Z or
+	 * 2011-12-03T10:15:30+01:00. Field separators and trailing characters are not checked.
+	 *
+	 * @param date in ISO-8601 format
+	 * @return {@link OffsetDateTime}
+	 * @throws IllegalArgumentException if the date cannot be parsed
+	 */
+	public static OffsetDateTime parse(final String date) {
+		try {
+			final int year = Integer.parseInt(date, 0, 4, 10);
+			final int month = Integer.parseInt(date, 5, 7, 10);
+			final int dayOfMonth = Integer.parseInt(date, 8, 10, 10);
+			final int hour = Integer.parseInt(date, 11, 13, 10);
+			final int minute = Integer.parseInt(date, 14, 16, 10);
+			final int second = Integer.parseInt(date, 17, 19, 10);
+
+			final int[] nanosAndCharsRead = parseMilliseconds(date, 19);
+			final int nanos = nanosAndCharsRead[0];
+			final int offsetTimezone = 19 + nanosAndCharsRead[1];
+
+			final ZoneOffset offset = date.charAt(offsetTimezone) == 'Z' ? ZoneOffset.UTC
+					: parseZone(date.subSequence(offsetTimezone, date.length()));
+			return OffsetDateTime.of(year, month, dayOfMonth, hour, minute, second, nanos, offset);
+		} catch (final RuntimeException e) {
+			throw new IllegalArgumentException("'" + date + "' is not an ISO-8601 that can be parsed with "
+					+ FastISODateParser.class.getCanonicalName(), e);
+		}
+	}
+
+	/** Reads an optional '.' and up to 9 digits; returns { nanos, characters read }. */
+	private static int[] parseMilliseconds(final String date, final int start) {
+		if (start >= date.length() || date.charAt(start) != '.') {
+			return new int[] { 0, 0 };
+		}
+		int result = 0;
+		int i = start + 1;
+		for (; i < date.length() && date.charAt(i) >= '0' && date.charAt(i) <= '9'; i++) {
+			result = result * 10 + (date.charAt(i) - '0');
+		}
+		final int digits = i - start - 1;
+		if (digits > 9) {
+			// more digits would overflow the int and could yield a wrong but valid value
+			throw new IllegalArgumentException("more than 9 fraction digits");
+		}
+		for (int pad = digits; pad < 9; pad++) {
+			result *= 10; // scale to nanoseconds
+		}
+		return new int[] { result, i - start };
+	}
+
+	/** Parses an offset of the form +HH:mm or -HH:mm. */
+	private static ZoneOffset parseZone(final CharSequence zoneString) {
+		final char sign = zoneString.charAt(0);
+		if (sign != '+' && sign != '-') {
+			throw new IllegalArgumentException("offset must start with '+' or '-'");
+		}
+		final int hours = Integer.parseInt(zoneString, 0, 3, 10);
+		final int minutes = Integer.parseInt(zoneString, 4, 6, 10);
+		// "-00" parses to 0, so the sign of the minutes comes from the sign character
+		return ZoneOffset.ofHoursMinutes(hours, sign == '-' ? -minutes : minutes);
+	}
+}
diff --git a/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java b/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java
new file mode 100644
index 0000000..2251fad
--- /dev/null
+++ b/pdb-ui/src/test/java/org/lucares/pdbui/date/FastISODateParserTest.java
@@ -0,0 +1,86 @@
+package org.lucares.pdbui.date;
+
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeFormatter;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/** Tests {@link FastISODateParser} with valid and invalid ISO-8601 like dates. */
+@Test
+public class FastISODateParserTest {
+	/** Dates with 0 to 9 fraction digits, each with 'Z' or a numeric offset. */
+	@DataProvider(name = "providerValidDate")
+	public Object[][] providerValidDate() {
+		return new Object[][] { //
+				{ "2018-11-18T14:42:49.123456789Z" }, //
+				{ "2018-11-18T14:42:49.123456789+12:34" }, //
+				{ "2018-11-18T14:42:49.12345678Z" }, //
+				{ "2018-11-18T14:42:49.12345678+12:34" }, //
+				{ "2018-11-18T14:42:49.1234567Z" }, //
+				{ "2018-11-18T14:42:49.1234567+12:34" }, //
+				{ "2018-11-18T14:42:49.123456Z" }, //
+				{ "2018-11-18T14:42:49.123456+12:34" }, //
+				{ "2018-11-18T14:42:49.33256Z" }, //
+				{ "2018-11-18T14:42:49.33256+12:34" }, //
+				{ "2018-11-18T14:42:49.3325Z" }, //
+				{ "2018-11-18T14:42:49.3325+12:34" }, //
+				{ "2018-11-18T14:42:49.332Z" }, //
+				{ "2018-11-18T14:42:49.332+00:00" }, //
+				{ "2018-11-18T14:42:49.332+12:34" }, //
+				{ "2018-11-18T14:42:49.332-01:23" }, //
+				{ "2018-11-18T14:55:49.44Z" }, //
+				{ "2018-11-18T14:55:49.55-01:23" }, //
+				{ "2018-11-18T14:55:49.4Z" }, //
+				{ "2018-11-18T14:55:49.5-01:23" }, //
+				{ "2018-11-18T14:55:49.Z" }, //
+				{ "2018-11-18T14:55:49.-01:23" }, //
+				{ "2018-11-18T14:55:49Z" }, //
+				{ "2018-11-18T14:55:49-01:23" },//
+		};
+	}
+
+	/** The result must equal what {@link DateTimeFormatter#ISO_DATE_TIME} produces. */
+	@Test(dataProvider = "providerValidDate")
+	public void testParseValidDate(final String date) {
+		Assert.assertEquals(FastISODateParser.parse(date),
+				OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date)));
+	}
+
+	/** Dates in which one character was replaced by (or prefixed with) an 'a'. */
+	@DataProvider(name = "providerParseInvalidDate")
+	public Object[][] providerParseInvalidDate() {
+		return new Object[][] { //
+				{ "a2018-11-18T14:42:49.332Z" }, //
+				{ "a018-11-18T14:42:49.332Z" }, //
+				{ "2a18-11-18T14:42:49.332Z" }, //
+				{ "20a8-11-18T14:42:49.332Z" }, //
+				{ "201a-11-18T14:42:49.332Z" }, //
+				{ "2018-a1-18T14:42:49.332Z" }, //
+				{ "2018-1a-18T14:42:49.332Z" }, //
+				{ "2018-11-a8T14:42:49.332Z" }, //
+				{ "2018-11-1aT14:42:49.332Z" }, //
+				{ "2018-11-18Ta4:42:49.332Z" }, //
+				{ "2018-11-18T1a:42:49.332Z" }, //
+				{ "2018-11-18T14:a2:49.332Z" }, //
+				{ "2018-11-18T14:4a:49.332Z" }, //
+				{ "2018-11-18T14:42:a9.332Z" }, //
+				{ "2018-11-18T14:42:4a.332Z" }, //
+				{ "2018-11-18T14:42:49.a32Z" }, //
+				{ "2018-11-18T14:42:49.3a2Z" }, //
+				{ "2018-11-18T14:42:49.33aZ" }, //
+				{ "2018-11-18T14:42:49.332a" }, //
+				{ "2018-11-18T14:42:49.332a00:00" }, //
+				{ "2018-11-18T14:42:49.332+a0:00" }, //
+				{ "2018-11-18T14:42:49.332+0a:00" }, //
+				{ "2018-11-18T14:42:49.332+00:a0" }, //
+				{ "2018-11-18T14:42:49.332+00:0a" }//
+		};
+	}
+
+	@Test(expectedExceptions = IllegalArgumentException.class, dataProvider = "providerParseInvalidDate")
+	public void testParseInvalidDate(final String invalidDate) {
+		FastISODateParser.parse(invalidDate);
+	}
+}