use custom date parser
A specialized date parser that can only handle ISO-8601-like dates (e.g. 2011-12-03T10:15:30.123Z or 2011-12-03T10:15:30+01:00), but does so roughly 10 times faster than DateTimeFormatter and 5 times faster than the FastDateParser of commons-lang3.
This commit is contained in:
@@ -7,9 +7,9 @@ import java.util.List;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.lucares.collections.LongList;
|
import org.lucares.collections.LongList;
|
||||||
import org.lucares.pdb.blockstorage.intsequence.VariableByteEncoder;
|
|
||||||
import org.lucares.pdb.map.NodeEntry.ValueType;
|
import org.lucares.pdb.map.NodeEntry.ValueType;
|
||||||
import org.lucares.utils.Preconditions;
|
import org.lucares.utils.Preconditions;
|
||||||
|
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <pre>
|
* <pre>
|
||||||
|
|||||||
@@ -2,13 +2,13 @@ package org.lucares.pdbui;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.lucares.pdb.api.Entry;
|
import org.lucares.pdb.api.Entry;
|
||||||
import org.lucares.pdb.api.Tags;
|
import org.lucares.pdb.api.Tags;
|
||||||
import org.lucares.pdb.api.TagsBuilder;
|
import org.lucares.pdb.api.TagsBuilder;
|
||||||
|
import org.lucares.pdbui.date.FastISODateParser;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -28,7 +28,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
|
|||||||
try {
|
try {
|
||||||
|
|
||||||
final String[] columns = splitPattern.split(line);
|
final String[] columns = splitPattern.split(line);
|
||||||
if (columns.length == headers.length) {
|
if (columns.length == headers.length && !columns[0].startsWith("@")) {
|
||||||
|
|
||||||
result = createEntry(columns);
|
result = createEntry(columns);
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
|
|||||||
|
|
||||||
switch (headers[i]) {
|
switch (headers[i]) {
|
||||||
case "@timestamp":
|
case "@timestamp":
|
||||||
date = OffsetDateTime.parse(columns[i], DateTimeFormatter.ISO_ZONED_DATE_TIME);
|
date = FastISODateParser.parse(columns[i]);
|
||||||
break;
|
break;
|
||||||
case "duration":
|
case "duration":
|
||||||
duration = Long.parseLong(columns[i]);
|
duration = Long.parseLong(columns[i]);
|
||||||
|
|||||||
@@ -2,13 +2,13 @@ package org.lucares.pdbui;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.lucares.pdb.api.Entry;
|
import org.lucares.pdb.api.Entry;
|
||||||
import org.lucares.pdb.api.Tags;
|
import org.lucares.pdb.api.Tags;
|
||||||
import org.lucares.pdb.api.TagsBuilder;
|
import org.lucares.pdb.api.TagsBuilder;
|
||||||
|
import org.lucares.pdbui.date.FastISODateParser;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -86,7 +86,7 @@ public class JsonToEntryTransformer implements LineToEntryTransformer {
|
|||||||
private OffsetDateTime getDate(final Map<String, Object> map) {
|
private OffsetDateTime getDate(final Map<String, Object> map) {
|
||||||
final String timestamp = (String) map.get("@timestamp");
|
final String timestamp = (String) map.get("@timestamp");
|
||||||
|
|
||||||
final OffsetDateTime date = OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_ZONED_DATE_TIME);
|
final OffsetDateTime date = FastISODateParser.parse(timestamp);
|
||||||
return date;
|
return date;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,77 @@
|
|||||||
|
package org.lucares.pdbui.date;
|
||||||
|
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.time.ZoneOffset;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
/**
 * A specialized date parser that can only handle ISO-8601 like dates
 * (2011-12-03T10:15:30.123Z or 2011-12-03T10:15:30+01:00) but does this roughly
 * 10 times faster than {@link DateTimeFormatter} and 5 times faster than the
 * FastDateParser of commons-lang3.
 * <p>
 * The parser works on fixed character positions: {@code yyyy-MM-ddTHH:mm:ss},
 * optionally followed by a dot and up to nine fraction digits, followed by
 * either {@code Z} or an offset of the form {@code +HH:MM} / {@code -HH:MM}.
 * The separator characters between the numeric fields are not validated, and
 * any text after the offset is ignored.
 */
public class FastISODateParser {

    /** Index of the first character after the seconds field. */
    private static final int FRACTION_START = 19;

    /** Nine fraction digits correspond to nanosecond precision. */
    private static final int MAX_FRACTION_DIGITS = 9;

    /**
     * Parsing ISO-8601 like dates, e.g. 2011-12-03T10:15:30.123Z or
     * 2011-12-03T10:15:30+01:00.
     *
     * @param date in ISO-8601 format
     * @return {@link OffsetDateTime}
     * @throws IllegalArgumentException if {@code date} is {@code null}, too
     *                                  short, contains a non-digit in a numeric
     *                                  field, has more than nine fraction
     *                                  digits, has an offset that does not start
     *                                  with {@code +} or {@code -}, or contains
     *                                  field values that are out of range
     */
    public static OffsetDateTime parse(final String date) {
        try {
            final int year = parseDigits(date, 0, 4);
            final int month = parseDigits(date, 5, 7);
            final int dayOfMonth = parseDigits(date, 8, 10);
            final int hour = parseDigits(date, 11, 13);
            final int minute = parseDigits(date, 14, 16);
            final int second = parseDigits(date, 17, 19);

            final int[] nanosAndCharsRead = parseFraction(date, FRACTION_START);
            final int nanos = nanosAndCharsRead[0];
            final int offsetTimezone = FRACTION_START + nanosAndCharsRead[1];

            final ZoneOffset offset = date.charAt(offsetTimezone) == 'Z' ? ZoneOffset.UTC
                    : parseZone(date, offsetTimezone);
            return OffsetDateTime.of(year, month, dayOfMonth, hour, minute, second, nanos, offset);
        } catch (final RuntimeException e) {
            // Covers null input, index errors on short input, non-digit characters
            // and range violations reported by java.time.
            throw new IllegalArgumentException("'" + date + "' is not an ISO-8601 that can be parsed with "
                    + FastISODateParser.class.getCanonicalName(), e);
        }
    }

    /**
     * Parses the characters in {@code [start, end)} as an unsigned decimal number.
     * Unlike {@code Integer.parseInt} this rejects a leading {@code +} or
     * {@code -}, so a field like {@code "+1"} is not mistaken for a month.
     */
    private static int parseDigits(final String date, final int start, final int end) {
        int value = 0;
        for (int i = start; i < end; i++) {
            final char c = date.charAt(i);
            if (c < '0' || c > '9') {
                throw new NumberFormatException("expected a digit at index " + i + " but found '" + c + "'");
            }
            value = value * 10 + (c - '0');
        }
        return value;
    }

    /**
     * Parses the optional fraction of a second that starts at {@code start}.
     *
     * @return a two element array: the fraction in nanoseconds and the number of
     *         characters consumed (including the dot); both are 0 if there is no
     *         dot at {@code start}
     */
    private static int[] parseFraction(final String date, final int start) {
        if (start >= date.length() || date.charAt(start) != '.') {
            return new int[] { 0, 0 };
        }

        int nanos = 0;
        int digits = 0;
        int i = start + 1;
        while (i < date.length()) {
            final char c = date.charAt(i);
            if (c < '0' || c > '9') {
                break;
            }
            if (digits == MAX_FRACTION_DIGITS) {
                // a tenth digit cannot be represented and would overflow/mis-scale
                throw new NumberFormatException("more than " + MAX_FRACTION_DIGITS + " fraction digits");
            }
            nanos = nanos * 10 + (c - '0');
            digits++;
            i++;
        }

        // scale e.g. ".332" (332) up to nanoseconds (332000000)
        for (int pad = digits; pad < MAX_FRACTION_DIGITS; pad++) {
            nanos *= 10;
        }

        return new int[] { nanos, i - start };
    }

    /**
     * Parses an offset of the form {@code +HH:MM} or {@code -HH:MM} that starts at
     * index {@code start} of {@code date}.
     */
    private static ZoneOffset parseZone(final String date, final int start) {
        final char signChar = date.charAt(start);
        final int sign;
        if (signChar == '+') {
            sign = 1;
        } else if (signChar == '-') {
            sign = -1;
        } else {
            throw new IllegalArgumentException("expected 'Z', '+' or '-' at index " + start);
        }

        final int hours = parseDigits(date, start + 1, start + 3);
        final int minutes = parseDigits(date, start + 4, start + 6);

        // The sign is taken from the sign character, not from the parsed hour value,
        // so that offsets with a zero hour such as -00:30 stay negative.
        return ZoneOffset.ofHoursMinutes(sign * hours, sign * minutes);
    }
}
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
package org.lucares.pdbui.date;
|
||||||
|
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public class FastISODateParserTest {
|
||||||
|
|
||||||
|
@DataProvider(name = "providerValidDate")
|
||||||
|
public Object[][] providerValidDate() {
|
||||||
|
return new Object[][] { //
|
||||||
|
{ "2018-11-18T14:42:49.123456789Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.123456789+12:34" }, //
|
||||||
|
{ "2018-11-18T14:42:49.12345678Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.12345678+12:34" }, //
|
||||||
|
{ "2018-11-18T14:42:49.1234567Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.1234567+12:34" }, //
|
||||||
|
{ "2018-11-18T14:42:49.123456Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.123456+12:34" }, //
|
||||||
|
{ "2018-11-18T14:42:49.33256Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.33256+12:34" }, //
|
||||||
|
{ "2018-11-18T14:42:49.3325Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.3325+12:34" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332+00:00" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332+12:34" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332-01:23" }, //
|
||||||
|
{ "2018-11-18T14:55:49.44Z" }, //
|
||||||
|
{ "2018-11-18T14:55:49.55-01:23" }, //
|
||||||
|
{ "2018-11-18T14:55:49.4Z" }, //
|
||||||
|
{ "2018-11-18T14:55:49.5-01:23" }, //
|
||||||
|
{ "2018-11-18T14:55:49.Z" }, //
|
||||||
|
{ "2018-11-18T14:55:49.-01:23" }, //
|
||||||
|
{ "2018-11-18T14:55:49Z" }, //
|
||||||
|
{ "2018-11-18T14:55:49-01:23" },//
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "providerValidDate")
|
||||||
|
public void testParseValidDate(final String date) {
|
||||||
|
|
||||||
|
final OffsetDateTime actualDate = FastISODateParser.parse(date);
|
||||||
|
|
||||||
|
final OffsetDateTime expectedDate = OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date));
|
||||||
|
Assert.assertEquals(actualDate, expectedDate);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "providerParseInvalidDate")
|
||||||
|
public Object[][] providerParseInvalidDate() {
|
||||||
|
return new Object[][] { //
|
||||||
|
{ "a2018-11-18T14:42:49.332Z" }, //
|
||||||
|
{ "a018-11-18T14:42:49.332Z" }, //
|
||||||
|
{ "2a18-11-18T14:42:49.332Z" }, //
|
||||||
|
{ "20a8-11-18T14:42:49.332Z" }, //
|
||||||
|
{ "201a-11-18T14:42:49.332Z" }, //
|
||||||
|
{ "2018-a1-18T14:42:49.332Z" }, //
|
||||||
|
{ "2018-1a-18T14:42:49.332Z" }, //
|
||||||
|
{ "2018-11-a8T14:42:49.332Z" }, //
|
||||||
|
{ "2018-11-1aT14:42:49.332Z" }, //
|
||||||
|
{ "2018-11-18Ta4:42:49.332Z" }, //
|
||||||
|
{ "2018-11-18T1a:42:49.332Z" }, //
|
||||||
|
{ "2018-11-18T14:a2:49.332Z" }, //
|
||||||
|
{ "2018-11-18T14:4a:49.332Z" }, //
|
||||||
|
{ "2018-11-18T14:42:a9.332Z" }, //
|
||||||
|
{ "2018-11-18T14:42:4a.332Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.a32Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.3a2Z" }, //
|
||||||
|
{ "2018-11-18T14:42:49.33aZ" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332a" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332a00:00" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332+a0:00" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332+0a:00" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332+00:a0" }, //
|
||||||
|
{ "2018-11-18T14:42:49.332+00:0a" }//
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = IllegalArgumentException.class, dataProvider = "providerParseInvalidDate")
|
||||||
|
public void testParseInvalidDate(final String invalidDate) {
|
||||||
|
FastISODateParser.parse(invalidDate);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user