use FastISODateParser.parseAsEpochMilli

Compared to FastISODateParser.parse, which returns an
OffsetDateTime object, parseAsEpochMilli returns the
epoch time millis. The performance improvement for
date parsing alone is roughly 100% (8m dates/s to
18m dates/s).
Insertion speed improved from 13-14s for 1.6m entries
to 11.5-12.5s.
This commit is contained in:
2018-12-16 19:24:47 +01:00
parent 23f800a441
commit 40f4506e13
9 changed files with 40 additions and 108 deletions

View File

@@ -1,6 +1,8 @@
package org.lucares.pdb.api;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
public class Entry {
@@ -9,30 +11,26 @@ public class Entry {
* A special {@link Entry} that can be used as poison object for
* {@link BlockingQueueIterator}.
*/
public static final Entry POISON = new Entry(OffsetDateTime.MIN, -1, null);
public static final Entry POISON = new Entry(Long.MIN_VALUE, -1, null);
private final long value;
private final Tags tags;
private final OffsetDateTime date;
private final long epochMilli;
public Entry(final OffsetDateTime date, final long value, final Tags tags) {
this.date = date;
public Entry(final long epochMilli, final long value, final Tags tags) {
this.epochMilli = epochMilli;
this.tags = tags;
this.value = value;
}
public OffsetDateTime getDate() {
return date;
}
public long getValue() {
return value;
}
public long getEpochMilli() {
return date.toInstant().toEpochMilli();
return epochMilli;
}
public Tags getTags() {
@@ -45,7 +43,7 @@ public class Entry {
return "POISON ENTRY";
}
final OffsetDateTime date = getDate();
final OffsetDateTime date = Instant.ofEpochMilli(epochMilli).atOffset(ZoneOffset.UTC);
return date.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + " = " + value + " (" + tags.asString() + ")";
}
@@ -53,7 +51,7 @@ public class Entry {
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((date == null) ? 0 : date.hashCode());
result = prime * result + (int) (epochMilli ^ (epochMilli >>> 32));
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
result = prime * result + (int) (value ^ (value >>> 32));
return result;
@@ -68,10 +66,7 @@ public class Entry {
if (getClass() != obj.getClass())
return false;
final Entry other = (Entry) obj;
if (date == null) {
if (other.date != null)
return false;
} else if (!date.equals(other.date))
if (epochMilli != other.epochMilli)
return false;
if (tags == null) {
if (other.tags != null)
@@ -82,5 +77,4 @@ public class Entry {
return false;
return true;
}
}

View File

@@ -1,7 +1,6 @@
package org.lucares.pdbui;
import java.io.IOException;
import java.time.OffsetDateTime;
import java.util.Optional;
import java.util.regex.Pattern;
@@ -45,14 +44,15 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
private Optional<Entry> createEntry(final String[] columns) {
OffsetDateTime date = null;
long epochMilli = 0;
long duration = Long.MIN_VALUE;
final TagsBuilder tagsBuilder = TagsBuilder.create();
for (int i = 0; i < columns.length; i++) {
switch (headers[i]) {
case "@timestamp":
date = fastISODateParser.parse(columns[i]);
epochMilli = fastISODateParser.parseAsEpochMilli(columns[i]);
;
break;
case "duration":
duration = Long.parseLong(columns[i]);
@@ -65,7 +65,7 @@ public class CsvToEntryTransformer implements LineToEntryTransformer {
}
final Tags tags = tagsBuilder.build();
final Entry entry = new Entry(date, duration, tags);
final Entry entry = new Entry(epochMilli, duration, tags);
return Optional.of(entry);
}
}

View File

@@ -1,7 +1,6 @@
package org.lucares.pdbui;
import java.io.IOException;
import java.time.OffsetDateTime;
import java.util.Map;
import java.util.Optional;
@@ -40,12 +39,12 @@ public class JsonToEntryTransformer implements LineToEntryTransformer {
try {
if (map.containsKey("duration") && map.containsKey("@timestamp")) {
final OffsetDateTime date = getDate(map);
final long epochMilli = getDate(map);
final long duration = (int) map.get("duration");
final Tags tags = createTags(map);
final Entry entry = new Entry(date, duration, tags);
final Entry entry = new Entry(epochMilli, duration, tags);
return Optional.of(entry);
} else {
LOGGER.info("Skipping invalid entry: " + map);
@@ -84,11 +83,10 @@ public class JsonToEntryTransformer implements LineToEntryTransformer {
return tags.build();
}
private OffsetDateTime getDate(final Map<String, Object> map) {
private long getDate(final Map<String, Object> map) {
final String timestamp = (String) map.get("@timestamp");
final OffsetDateTime date = fastISODateParser.parse(timestamp);
return date;
return fastISODateParser.parseAsEpochMilli(timestamp);
}
}

View File

@@ -9,9 +9,6 @@ import java.net.Socket;
import java.net.SocketAddress;
import java.net.SocketTimeoutException;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
@@ -24,8 +21,6 @@ import java.util.regex.Pattern;
import javax.annotation.PreDestroy;
import org.lucares.pdb.api.Entry;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.api.TagsBuilder;
import org.lucares.performance.db.BlockingQueueIterator;
import org.lucares.performance.db.PerformanceDb;
import org.lucares.recommind.logs.Config;
@@ -109,61 +104,6 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
return null;
}
public Optional<Entry> createEntry(final Map<String, Object> map) {
try {
if (map.containsKey("duration") && map.containsKey("@timestamp")) {
final OffsetDateTime date = getDate(map);
final long duration = (int) map.get("duration");
final Tags tags = createTags(map);
final Entry entry = new Entry(date, duration, tags);
return Optional.of(entry);
} else {
LOGGER.info("Skipping invalid entry: " + map);
return Optional.empty();
}
} catch (final Exception e) {
LOGGER.error("Failed to create entry from map: " + map, e);
return Optional.empty();
}
}
private Tags createTags(final Map<String, Object> map) {
final TagsBuilder tags = TagsBuilder.create();
for (final java.util.Map.Entry<String, Object> e : map.entrySet()) {
final String key = e.getKey();
final Object value = e.getValue();
switch (key) {
case "@timestamp":
case "duration":
// these fields are not tags
break;
case "tags":
// ignore: we only support key/value tags
break;
default:
if (value instanceof String) {
tags.add(key, (String) value);
} else if (value != null) {
tags.add(key, String.valueOf(value));
}
break;
}
}
return tags.build();
}
private OffsetDateTime getDate(final Map<String, Object> map) {
final String timestamp = (String) map.get("@timestamp");
final OffsetDateTime date = OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_ZONED_DATE_TIME);
return date;
}
}
public TcpIngestor(final Path dataDirectory) throws IOException {

View File

@@ -47,7 +47,7 @@ public class FastISODateParser {
}
}
public long parseAsTimestamp(final String date) {
public long parseAsEpochMilli(final String date) {
try {
// final long year = Integer.parseInt(date, 0, 4, 10);
// final long month = Integer.parseInt(date, 5, 7, 10);

View File

@@ -126,7 +126,7 @@ public class FastISODateParserTest {
@Test(dataProvider = "providerDateToTimestamp")
public void testDateToTimestamp(final String date) {
final long actualEpochMilli = new FastISODateParser().parseAsTimestamp(date);
final long actualEpochMilli = new FastISODateParser().parseAsEpochMilli(date);
final OffsetDateTime expectedDate = OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date));
final long expectedEpochMilli = expectedDate.toInstant().toEpochMilli();
@@ -178,7 +178,7 @@ public class FastISODateParserTest {
final FastISODateParser fastISODateParser = new FastISODateParser();
for (final String date : dates) {
fastISODateParser.parseAsTimestamp(date);
fastISODateParser.parseAsEpochMilli(date);
// final long timestamp =
// fastISODateParser.parse(date).toInstant().toEpochMilli();
// final long timestamp = OffsetDateTime.parse(date, DateTimeFormatter.ISO_OFFSET_DATE_TIME)

View File

@@ -53,7 +53,7 @@ public class PerformanceDb implements AutoCloseable {
public void putEntries(final BlockingIterator<Entry> entries) throws WriteException {
final Duration timeBetweenSyncs = Duration.ofSeconds(10);
final Duration timeBetweenSyncs = Duration.ofSeconds(1);
long count = 0;
long insertionsSinceLastSync = 0;

View File

@@ -41,7 +41,7 @@ public class PerformanceDbTest {
public void testInsertRead() throws Exception {
try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
final OffsetDateTime date = DateUtils.nowInUtc();
final long date = DateUtils.nowInUtc().toInstant().toEpochMilli();
final long value = 1;
final Tags tags = Tags.create("myKey", "myValue");
db.putEntry(new Entry(date, value, tags));
@@ -51,7 +51,7 @@ public class PerformanceDbTest {
Assert.assertEquals(stream.size(), 2);
Assert.assertEquals(stream.get(0), date.toInstant().toEpochMilli());
Assert.assertEquals(stream.get(0), date);
Assert.assertEquals(stream.get(1), value);
}
}
@@ -59,8 +59,8 @@ public class PerformanceDbTest {
public void testInsertIntoMultipleFilesRead() throws Exception {
try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
final OffsetDateTime dayOne = DateUtils.getDate(2016, 11, 1, 10, 0, 0);
final OffsetDateTime dayTwo = DateUtils.getDate(2016, 11, 2, 12, 34, 56);
final long dayOne = DateUtils.getDate(2016, 11, 1, 10, 0, 0).toInstant().toEpochMilli();
final long dayTwo = DateUtils.getDate(2016, 11, 2, 12, 34, 56).toInstant().toEpochMilli();
final long valueOne = 1;
final long valueTwo = 2;
final Tags tags = Tags.create("myKey", "myValue");
@@ -72,9 +72,9 @@ public class PerformanceDbTest {
Assert.assertEquals(stream.size(), 4);
Assert.assertEquals(stream.get(0), dayOne.toInstant().toEpochMilli());
Assert.assertEquals(stream.get(0), dayOne);
Assert.assertEquals(stream.get(1), valueOne);
Assert.assertEquals(stream.get(2), dayTwo.toInstant().toEpochMilli());
Assert.assertEquals(stream.get(2), dayTwo);
Assert.assertEquals(stream.get(3), valueTwo);
}
}
@@ -86,8 +86,8 @@ public class PerformanceDbTest {
for (long i = 0; i < n; i++) {
final long value = ThreadLocalRandom.current().nextInt(0, Integer.MAX_VALUE);
final OffsetDateTime date = OffsetDateTime.ofInstant(Instant.ofEpochMilli(currentTime + addToDate),
ZoneOffset.UTC);
final long date = OffsetDateTime.ofInstant(Instant.ofEpochMilli(currentTime + addToDate), ZoneOffset.UTC)
.toInstant().toEpochMilli();
result.add(new Entry(date, value, tags));
currentTime += differenceInMs;
@@ -333,7 +333,7 @@ public class PerformanceDbTest {
int index = 0;
for (final Entry entry : entriesOne) {
System.out.printf("%4d %s %d (%s)\n", index, entry.getDate(), entry.getValue(), label);
System.out.printf("%4d %s %d (%s)\n", index, entry.getEpochMilli(), entry.getValue(), label);
index++;
}
}

View File

@@ -51,18 +51,18 @@ public class TagsToFilesTest {
final TagsToFile tagsToFile = new TagsToFile(dataStore);) {
// dayC is before dayA and dayB
final OffsetDateTime dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1);
final OffsetDateTime dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1);
final OffsetDateTime dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1);
final long dayA = DateUtils.getDate(2016, 1, 2, 1, 1, 1).toInstant().toEpochMilli();
final long dayB = DateUtils.getDate(2016, 1, 3, 1, 1, 1).toInstant().toEpochMilli();
final long dayC = DateUtils.getDate(2016, 1, 1, 1, 1, 1).toInstant().toEpochMilli();
final Tags tags = Tags.create("myKey", "myValue");
final PdbWriter writerForDayA = tagsToFile.getWriter(dayA.toInstant().toEpochMilli(), tags);
final PdbWriter writerForDayA = tagsToFile.getWriter(dayA, tags);
writerForDayA.write(new Entry(dayA, 1, tags));
final PdbWriter writerForDayB = tagsToFile.getWriter(dayB.toInstant().toEpochMilli(), tags);
final PdbWriter writerForDayB = tagsToFile.getWriter(dayB, tags);
writerForDayB.write(new Entry(dayB, 2, tags));
final PdbWriter writerForDayC = tagsToFile.getWriter(dayC.toInstant().toEpochMilli(), tags);
final PdbWriter writerForDayC = tagsToFile.getWriter(dayC, tags);
writerForDayC.write(new Entry(dayC, 3, tags));
Assert.assertSame(writerForDayA, writerForDayB);
@@ -75,14 +75,14 @@ public class TagsToFilesTest {
try (final DataStore dataStore = new DataStore(dataDirectory); //
final TagsToFile tagsToFile = new TagsToFile(dataStore)) {
final OffsetDateTime timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1);
final long timestamp = DateUtils.getDate(2016, 1, 1, 13, 1, 1).toInstant().toEpochMilli();
final Tags tags = Tags.create("myKey", "myValue");
final PdbWriter fileA = tagsToFile.getWriter(timestamp.toInstant().toEpochMilli(), tags);
final PdbWriter fileA = tagsToFile.getWriter(timestamp, tags);
fileA.write(new Entry(timestamp, 1, tags));
final PdbWriter fileB = tagsToFile.getWriter(timestamp.toInstant().toEpochMilli(), tags);
final PdbWriter fileB = tagsToFile.getWriter(timestamp, tags);
fileA.write(new Entry(timestamp, 2, tags));
Assert.assertEquals(fileA, fileB);