read csv using input stream instead of reader
We now read the CSV input without first transforming the data into strings. This reduces the number of bytes that have to be converted and copied. We also made Tag smaller: it no longer stores pointers to strings; instead it stores integers obtained by compressing the strings (see StringCompressor). This reduces memory usage and speeds up hashCode and equals, which in turn speeds up access to the writer cache. The performance gain is almost 100%: - 330k entries/s -> 670k entries/s (top speed measured over one second) - 62s -> 32s to ingest 16 million entries
This commit is contained in:
@@ -59,6 +59,15 @@ public class FastISODateParserTest {
|
||||
Assert.assertEquals(actualDate, expectedDate);
|
||||
}
|
||||
|
||||
/**
 * Checks that {@code FastISODateParser.parseAsEpochMilli(String)} yields the same
 * epoch-millisecond value as parsing the date with
 * {@code DateTimeFormatter.ISO_DATE_TIME} and converting it via {@code OffsetDateTime}.
 *
 * @param date a date string supplied by the "providerValidDate" data provider
 */
@Test(dataProvider = "providerValidDate")
|
||||
public void testParseValidDateAsEpochMilli(final String date) {
|
||||
|
||||
// value under test: epoch millis produced by the fast parser
final long actualDate = new FastISODateParser().parseAsEpochMilli(date);
|
||||
|
||||
// reference value: the same input parsed with the JDK's ISO_DATE_TIME formatter
final OffsetDateTime expectedDate = OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date));
|
||||
Assert.assertEquals(actualDate, expectedDate.toInstant().toEpochMilli());
|
||||
}
|
||||
|
||||
@DataProvider(name = "providerParseInvalidDate")
|
||||
public Object[][] providerParseInvalidDate() {
|
||||
return new Object[][] { //
|
||||
@@ -133,6 +142,17 @@ public class FastISODateParserTest {
|
||||
Assert.assertEquals(actualEpochMilli, expectedEpochMilli);
|
||||
}
|
||||
|
||||
/**
 * Checks that {@code FastISODateParser.parseAsEpochMilli(byte[], int)} applied to the
 * UTF-8 bytes of a date yields the same epoch-millisecond value as parsing the date
 * string with {@code DateTimeFormatter.ISO_DATE_TIME}.
 *
 * @param date a date string supplied by the "providerDateToTimestamp" data provider
 */
@Test(dataProvider = "providerDateToTimestamp")
|
||||
public void testDateToTimestampWithBytes(final String date) {
|
||||
|
||||
final byte[] dateAsBytes = date.getBytes(StandardCharsets.UTF_8);
|
||||
// NOTE(review): the second argument (0) is presumably the start offset into the
// byte array - confirm against FastISODateParser.
final long actualEpochMilli = new FastISODateParser().parseAsEpochMilli(dateAsBytes, 0);
|
||||
|
||||
// reference value: the same input parsed with the JDK's ISO_DATE_TIME formatter
final OffsetDateTime expectedDate = OffsetDateTime.from(DateTimeFormatter.ISO_DATE_TIME.parse(date));
|
||||
final long expectedEpochMilli = expectedDate.toInstant().toEpochMilli();
|
||||
Assert.assertEquals(actualEpochMilli, expectedEpochMilli);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void test() {
|
||||
|
||||
@@ -151,18 +171,18 @@ public class FastISODateParserTest {
|
||||
}
|
||||
|
||||
public static void main(final String[] args) throws IOException, InterruptedException {
|
||||
final Path path = Path.of("/home/andi/ws/performanceDb/data/production/dates2.csv");
|
||||
final Path path = Path.of("/home/andi/ws/performanceDb/data/production/dates.csv");
|
||||
|
||||
for (int i = 0; i < 15; i++) {
|
||||
final List<String> dates = new ArrayList<>();
|
||||
final List<byte[]> dates = new ArrayList<>();
|
||||
|
||||
try (final BufferedReader reader = new BufferedReader(
|
||||
new FileReader(path.toFile(), StandardCharsets.UTF_8))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
dates.add(line);
|
||||
}
|
||||
try (final BufferedReader reader = new BufferedReader(new FileReader(path.toFile(), StandardCharsets.UTF_8))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
dates.add(line.getBytes());
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 20; i++) {
|
||||
|
||||
System.gc();
|
||||
TimeUnit.MILLISECONDS.sleep(100);
|
||||
@@ -177,8 +197,8 @@ public class FastISODateParserTest {
|
||||
final long start = System.nanoTime();
|
||||
final FastISODateParser fastISODateParser = new FastISODateParser();
|
||||
|
||||
for (final String date : dates) {
|
||||
fastISODateParser.parseAsEpochMilli(date);
|
||||
for (final byte[] date : dates) {
|
||||
fastISODateParser.parseAsEpochMilli(date, 0);
|
||||
// final long timestamp =
|
||||
// fastISODateParser.parse(date).toInstant().toEpochMilli();
|
||||
// final long timestamp = OffsetDateTime.parse(date, DateTimeFormatter.ISO_OFFSET_DATE_TIME)
|
||||
|
||||
@@ -162,10 +162,11 @@ public class TcpIngestorTest {
|
||||
|
||||
final LinkedBlockingDeque<Map<String, Object>> queue = new LinkedBlockingDeque<>();
|
||||
|
||||
for (int i = 0; i < 100; i++) {
|
||||
for (int i = 0; i < 103; i++) // use number of rows that is not a multiple of a page size
|
||||
{
|
||||
|
||||
final long duration = rnd.nextLong(-100000L, 100000L);
|
||||
final long timestamp = rnd.nextLong(-100000L, 100000L);
|
||||
final long timestamp = rnd.nextLong(-100000L, 10000000L);
|
||||
|
||||
final Map<String, Object> entry = new HashMap<>();
|
||||
entry.put("@timestamp", Instant.ofEpochMilli(timestamp).atOffset(ZoneOffset.UTC)
|
||||
@@ -178,7 +179,6 @@ public class TcpIngestorTest {
|
||||
expected.addAll(timestamp, duration);
|
||||
}
|
||||
|
||||
queue.put(PdbTestUtil.POISON);
|
||||
PdbTestUtil.send(format, queue);
|
||||
} catch (final Exception e) {
|
||||
LOGGER.error("", e);
|
||||
|
||||
Reference in New Issue
Block a user