make it possible to ignore columns using the csv ingestor
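The CSV ingestor now drops every column whose header starts with TcpIngestor.Handler.COLUM_IGNORE_PREFIX instead of storing it as a field. Sketch of such a payload (the literal "#ignore_" below is a made-up placeholder; the real value is whatever the constant is defined as):

@timestamp,duration,host,#ignore_comment
1970-01-01T00:00:00.001Z,1,someHost,ignoredValue

The new test testCsvIngestorIgnoresColumns below asserts that only host is reported by getFields afterwards.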

2019-07-04 09:51:33 +02:00
parent 3a39f66e22
commit 2cb81e5acd
4 changed files with 45 additions and 7 deletions

@@ -0,0 +1,72 @@
package org.lucares.pdbui;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.lucares.collections.LongList;

final class LongPair implements Comparable<LongPair> {

    private final long a, b;

    public LongPair(final long a, final long b) {
        this.a = a;
        this.b = b;
    }

    public static List<LongPair> fromLongList(final LongList longList) {
        final List<LongPair> result = new ArrayList<>();
        for (int i = 0; i < longList.size(); i += 2) {
            result.add(new LongPair(longList.get(i), longList.get(i + 1)));
        }
        Collections.sort(result);
        return result;
    }

    public long getA() {
        return a;
    }

    public long getB() {
        return b;
    }

    @Override
    public String toString() {
        return a + "," + b;
    }

    @Override
    public int compareTo(final LongPair o) {
        return Comparator.comparing(LongPair::getA).thenComparing(LongPair::getB).compare(this, o);
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + (int) (a ^ (a >>> 32));
        result = prime * result + (int) (b ^ (b >>> 32));
        return result;
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        final LongPair other = (LongPair) obj;
        if (a != other.a)
            return false;
        if (b != other.b)
            return false;
        return true;
    }
}
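
A minimal usage sketch for this helper (invented values; a fragment assuming org.lucares.collections.LongList and TestNG's Assert are on the classpath, with LongList.addAll as used in the tests below): fromLongList folds a flat LongList into sorted (a, b) pairs, which makes assertions independent of insertion order.

final LongList first = new LongList();
first.addAll(2000L, 42L);
first.addAll(1000L, 7L);

final LongList second = new LongList();
second.addAll(1000L, 7L);
second.addAll(2000L, 42L);

// both flatten to the same sorted pair list: 1000,7 and 2000,42
Assert.assertEquals(LongPair.fromLongList(first), LongPair.fromLongList(second));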

@@ -0,0 +1,174 @@
package org.lucares.pdbui;
import java.io.IOException;
import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SocketChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;

public class PdbTestUtil {

    private static final Logger LOGGER = LoggerFactory.getLogger(PdbTestUtil.class);

    /** Sentinel that tells {@link #sendAsJson(BlockingQueue)} to stop reading from the queue. */
    static final Map<String, Object> POISON = new HashMap<>();

    public static final void send(final String format, final Collection<Map<String, Object>> entries)
            throws IOException, InterruptedException {
        switch (format) {
            case "csv":
                sendAsCsv(entries);
                break;
            case "json":
                sendAsJson(entries);
                break;
            default:
                throw new IllegalStateException("unhandled format: " + format);
        }
    }

    @SafeVarargs
    public static final void sendAsCsv(final Map<String, Object>... entries) throws IOException, InterruptedException {
        sendAsCsv(Arrays.asList(entries));
    }

    public static final void sendAsCsv(final Collection<Map<String, Object>> entries)
            throws IOException, InterruptedException {
        // the header row is the union of all keys; entries without a key produce an empty cell
        final Set<String> keys = entries.stream().map(Map::keySet).flatMap(Set::stream).collect(Collectors.toSet());
        final StringBuilder csv = new StringBuilder();
        csv.append(String.join(",", keys));
        csv.append("\n");
        for (final Map<String, Object> entry : entries) {
            final List<String> line = new ArrayList<>();
            for (final String key : keys) {
                final String value = String.valueOf(entry.getOrDefault(key, ""));
                line.add(value);
            }
            csv.append(String.join(",", line));
            csv.append("\n");
        }
        LOGGER.debug("sending: {}", csv);
        send(csv.toString());
    }

    @SafeVarargs
    public static final void sendAsJson(final Map<String, Object>... entries) throws IOException, InterruptedException {
        sendAsJson(Arrays.asList(entries));
    }

    public static final void sendAsJson(final Collection<Map<String, Object>> entries)
            throws IOException, InterruptedException {
        final LinkedBlockingDeque<Map<String, Object>> queue = new LinkedBlockingDeque<>(entries);
        queue.put(POISON);
        sendAsJson(queue);
    }

    public static final void sendAsJson(final BlockingQueue<Map<String, Object>> entriesSupplier)
            throws IOException, InterruptedException {
        final ObjectMapper mapper = new ObjectMapper();
        final SocketChannel channel = connect();
        Map<String, Object> entry;
        // take() blocks until the next entry arrives; poll() would return null and end
        // the loop prematurely if the producer were slower than this consumer
        while ((entry = entriesSupplier.take()) != POISON) {
            final String streamData = mapper.writeValueAsString(entry) + "\n";
            channel.write(ByteBuffer.wrap(streamData.getBytes(StandardCharsets.UTF_8)));
        }
        closeAfterGracePeriod(channel);
    }

    public static final void send(final String data) throws IOException {
        final SocketChannel channel = connect();
        channel.write(ByteBuffer.wrap(data.getBytes(StandardCharsets.UTF_8)));
        closeAfterGracePeriod(channel);
    }

    public static void send(final Path file) throws IOException {
        final SocketChannel outputChannel = connect();
        try (final FileChannel inputChannel = FileChannel.open(file, StandardOpenOption.READ)) {
            inputChannel.transferTo(0, Long.MAX_VALUE, outputChannel);
        }
        closeAfterGracePeriod(outputChannel);
    }

    private static void closeAfterGracePeriod(final SocketChannel channel) throws IOException {
        try {
            // ugly workaround: without the pause the channel was closed too early
            // and not all data was received
            TimeUnit.MILLISECONDS.sleep(10);
        } catch (final InterruptedException e) {
            throw new IllegalStateException(e);
        }
        channel.close();
        LOGGER.trace("closed sender connection");
    }

    private static SocketChannel connect() throws IOException {
        while (true) {
            try {
                final SocketChannel result = SocketChannel.open();
                result.configureBlocking(true);
                result.connect(new InetSocketAddress("127.0.0.1", TcpIngestor.PORT));
                return result;
            } catch (final ConnectException e) {
                // the server socket is not ready yet; retry until it accepts connections
            }
        }
    }
}
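
A short usage sketch for sendAsCsv (invented values; a fragment assuming this class is on the classpath): the header row is the union of all keys across entries, so an entry lacking a key contributes an empty cell, and the column order follows Set iteration order and is therefore unspecified.

final Map<String, Object> first = new HashMap<>();
first.put("@timestamp", "1970-01-01T00:00:00Z");
first.put("duration", 1);
first.put("host", "someHost");

final Map<String, Object> second = new HashMap<>();
second.put("@timestamp", "1970-01-01T00:00:01Z");
second.put("duration", 2);
// no "host" key: getOrDefault fills that cell with an empty string

PdbTestUtil.sendAsCsv(first, second);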

@@ -0,0 +1,284 @@
package org.lucares.pdbui;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadLocalRandom;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Query;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.performance.db.PdbExport;
import org.lucares.performance.db.PerformanceDb;
import org.lucares.utils.file.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import com.fasterxml.jackson.databind.ObjectMapper;

@Test
public class TcpIngestorTest {

    private static final Logger LOGGER = LoggerFactory.getLogger(TcpIngestorTest.class);

    private Path dataDirectory;

    @BeforeMethod
    public void beforeMethod() throws IOException {
        dataDirectory = Files.createTempDirectory("pdb");
    }

    @AfterMethod
    public void afterMethod() throws IOException {
        FileUtils.delete(dataDirectory);
    }

    public void testIngestDataViaTcpStream() throws Exception {
        final OffsetDateTime dateA = OffsetDateTime.now();
        final OffsetDateTime dateB = OffsetDateTime.now();
        final String host = "someHost";

        try (TcpIngestor ingestor = new TcpIngestor(dataDirectory)) {
            ingestor.start();

            final Map<String, Object> entryA = new HashMap<>();
            entryA.put("duration", 1);
            entryA.put("@timestamp", dateA.format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
            entryA.put("host", host);
            entryA.put("tags", Collections.emptyList());

            final Map<String, Object> entryB = new HashMap<>();
            entryB.put("duration", 2);
            entryB.put("@timestamp", dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
            entryB.put("host", host);
            entryB.put("tags", Collections.emptyList());

            PdbTestUtil.sendAsJson(entryA, entryB);
        } catch (final Exception e) {
            LOGGER.error("", e);
            throw e;
        }

        try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
            final LongList result = db.get(new Query("host=" + host, DateTimeRange.ofDay(dateA))).singleGroup()
                    .flatMap();
            Assert.assertEquals(result.size(), 4);
            Assert.assertEquals(result.get(0), dateA.toInstant().toEpochMilli());
            Assert.assertEquals(result.get(1), 1);
            Assert.assertEquals(result.get(2), dateB.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli());
            Assert.assertEquals(result.get(3), 2);
        }
    }

    public void testIngestDataViaTcpStream_CustomFormat() throws Exception {
        final long dateA = Instant.now().toEpochMilli();
        final long dateB = Instant.now().toEpochMilli() + 1;
        final long dateC = Instant.now().toEpochMilli() - 1;
        final DateTimeRange dateRange = DateTimeRange.relativeMinutes(1);
        final String host = "someHost";

        // 1. insert some data
        try (TcpIngestor ingestor = new TcpIngestor(dataDirectory)) {
            ingestor.start();

            final long deltaEpochMilliB = dateB - dateA;
            final long deltaEpochMilliC = dateC - dateB;
            final String data = "#$0:host=someHost,pod=somePod\n"//
                    + dateA + ",1,0\n"// the previous date is 0, therefore the delta is dateA / using tags with id 0
                    + "$1:host=someHost,pod=otherPod\n" //
                    + deltaEpochMilliB + ",2,1\n" // the date is the delta to the previous date / using tags with id 1
                    + deltaEpochMilliC + ",3,0"; // the date is the delta to the previous date / using tags with id 0
            PdbTestUtil.send(data);
        } catch (final Exception e) {
            LOGGER.error("", e);
            throw e;
        }

        // 2. export the data
        final List<Path> exportFiles = PdbExport.export(dataDirectory, dataDirectory.resolve("export"));

        // 3. delete the database
        FileUtils.delete(dataDirectory.resolve(DataStore.SUBDIR_STORAGE));

        // 4. create a new database by re-ingesting the export
        try (TcpIngestor ingestor = new TcpIngestor(dataDirectory)) {
            ingestor.start();
            for (final Path exportFile : exportFiles) {
                PdbTestUtil.send(exportFile);
            }
        }

        // 5. check that the data is correctly inserted
        try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
            final LongList result = db.get(new Query("host=" + host, dateRange)).singleGroup().flatMap();
            Assert.assertEquals(result.size(), 6);
            Assert.assertEquals(result.get(0), dateA);
            Assert.assertEquals(result.get(1), 1);
            Assert.assertEquals(result.get(2), dateC);
            Assert.assertEquals(result.get(3), 3);
            Assert.assertEquals(result.get(4), dateB);
            Assert.assertEquals(result.get(5), 2);
        }
    }

    @Test
    public void testIngestionThreadDoesNotDieOnErrors() throws Exception {
        final OffsetDateTime dateA = OffsetDateTime.ofInstant(Instant.ofEpochMilli(-1), ZoneOffset.UTC);
        final OffsetDateTime dateB = OffsetDateTime.now();
        final DateTimeRange dateRange = new DateTimeRange(dateA, dateB);
        final String host = "someHost";

        try (TcpIngestor tcpIngestor = new TcpIngestor(dataDirectory)) {
            tcpIngestor.start();

            // has a negative epoch millisecond timestamp and a negative value
            final Map<String, Object> entryA = new HashMap<>();
            entryA.put("duration", -1);
            entryA.put("@timestamp", dateA.format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
            entryA.put("host", host);
            entryA.put("tags", Collections.emptyList());

            // skipped, because it is not valid JSON
            final String corruptEntry = "{\"corrupt...";

            // valid entry
            final Map<String, Object> entryB = new HashMap<>();
            entryB.put("duration", 2);
            entryB.put("@timestamp", dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
            entryB.put("host", host);
            entryB.put("tags", Collections.emptyList());

            final ObjectMapper objectMapper = new ObjectMapper();
            final String data = String.join("\n", //
                    objectMapper.writeValueAsString(entryA), //
                    corruptEntry, //
                    objectMapper.writeValueAsString(entryB)//
            )//
                    + "\n";
            PdbTestUtil.send(data);
        }

        try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
            final LongList result = db.get(new Query("host=" + host, dateRange)).singleGroup().flatMap();
            Assert.assertEquals(result.size(), 4);
            Assert.assertEquals(result.get(0), dateA.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli());
            Assert.assertEquals(result.get(1), -1);
            Assert.assertEquals(result.get(2), dateB.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli());
            Assert.assertEquals(result.get(3), 2);
        }
    }

    @DataProvider
    public Object[][] providerSendingFormats() {
        final List<Object[]> data = new ArrayList<>();
        data.add(new Object[] { "csv" });
        data.add(new Object[] { "json" });
        return data.toArray(Object[][]::new);
    }

    @Test(dataProvider = "providerSendingFormats")
    public void testRandomOrder(final String format) throws Exception {
        final ThreadLocalRandom rnd = ThreadLocalRandom.current();
        final String host = "someHost";
        final List<String> additionalTagValues = Arrays.asList("foo", "bar", "baz");
        final DateTimeRange dateRange = new DateTimeRange(Instant.ofEpochMilli(-100000L).atOffset(ZoneOffset.UTC),
                Instant.ofEpochMilli(10000000L).atOffset(ZoneOffset.UTC));
        final LongList expected = new LongList();

        try (TcpIngestor ingestor = new TcpIngestor(dataDirectory)) {
            ingestor.start();

            final LinkedBlockingDeque<Map<String, Object>> queue = new LinkedBlockingDeque<>();
            // use a number of rows that is not a multiple of the page size
            for (int i = 0; i < 103; i++) {
                final long duration = rnd.nextLong(-100000L, 100000L);
                final long timestamp = rnd.nextLong(-100000L, 10000000L);
                final Map<String, Object> entry = new HashMap<>();
                entry.put("@timestamp", Instant.ofEpochMilli(timestamp).atOffset(ZoneOffset.UTC)
                        .format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
                entry.put("duration", duration);
                entry.put("host", host);
                entry.put("additionalKey", additionalTagValues.get(rnd.nextInt(additionalTagValues.size())));
                queue.put(entry);
                expected.addAll(timestamp, duration);
            }
            PdbTestUtil.send(format, queue);
        } catch (final Exception e) {
            LOGGER.error("", e);
            throw e;
        }

        try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
            final LongList result = db.get(new Query("host=" + host, dateRange)).singleGroup().flatMap();
            // compare as sorted (timestamp, duration) pairs, because the insertion order is random
            Assert.assertEquals(LongPair.fromLongList(result), LongPair.fromLongList(expected));
        }
    }

    public void testCsvIngestorIgnoresColumns() throws Exception {
        try (TcpIngestor ingestor = new TcpIngestor(dataDirectory)) {
            ingestor.start();

            final Map<String, Object> entry = new HashMap<>();
            entry.put("@timestamp",
                    Instant.ofEpochMilli(1).atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
            entry.put("duration", 1);
            entry.put("host", "someHost");
            // a column whose header carries the ignore prefix must not be stored as a field
            entry.put(TcpIngestor.Handler.COLUM_IGNORE_PREFIX + "ignored", "ignoredValue");
            PdbTestUtil.sendAsCsv(entry);
        } catch (final Exception e) {
            LOGGER.error("", e);
            throw e;
        }

        try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
            final List<String> availableFields = db.getFields(DateTimeRange.max());
            Assert.assertEquals(availableFields.toString(), List.of("host").toString(),
                    "the ignored field is not returned");
        }
    }
}