handle corrupt json

Entries must be separated by a newline. This allows
us to handle corrupt JSON entries, because we know
that an entry can only start at the beginning of a line.
This commit is contained in:
ahr
2018-03-03 09:58:50 +01:00
parent 9d4eb660a5
commit 5a9aae70af
3 changed files with 137 additions and 109 deletions

View File

@@ -34,8 +34,8 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectReader;
@@ -56,8 +56,6 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
public final static class Handler implements Callable<Void> { public final static class Handler implements Callable<Void> {
private final ObjectMapper objectMapper = new ObjectMapper();
private final TypeReference<Map<String, Object>> typeReferenceForMap = new TypeReference<Map<String, Object>>() { private final TypeReference<Map<String, Object>> typeReferenceForMap = new TypeReference<Map<String, Object>>() {
}; };
@@ -76,41 +74,41 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
LOGGER.debug("opening streams to client"); LOGGER.debug("opening streams to client");
try (PrintWriter out = new PrintWriter(clientSocket.getOutputStream(), true); try (PrintWriter out = new PrintWriter(clientSocket.getOutputStream(), true);
BufferedReader in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream())); BufferedReader in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
) { ) {
final ObjectMapper objectMapper = new ObjectMapper();
final ObjectReader objectReader = objectMapper.readerFor(typeReferenceForMap); final ObjectReader objectReader = objectMapper.readerFor(typeReferenceForMap);
final MappingIterator<Object> iterator = objectReader.readValues(in);
double duration = 0.0; double duration = 0.0;
int count = 0; int count = 0;
LOGGER.debug("reading from stream"); LOGGER.debug("reading from stream");
while (iterator.hasNext()) { String line;
while ((line = in.readLine()) != null) {
final long start = System.nanoTime(); final long start = System.nanoTime();
@SuppressWarnings("unchecked") try {
final Map<String, Object> object = (Map<String, Object>) iterator.next(); final Map<String, Object> object = objectReader.readValue(line);
final Optional<Entry> entry = createEntry(object); final Optional<Entry> entry = createEntry(object);
final long end = System.nanoTime(); final long end = System.nanoTime();
duration += (end - start) / 1_000_000.0; duration += (end - start) / 1_000_000.0;
count++; count++;
if (count == 100000) { if (count == 100000) {
METRICS_LOGGER.debug("reading {} took {} ms", count, duration); METRICS_LOGGER.debug("reading {} took {} ms", count, duration);
duration = 0.0; duration = 0.0;
count = 0; count = 0;
}
if (entry.isPresent()) {
LOGGER.debug("adding entry to queue: {}", entry);
queue.put(entry.get());
}
} catch (JsonParseException e) {
LOGGER.info("json parse error in line '" + line + "'", e);
} }
if (entry.isPresent()) {
LOGGER.debug("adding entry to queue: {}", entry);
queue.put(entry.get());
}
} }
LOGGER.debug("connection closed: " + clientAddress); LOGGER.debug("connection closed: " + clientAddress);
} } catch (Exception e) {
catch (Exception e)
{
LOGGER.warn("Stream handling failed", e); LOGGER.warn("Stream handling failed", e);
throw e; throw e;
} }
@@ -121,8 +119,7 @@ public class TcpIngestor implements Ingestor, AutoCloseable, DisposableBean {
public Optional<Entry> createEntry(final Map<String, Object> map) { public Optional<Entry> createEntry(final Map<String, Object> map) {
try { try {
if (map.containsKey("duration") if (map.containsKey("duration") && map.containsKey("@timestamp")) {
&& map.containsKey("@timestamp")) {
final OffsetDateTime date = getDate(map); final OffsetDateTime date = getDate(map);
final long duration = (int) map.get("duration"); final long duration = (int) map.get("duration");

View File

@@ -1,80 +1,101 @@
package org.lucares.performance.db.ingestor; package org.lucares.performance.db.ingestor;
import java.io.IOException; import java.io.IOException;
import java.net.ConnectException; import java.net.ConnectException;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.channels.SocketChannel; import java.nio.channels.SocketChannel;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.lucares.pdbui.TcpIngestor; import org.lucares.pdbui.TcpIngestor;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
public class PdbTestUtil { public class PdbTestUtil {
private static final Logger LOGGER = LoggerFactory.getLogger(PdbTestUtil.class); private static final Logger LOGGER = LoggerFactory.getLogger(PdbTestUtil.class);
private static final Map<String, Object> POISON = new HashMap<>(); private static final Map<String, Object> POISON = new HashMap<>();
@SafeVarargs @SafeVarargs
public static final void send(final Map<String, Object>... entries) throws IOException, InterruptedException { public static final void send(final Map<String, Object>... entries) throws IOException, InterruptedException {
final LinkedBlockingDeque<Map<String, Object>> queue = new LinkedBlockingDeque<>(Arrays.asList(entries)); final LinkedBlockingDeque<Map<String, Object>> queue = new LinkedBlockingDeque<>(Arrays.asList(entries));
queue.put(POISON); queue.put(POISON);
send(queue); send(queue);
} }
public static final void send(final BlockingQueue<Map<String, Object>> aEntriesSupplier) throws IOException { public static final void send(final BlockingQueue<Map<String, Object>> aEntriesSupplier) throws IOException {
final ObjectMapper mapper = new ObjectMapper(); final ObjectMapper mapper = new ObjectMapper();
final SocketChannel channel = connect(); final SocketChannel channel = connect();
Map<String, Object> entry; Map<String, Object> entry;
while ((entry = aEntriesSupplier.poll()) != POISON) { while ((entry = aEntriesSupplier.poll()) != POISON) {
final StringBuilder streamData = new StringBuilder(); final StringBuilder streamData = new StringBuilder();
streamData.append(mapper.writeValueAsString(entry)); streamData.append(mapper.writeValueAsString(entry));
streamData.append("\n"); streamData.append("\n");
final ByteBuffer src = ByteBuffer.wrap(streamData.toString().getBytes(StandardCharsets.UTF_8)); final ByteBuffer src = ByteBuffer.wrap(streamData.toString().getBytes(StandardCharsets.UTF_8));
channel.write(src); channel.write(src);
} }
try { try {
// ugly workaround: the channel was closed too early and not all // ugly workaround: the channel was closed too early and not all
// data was received // data was received
TimeUnit.MILLISECONDS.sleep(10); TimeUnit.MILLISECONDS.sleep(10);
} catch (final InterruptedException e) { } catch (final InterruptedException e) {
throw new IllegalStateException(e); throw new IllegalStateException(e);
} }
channel.close(); channel.close();
LOGGER.trace("closed sender connection"); LOGGER.trace("closed sender connection");
} }
private static SocketChannel connect() throws IOException { public static final void send(final String data) throws IOException {
SocketChannel result = null; final SocketChannel channel = connect();
while (true) { final StringBuilder streamData = new StringBuilder();
try { streamData.append(data);
result = SocketChannel.open();
result.configureBlocking(true); final ByteBuffer src = ByteBuffer.wrap(streamData.toString().getBytes(StandardCharsets.UTF_8));
result.connect(new InetSocketAddress("127.0.0.1", TcpIngestor.PORT)); channel.write(src);
break;
} catch (final ConnectException e) { try {
// server socket not yet ready, it should be ready any time soon // ugly workaround: the channel was closed too early and not all
} // data was received
} TimeUnit.MILLISECONDS.sleep(10);
} catch (final InterruptedException e) {
return result; throw new IllegalStateException(e);
} }
channel.close();
} LOGGER.trace("closed sender connection");
}
private static SocketChannel connect() throws IOException {
SocketChannel result = null;
while (true) {
try {
result = SocketChannel.open();
result.configureBlocking(true);
result.connect(new InetSocketAddress("127.0.0.1", TcpIngestor.PORT));
break;
} catch (final ConnectException e) {
// server socket not yet ready, it should be ready any time soon
}
}
return result;
}
}

View File

@@ -23,6 +23,8 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
@Test @Test
public class TcpIngestorTest { public class TcpIngestorTest {
@@ -89,20 +91,28 @@ public class TcpIngestorTest {
try (TcpIngestor tcpIngestor = new TcpIngestor(dataDirectory)) { try (TcpIngestor tcpIngestor = new TcpIngestor(dataDirectory)) {
tcpIngestor.start(); tcpIngestor.start();
// this entry will be skipped, because the date is invalid // skipped, because the date is invalid
final Map<String, Object> entryA = new HashMap<>(); final Map<String, Object> entryA = new HashMap<>();
entryA.put("duration", 1); entryA.put("duration", 1);
entryA.put("@timestamp", invalidDate.format(DateTimeFormatter.ISO_ZONED_DATE_TIME)); entryA.put("@timestamp", invalidDate.format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
entryA.put("host", host); entryA.put("host", host);
entryA.put("tags", Collections.emptyList()); entryA.put("tags", Collections.emptyList());
// skipped, because it is not valid json
String corrupEntry = "{\"corrupt...";
// valid entry
final Map<String, Object> entryB = new HashMap<>(); final Map<String, Object> entryB = new HashMap<>();
entryB.put("duration", 2); entryB.put("duration", 2);
entryB.put("@timestamp", dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME)); entryB.put("@timestamp", dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME));
entryB.put("host", host); entryB.put("host", host);
entryB.put("tags", Collections.emptyList()); entryB.put("tags", Collections.emptyList());
PdbTestUtil.send(entryA, entryB); final ObjectMapper objectMapper = new ObjectMapper();
final String data = objectMapper.writeValueAsString(entryA)+"\n"+corrupEntry+"\n"+objectMapper.writeValueAsString(entryB)+"\n";
PdbTestUtil.send(data);
} }
try (PerformanceDb db = new PerformanceDb(dataDirectory)) { try (PerformanceDb db = new PerformanceDb(dataDirectory)) {