add settings to file upload

This makes it possible to define properties for the
uploaded CSV files. Currently we can define the
separator and which columns should be ignored.
2019-12-08 20:20:13 +01:00
parent e931856041
commit 60f1a79816
8 changed files with 96 additions and 42 deletions
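
For orientation before the diffs: the settings object introduced here is built through new static create(...) factories instead of public constructors. A minimal sketch of the intended usage (the semicolon separator and the "hostname" column name are illustrative, not part of this commit):

    // settings for a semicolon-separated file that skips an (illustrative) "hostname" column
    final CsvReaderSettings settings = CsvReaderSettings.create(';', "hostname");
    // settings.getSeparator()              -> (byte) ';'
    // settings.isIgnoredColumn("hostname") -> true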

View File

@@ -8,37 +8,54 @@ import java.util.Set;
+import org.lucares.utils.Preconditions;
 
 public class CsvReaderSettings {
 
-    private final byte separator;
+    private byte separator;
 
-    private final Set<String> ignoreColumnNames = new HashSet<String>();
+    private Set<String> ignoreColumnNames = new HashSet<String>();
 
-    public CsvReaderSettings(final byte separator, final String... ignoreColumnNames) {
-        this(separator, List.of(ignoreColumnNames));
+    public CsvReaderSettings() {
+        separator = ',';
     }
 
-    public CsvReaderSettings(final char separator, final String... ignoreColumnNames) {
-        this(separator, List.of(ignoreColumnNames));
+    private CsvReaderSettings(final byte separator, final Collection<String> ignoreColumns) {
+        this.separator = separator;
+        this.ignoreColumnNames.addAll(ignoreColumns);
     }
 
-    public CsvReaderSettings(final char separator, final Collection<String> ignoreColumnNames) {
-        this((byte) separator, ignoreColumnNames);
+    public static CsvReaderSettings create(final byte separator, final String... ignoreColumnNames) {
+        return new CsvReaderSettings(separator, List.of(ignoreColumnNames));
     }
 
-    public CsvReaderSettings(final byte separator, final Collection<String> ignoreColumns) {
-        this.separator = separator;
-        this.ignoreColumnNames.addAll(ignoreColumns);
+    public static CsvReaderSettings create(final char separator, final String... ignoreColumnNames) {
+        return CsvReaderSettings.create(separator, List.of(ignoreColumnNames));
+    }
+
+    public static CsvReaderSettings create(final char separator, final Collection<String> ignoreColumnNames) {
+        Preconditions.checkTrue(separator == (byte) separator,
+                "Only separators that fulfill separator == (byte)separator are supported. "
+                        + "This restriction is because the parsing algorithm skips the overhead of "
+                        + "translating bytes to characters.");
+        return new CsvReaderSettings((byte) separator, ignoreColumnNames);
     }
 
     public byte getSeparator() {
         return separator;
     }
 
+    public void setSeparator(final byte separator) {
+        this.separator = separator;
+    }
+
     public Set<String> getIgnoreColumnNames() {
         return ignoreColumnNames;
     }
 
+    public void setIgnoreColumnNames(final Set<String> ignoreColumnNames) {
+        this.ignoreColumnNames = ignoreColumnNames;
+    }
+
     public boolean isIgnoredColumn(final String columnName) {
         return ignoreColumnNames.contains(columnName);
     }
 }
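
The precondition above is why the char overloads exist at all: the reader works on raw bytes, so a separator must survive the char-to-byte cast unchanged. A hedged sketch of what passes and what fails (the separator values are illustrative):

    // ',' is 44 and (byte) ',' == ',' holds: accepted
    final CsvReaderSettings ok = CsvReaderSettings.create(',');

    // 'ä' is 228 but (byte) 'ä' is -28, so the precondition fails
    // and Preconditions.checkTrue throws: rejected
    final CsvReaderSettings fails = CsvReaderSettings.create('ä');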

View File

@@ -65,7 +65,8 @@ public final class IngestionHandler implements Callable<Void> {
             handleInputStream(gzip);
         } else {
             in.reset();
-            final CsvToEntryTransformer csvTransformer = new CsvToEntryTransformer(queue, new CsvReaderSettings(','));
+            final CsvToEntryTransformer csvTransformer = new CsvToEntryTransformer(queue,
+                    CsvReaderSettings.create(','));
             csvTransformer.readCSV(in);
         }
     }

View File

@@ -54,6 +54,7 @@ import org.springframework.web.bind.annotation.RequestBody;
 import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RequestMethod;
 import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RequestPart;
 import org.springframework.web.bind.annotation.ResponseBody;
 import org.springframework.web.bind.annotation.ResponseStatus;
 import org.springframework.web.multipart.MultipartFile;
@@ -322,13 +323,14 @@ public class PdbController implements HardcodedValues, PropertyKeys {
         return result;
     }
 
-    @PostMapping(path = "/data")
+    @PostMapping(path = "/data", consumes = MediaType.MULTIPART_MIXED_VALUE)
     @ResponseBody
     @ResponseStatus(code = HttpStatus.CREATED)
-    public String handleCsvFileUpload(@RequestParam("file") final MultipartFile[] files)
+    public String handleCsvFileUpload(@RequestParam("file") final MultipartFile[] files,
+            @RequestPart("settings") final CsvReaderSettings csvReaderSettings)
             throws IllegalStateException, IOException {
-        csvUploadHandler.ingest(List.of(files), new CsvReaderSettings(','));
+        csvUploadHandler.ingest(List.of(files), csvReaderSettings);
         return ""; // return value might become a job id that can be used to cancel, or observe
                    // status
     }
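
With this change the endpoint expects a multipart/mixed body with one or more "file" parts and a JSON "settings" part. A hedged client-side sketch using the test helpers from this commit (the host, port, CSV payload, and part filenames are assumptions; "/data" and the part names come from the controller):

    final LinkedMultiValueMap<String, Object> parts = new LinkedMultiValueMap<>();
    parts.add("file", new StringResource("@timestamp,duration,tag\n2019-12-08T20:20:13+01:00,1,tagValue\n", "upload.csv"));
    parts.add("settings", new JsonResource(CsvReaderSettings.create(','), "csvReaderSettings.json"));

    final HttpHeaders headers = new HttpHeaders();
    headers.setContentType(MediaType.MULTIPART_MIXED);

    // assumed local server; the response body is currently always empty
    new RestTemplate().postForEntity("http://localhost:8080/data",
            new HttpEntity<>(parts, headers), String.class);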

View File

@@ -48,7 +48,7 @@ public class CsvToEntryTransformerTest {
                 + dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",2,tagValue\n";
         final ArrayBlockingQueue<Entries> queue = db.getQueue();
-        final CsvReaderSettings settings = new CsvReaderSettings(',');
+        final CsvReaderSettings settings = CsvReaderSettings.create(',');
         final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings);
         csvToEntryTransformer.readCSV(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8)));
         queue.put(Entries.POISON);
@@ -85,7 +85,7 @@ public class CsvToEntryTransformerTest {
                 + "2000-01-01T00:00:00.001Z,2,ignoreValue,ignoreValue,tagValue\n";
         final ArrayBlockingQueue<Entries> queue = db.getQueue();
-        final CsvReaderSettings settings = new CsvReaderSettings(',', "ignoredColumn");
+        final CsvReaderSettings settings = CsvReaderSettings.create(',', "ignoredColumn");
         final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings);
         csvToEntryTransformer.readCSV(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8)));
         queue.put(Entries.POISON);

View File

@@ -0,0 +1,21 @@
+package org.lucares.pdbui;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+public class JsonResource extends StringResource {
+
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    public JsonResource(final Object obj, final String filename) {
+        super(stringify(obj), filename);
+    }
+
+    private static String stringify(final Object obj) {
+        try {
+            return OBJECT_MAPPER.writeValueAsString(obj);
+        } catch (final JsonProcessingException e) {
+            throw new RuntimeException("failed to serialize object", e);
+        }
+    }
+}

View File

@@ -1,5 +1,6 @@
 package org.lucares.pdbui;
 
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Properties;
@@ -23,7 +24,9 @@ class TestOverrides implements ApplicationContextInitializer<ConfigurableApplica
         final Path tmpdir = Path.of(System.getProperty("java.io.tmpdir")).resolve("pdb-test");
-        FileUtils.delete(tmpdir);
+        if (Files.isDirectory(tmpdir)) {
+            FileUtils.delete(tmpdir);
+        }
         props.put("base.dir", tmpdir.toFile().getAbsolutePath());
         final PropertiesPropertySource testOverrides = new PropertiesPropertySource("testOverrides", props);

View File

@@ -3,6 +3,7 @@ package org.lucares.pdbui;
 import java.time.OffsetDateTime;
 import java.time.format.DateTimeFormatter;
 import java.time.temporal.ChronoUnit;
+import java.util.List;
 
 import org.junit.Assert;
 import org.junit.Test;
@@ -41,38 +42,47 @@ public class PdbControllerTest {
     @Test
     public void testUploadCsv() {
+        final String ignoredColumn = "ignoredColumn";
         final OffsetDateTime dateA = OffsetDateTime.now();
         final OffsetDateTime dateB = OffsetDateTime.now();
-        final String csv = "@timestamp,duration,tag\n"//
-                + dateA.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",1,tagValue\n"//
-                + dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",2,tagValue\n";
-        uploadCsv(csv);
+        final String csv = "@timestamp,duration,tag," + ignoredColumn + "\n"//
+                + dateA.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",1,tagValue,ignoredValue\n"//
+                + dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",2,tagValue,ignoredValue\n";
+        final CsvReaderSettings settings = CsvReaderSettings.create(',', ignoredColumn);
+        uploadCsv(settings, csv);
 
-        final GroupResult groupResult = performanceDb.get(new Query("tag=tagValue", DateTimeRange.ofDay(dateA)))
-                .singleGroup();
-        final LongList result = groupResult.flatMap();
-        System.out.println(PdbTestUtil.timeValueLongListToString(result));
-        Assert.assertEquals(result.size(), 4);
-
-        Assert.assertEquals(result.get(0), dateA.toInstant().toEpochMilli());
-        Assert.assertEquals(result.get(1), 1);
-
-        Assert.assertEquals(result.get(2), dateB.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli());
-        Assert.assertEquals(result.get(3), 2);
+        {
+            final GroupResult groupResult = performanceDb.get(new Query("tag=tagValue", DateTimeRange.ofDay(dateA)))
+                    .singleGroup();
+            final LongList result = groupResult.flatMap();
+            System.out.println(PdbTestUtil.timeValueLongListToString(result));
+            Assert.assertEquals(result.size(), 4);
+
+            Assert.assertEquals(result.get(0), dateA.toInstant().toEpochMilli());
+            Assert.assertEquals(result.get(1), 1);
+
+            Assert.assertEquals(result.get(2), dateB.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli());
+            Assert.assertEquals(result.get(3), 2);
+        }
+        {
+            final List<String> fields = performanceDb.getFields(DateTimeRange.max());
+            Assert.assertTrue("ignoredColumn not in fields. fields: " + fields, !fields.contains(ignoredColumn));
+        }
     }
 
-    private void uploadCsv(final String... csvs) {
-        // final TestRestTemplate rest = new TestRestTemplate();
+    private void uploadCsv(final CsvReaderSettings csvReaderSettings, final String... csvs) {
         final LinkedMultiValueMap<String, Object> parameters = new LinkedMultiValueMap<String, Object>();
         int count = 0;
         for (final String csv : csvs) {
-            parameters.add("file", new CsvResource(csv, count++ + ".csv"));
+            parameters.add("file", new StringResource(csv, count++ + ".csv"));
         }
+        parameters.add("settings", new JsonResource(csvReaderSettings, "csvReaderSettings.json"));
         final HttpHeaders headers = new HttpHeaders();
-        headers.setContentType(MediaType.MULTIPART_FORM_DATA);
+        headers.setContentType(MediaType.MULTIPART_MIXED);
         final HttpEntity<MultiValueMap<String, Object>> entity = new HttpEntity<MultiValueMap<String, Object>>(
                 parameters, headers);

View File

@@ -4,12 +4,12 @@ import java.nio.charset.StandardCharsets;
 import org.springframework.core.io.ByteArrayResource;
 
-public class CsvResource extends ByteArrayResource {
+public class StringResource extends ByteArrayResource {
 
     private final String filename;
 
-    public CsvResource(final String csv, final String filename) {
-        super(csv.getBytes(StandardCharsets.UTF_8));
+    public StringResource(final String string, final String filename) {
+        super(string.getBytes(StandardCharsets.UTF_8));
         this.filename = filename;
     }