diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/CsvReaderSettings.java b/pdb-ui/src/main/java/org/lucares/pdbui/CsvReaderSettings.java index 89238bd..feed707 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/CsvReaderSettings.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/CsvReaderSettings.java @@ -1,23 +1,44 @@ package org.lucares.pdbui; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + import org.lucares.utils.Preconditions; public class CsvReaderSettings { private final byte separator; - public CsvReaderSettings(final byte separator) { - this.separator = separator; + private final Set ignoreColumnNames = new HashSet(); + + public CsvReaderSettings(final byte separator, final String... ignoreColumnNames) { + this(separator, List.of(ignoreColumnNames)); } - public CsvReaderSettings(final char separator) { + public CsvReaderSettings(final char separator, final String... ignoreColumnNames) { + this(separator, List.of(ignoreColumnNames)); + } + + public CsvReaderSettings(final char separator, final Collection ignoreColumnNames) { + this((byte) separator, ignoreColumnNames); Preconditions.checkTrue(separator == (byte) separator, "Only separators that fulfill separator == (byte)separator are supported. " + "This restriction is because the parsing algorithm skips the overhead of " + "translating bytes to characters."); - this.separator = (byte) separator; + } + + public CsvReaderSettings(final byte separator, final Collection ignoreColumns) { + + this.separator = separator; + this.ignoreColumnNames.addAll(ignoreColumns); } public byte getSeparator() { return separator; } + + public boolean isIgnoredColumn(final String columnName) { + return ignoreColumnNames.contains(columnName); + } } diff --git a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java index 45f06bb..8e7567b 100644 --- a/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java +++ b/pdb-ui/src/main/java/org/lucares/pdbui/CsvToEntryTransformer.java @@ -116,7 +116,7 @@ class CsvToEntryTransformer { } private boolean ignoreColum(final String columnName) { - return columnName.startsWith(COLUM_IGNORE_PREFIX); + return settings.isIgnoredColumn(columnName) || columnName.startsWith(COLUM_IGNORE_PREFIX); } private static Entry handleCsvLine(final int[] columns, final byte[] line, final int bytesInLine, diff --git a/pdb-ui/src/test/java/org/lucares/pdbui/CsvToEntryTransformerTest.java b/pdb-ui/src/test/java/org/lucares/pdbui/CsvToEntryTransformerTest.java new file mode 100644 index 0000000..75b0114 --- /dev/null +++ b/pdb-ui/src/test/java/org/lucares/pdbui/CsvToEntryTransformerTest.java @@ -0,0 +1,105 @@ +package org.lucares.pdbui; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.OffsetDateTime; +import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; + +import org.lucares.collections.LongList; +import org.lucares.pdb.api.DateTimeRange; +import org.lucares.pdb.api.Entries; +import org.lucares.pdb.api.Query; +import org.lucares.performance.db.BlockingQueueIterator; +import org.lucares.performance.db.PerformanceDb; +import org.lucares.utils.file.FileUtils; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Test +public class CsvToEntryTransformerTest { + + private Path dataDirectory; + + @BeforeMethod + public void beforeMethod() throws IOException { + dataDirectory = Files.createTempDirectory("pdb"); + } + + @AfterMethod + public void afterMethod() throws IOException { + FileUtils.delete(dataDirectory); + } + + public void testIngest() throws IOException, InterruptedException { + final OffsetDateTime dateA = OffsetDateTime.now(); + final OffsetDateTime dateB = OffsetDateTime.now(); + + try (final PerformanceDb db = new PerformanceDb(dataDirectory)) { + + final String csv = "@timestamp,duration,tag\n"// + + dateA.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",1,tagValue\n"// + + dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",2,tagValue\n"; + + final ArrayBlockingQueue queue = new ArrayBlockingQueue(10); + final CsvReaderSettings settings = new CsvReaderSettings(','); + final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings); + csvToEntryTransformer.readCSV(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8))); + queue.put(Entries.POISON); + + db.putEntries(new BlockingQueueIterator(queue, Entries.POISON)); + } + + try (PerformanceDb db = new PerformanceDb(dataDirectory)) { + final LongList result = db.get(new Query("tag=tagValue", DateTimeRange.max())).singleGroup().flatMap(); + Assert.assertEquals(result.size(), 4); + + Assert.assertEquals(result.get(0), dateA.toInstant().toEpochMilli()); + Assert.assertEquals(result.get(1), 1); + + Assert.assertEquals(result.get(2), dateB.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli()); + Assert.assertEquals(result.get(3), 2); + } + } + + /** + * There are two ways to ignore columns: + *
    + *
  1. by specifying the column header name in the {@link CsvReaderSettings}. + *
  2. by adding the prefix "-" to the column header name. + *
+ * + * @throws IOException + * @throws InterruptedException + */ + public void testIgnoreColumns() throws IOException, InterruptedException { + + try (final PerformanceDb db = new PerformanceDb(dataDirectory)) { + + final String csv = "@timestamp,duration,ignoredColumn,-otherIgnoredColumn,tag\n"// + + "2000-01-01T00:00:00.000Z,1,ignoreValue,ignoreValue,tagValue\n"// + + "2000-01-01T00:00:00.001Z,2,ignoreValue,ignoreValue,tagValue\n"; + + final ArrayBlockingQueue queue = new ArrayBlockingQueue(10); + final CsvReaderSettings settings = new CsvReaderSettings(',', "ignoredColumn"); + final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings); + csvToEntryTransformer.readCSV(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8))); + queue.put(Entries.POISON); + + db.putEntries(new BlockingQueueIterator(queue, Entries.POISON)); + } + + try (PerformanceDb db = new PerformanceDb(dataDirectory)) { + final List availableFields = db.getFields(DateTimeRange.max()); + Assert.assertEquals(availableFields.toString(), List.of("tag").toString(), + "the ignored field is not returned"); + } + } +}