extend CsvReaderSettings by list of columns that will not be indexed

This commit is contained in:
2019-12-01 09:21:07 +01:00
parent ffe5ae8652
commit 30504672bc
3 changed files with 131 additions and 5 deletions

View File

@@ -1,23 +1,44 @@
package org.lucares.pdbui; package org.lucares.pdbui;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.lucares.utils.Preconditions; import org.lucares.utils.Preconditions;
public class CsvReaderSettings { public class CsvReaderSettings {
private final byte separator; private final byte separator;
public CsvReaderSettings(final byte separator) { private final Set<String> ignoreColumnNames = new HashSet<String>();
this.separator = separator;
public CsvReaderSettings(final byte separator, final String... ignoreColumnNames) {
this(separator, List.of(ignoreColumnNames));
} }
public CsvReaderSettings(final char separator) { public CsvReaderSettings(final char separator, final String... ignoreColumnNames) {
this(separator, List.of(ignoreColumnNames));
}
public CsvReaderSettings(final char separator, final Collection<String> ignoreColumnNames) {
this((byte) separator, ignoreColumnNames);
Preconditions.checkTrue(separator == (byte) separator, Preconditions.checkTrue(separator == (byte) separator,
"Only separators that fulfill separator == (byte)separator are supported. " "Only separators that fulfill separator == (byte)separator are supported. "
+ "This restriction is because the parsing algorithm skips the overhead of " + "This restriction is because the parsing algorithm skips the overhead of "
+ "translating bytes to characters."); + "translating bytes to characters.");
this.separator = (byte) separator; }
public CsvReaderSettings(final byte separator, final Collection<String> ignoreColumns) {
this.separator = separator;
this.ignoreColumnNames.addAll(ignoreColumns);
} }
public byte getSeparator() { public byte getSeparator() {
return separator; return separator;
} }
public boolean isIgnoredColumn(final String columnName) {
return ignoreColumnNames.contains(columnName);
}
} }

View File

@@ -116,7 +116,7 @@ class CsvToEntryTransformer {
} }
private boolean ignoreColum(final String columnName) { private boolean ignoreColum(final String columnName) {
return columnName.startsWith(COLUM_IGNORE_PREFIX); return settings.isIgnoredColumn(columnName) || columnName.startsWith(COLUM_IGNORE_PREFIX);
} }
private static Entry handleCsvLine(final int[] columns, final byte[] line, final int bytesInLine, private static Entry handleCsvLine(final int[] columns, final byte[] line, final int bytesInLine,

View File

@@ -0,0 +1,105 @@
package org.lucares.pdbui;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import org.lucares.collections.LongList;
import org.lucares.pdb.api.DateTimeRange;
import org.lucares.pdb.api.Entries;
import org.lucares.pdb.api.Query;
import org.lucares.performance.db.BlockingQueueIterator;
import org.lucares.performance.db.PerformanceDb;
import org.lucares.utils.file.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@Test
public class CsvToEntryTransformerTest {
private Path dataDirectory;
@BeforeMethod
public void beforeMethod() throws IOException {
dataDirectory = Files.createTempDirectory("pdb");
}
@AfterMethod
public void afterMethod() throws IOException {
FileUtils.delete(dataDirectory);
}
public void testIngest() throws IOException, InterruptedException {
final OffsetDateTime dateA = OffsetDateTime.now();
final OffsetDateTime dateB = OffsetDateTime.now();
try (final PerformanceDb db = new PerformanceDb(dataDirectory)) {
final String csv = "@timestamp,duration,tag\n"//
+ dateA.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",1,tagValue\n"//
+ dateB.format(DateTimeFormatter.ISO_ZONED_DATE_TIME) + ",2,tagValue\n";
final ArrayBlockingQueue<Entries> queue = new ArrayBlockingQueue<Entries>(10);
final CsvReaderSettings settings = new CsvReaderSettings(',');
final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings);
csvToEntryTransformer.readCSV(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8)));
queue.put(Entries.POISON);
db.putEntries(new BlockingQueueIterator<Entries>(queue, Entries.POISON));
}
try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
final LongList result = db.get(new Query("tag=tagValue", DateTimeRange.max())).singleGroup().flatMap();
Assert.assertEquals(result.size(), 4);
Assert.assertEquals(result.get(0), dateA.toInstant().toEpochMilli());
Assert.assertEquals(result.get(1), 1);
Assert.assertEquals(result.get(2), dateB.toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli());
Assert.assertEquals(result.get(3), 2);
}
}
/**
* There are two ways to ignore columns:
* <ol>
* <li>by specifying the column header name in the {@link CsvReaderSettings}.
* <li>by adding the prefix &quot;-&quot; to the column header name.
* </ol>
*
* @throws IOException
* @throws InterruptedException
*/
public void testIgnoreColumns() throws IOException, InterruptedException {
try (final PerformanceDb db = new PerformanceDb(dataDirectory)) {
final String csv = "@timestamp,duration,ignoredColumn,-otherIgnoredColumn,tag\n"//
+ "2000-01-01T00:00:00.000Z,1,ignoreValue,ignoreValue,tagValue\n"//
+ "2000-01-01T00:00:00.001Z,2,ignoreValue,ignoreValue,tagValue\n";
final ArrayBlockingQueue<Entries> queue = new ArrayBlockingQueue<Entries>(10);
final CsvReaderSettings settings = new CsvReaderSettings(',', "ignoredColumn");
final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings);
csvToEntryTransformer.readCSV(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8)));
queue.put(Entries.POISON);
db.putEntries(new BlockingQueueIterator<Entries>(queue, Entries.POISON));
}
try (PerformanceDb db = new PerformanceDb(dataDirectory)) {
final List<String> availableFields = db.getFields(DateTimeRange.max());
Assert.assertEquals(availableFields.toString(), List.of("tag").toString(),
"the ignored field is not returned");
}
}
}