Add support for renaming and post-processing of CSV columns

This commit is contained in:
2019-12-14 18:11:59 +01:00
parent 1124dc8082
commit 00ba4d2a69
8 changed files with 250 additions and 72 deletions

View File

@@ -1,21 +1,155 @@
package org.lucares.pdbui;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

import org.lucares.utils.Preconditions;
public class CsvReaderSettings {
public final class CsvReaderSettings {
/**
 * String transformations that can be applied to the values of a CSV column.
 */
public enum PostProcessors {
    /**
     * Converts the value to lower case. Uses {@link Locale#ROOT} so that the
     * result does not depend on the default locale of the JVM (with a Turkish
     * default locale {@code "TITLE".toLowerCase()} yields {@code "tıtle"}).
     */
    LOWER_CASE(value -> value.toLowerCase(Locale.ROOT)),

    /** Removes leading and trailing whitespace via {@link String#trim()}. */
    STRIP(String::trim);

    private final Function<String, String> function;

    PostProcessors(final Function<String, String> function) {
        this.function = function;
    }

    /**
     * @return the transformation performed by this post processor
     */
    public Function<String, String> getFunction() {
        return function;
    }

    /**
     * Combines the given post processors into a single function.
     * <p>
     * The processors are chained in the iteration order of the
     * {@link EnumSet}, which is the declaration order of the constants.
     *
     * @param postProcessors the processors to combine; may be {@code null} or
     *                       empty
     * @return the composed function, or the identity function if there is
     *         nothing to apply
     */
    public static Function<String, String> toFunction(final EnumSet<PostProcessors> postProcessors) {
        Function<String, String> combined = Function.identity();
        if (postProcessors != null) {
            for (final PostProcessors postProcessor : postProcessors) {
                combined = combined.andThen(postProcessor.getFunction());
            }
        }
        return combined;
    }
}
/**
 * Holds the {@link ColumnDefinition}s of a CSV file, keyed by the header of
 * the CSV column they apply to.
 */
public static final class ColumnDefinitions {
    Map<String, ColumnDefinition> columnDefinitions = new HashMap<>();

    /**
     * @return the backing map from CSV column header to its definition; never
     *         {@code null}
     */
    public Map<String, ColumnDefinition> getColumnDefinitions() {
        return columnDefinitions;
    }

    /**
     * Replaces the backing map.
     *
     * @param columnDefinitions the new map; {@code null} is treated as "no
     *                          definitions" so that the other methods of this
     *                          class keep working
     */
    public void setColumnDefinitions(final Map<String, ColumnDefinition> columnDefinitions) {
        this.columnDefinitions = columnDefinitions != null ? columnDefinitions : new HashMap<>();
    }

    /**
     * Marks the column with the given header as ignored.
     *
     * @param csvColumnHeader header of the column in the CSV file
     */
    public void ignoreColumn(final String csvColumnHeader) {
        definitionFor(csvColumnHeader).setIgnore(true);
    }

    /**
     * Sets the name the column with the given header is renamed to.
     *
     * @param csvColumnHeader header of the column in the CSV file
     * @param renameTo        the new name
     */
    public void rename(final String csvColumnHeader, final String renameTo) {
        definitionFor(csvColumnHeader).setRenameTo(renameTo);
    }

    /**
     * Sets the post processors for the column with the given header.
     *
     * @param csvColumnHeader header of the column in the CSV file
     * @param postProcessors  the post processors to store for that column
     */
    public void postProcess(final String csvColumnHeader, final EnumSet<PostProcessors> postProcessors) {
        definitionFor(csvColumnHeader).setPostProcessors(postProcessors);
    }

    /**
     * @param csvColumnHeader header of the column in the CSV file
     * @return {@code true} if a definition exists for the column and it is
     *         flagged as ignored
     */
    public boolean isIgnoredColumn(final String csvColumnHeader) {
        final ColumnDefinition definition = columnDefinitions.get(csvColumnHeader);
        return definition != null && definition.isIgnore();
    }

    /**
     * @param csvColumnHeader header of the column in the CSV file
     * @return the rename target stored for the column, or {@code null} if
     *         there is none
     */
    public String getRenameTo(final String csvColumnHeader) {
        final ColumnDefinition definition = columnDefinitions.get(csvColumnHeader);
        return definition != null ? definition.getRenameTo() : null;
    }

    /**
     * @param csvColumnHeader header of the column in the CSV file
     * @return the post processors stored for the column; an empty set if the
     *         column has no definition
     */
    public EnumSet<PostProcessors> getPostProcessors(final String csvColumnHeader) {
        final ColumnDefinition definition = columnDefinitions.get(csvColumnHeader);
        return definition != null ? definition.getPostProcessors() : EnumSet.noneOf(PostProcessors.class);
    }

    /**
     * @return one line per column in the form {@code header:definition}
     */
    @Override
    public String toString() {
        final StringBuilder result = new StringBuilder();
        for (final Map.Entry<String, ColumnDefinition> entry : columnDefinitions.entrySet()) {
            result.append(entry.getKey());
            result.append(":");
            result.append(entry.getValue());
            result.append("\n");
        }
        return result.toString();
    }

    /** Returns the definition for the header, creating and registering it if missing. */
    private ColumnDefinition definitionFor(final String csvColumnHeader) {
        return columnDefinitions.computeIfAbsent(csvColumnHeader, header -> new ColumnDefinition());
    }
}
/**
 * Settings for a single CSV column: whether it is ignored, an optional new
 * name, and the post processors stored for it.
 */
public static final class ColumnDefinition {
    private boolean ignore;
    private String renameTo;
    private EnumSet<PostProcessors> postProcessors = EnumSet.noneOf(PostProcessors.class);

    /** Creates a definition with the initial field values declared above. */
    public ColumnDefinition() {
    }

    /** @return the ignore flag of this column */
    public boolean isIgnore() {
        return this.ignore;
    }

    /** @param ignore the new ignore flag */
    public void setIgnore(final boolean ignore) {
        this.ignore = ignore;
    }

    /** @return the rename target, or {@code null} if none was set */
    public String getRenameTo() {
        return this.renameTo;
    }

    /** @param renameTo the new rename target */
    public void setRenameTo(final String renameTo) {
        this.renameTo = renameTo;
    }

    /**
     * @return the stored post processors; a new empty set if the stored
     *         value is {@code null}
     */
    public EnumSet<PostProcessors> getPostProcessors() {
        if (this.postProcessors == null) {
            return EnumSet.noneOf(PostProcessors.class);
        }
        return this.postProcessors;
    }

    /** @param postProcessors the post processors to store */
    public void setPostProcessors(final EnumSet<PostProcessors> postProcessors) {
        this.postProcessors = postProcessors;
    }

    /**
     * @return a text that only mentions the settings that are set, each
     *         prefixed with a blank
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        if (this.ignore) {
            text.append(" ignore=").append(this.ignore);
        }
        if (this.renameTo != null) {
            text.append(" renameTo=").append(this.renameTo);
        }
        final boolean hasPostProcessors = this.postProcessors != null && !this.postProcessors.isEmpty();
        if (hasPostProcessors) {
            text.append(" postProcess=").append(this.postProcessors);
        }
        return text.toString();
    }
}
private byte separator;
private Set<String> ignoreColumnNames = new HashSet<String>();
private ColumnDefinitions columnDefinitions = new ColumnDefinitions();
private final Map<String, String> additionalTags = new HashMap<String, String>();
private Map<String, String> additionalTags = new HashMap<String, String>();
private String timeColumn;
@@ -24,35 +158,25 @@ public class CsvReaderSettings {
private byte comment = '#';
public CsvReaderSettings() {
this("@timestamp", "duration", (byte) ',', Collections.emptyList());
this("@timestamp", "duration", (byte) ',', new ColumnDefinitions());
}
private CsvReaderSettings(final String timeColumn, final String valueColumn, final byte separator,
final Collection<String> ignoreColumns) {
final ColumnDefinitions columnDefinitions) {
this.timeColumn = timeColumn;
this.valueColumn = valueColumn;
this.separator = separator;
this.ignoreColumnNames.addAll(ignoreColumns);
}
public static CsvReaderSettings create(final String timeColumn, final String valueColumn, final byte separator,
final String... ignoreColumnNames) {
return new CsvReaderSettings(timeColumn, valueColumn, separator, List.of(ignoreColumnNames));
this.columnDefinitions = columnDefinitions;
}
public static CsvReaderSettings create(final String timeColumn, final String valueColumn, final char separator,
final String... ignoreColumnNames) {
return CsvReaderSettings.create(timeColumn, valueColumn, separator, List.of(ignoreColumnNames));
}
public static CsvReaderSettings create(final String timeColumn, final String valueColumn, final char separator,
final Collection<String> ignoreColumnNames) {
final ColumnDefinitions columnDefinitions) {
Preconditions.checkTrue(separator == (byte) separator,
"Only separators that fulfill separator == (byte)separator are supported. "
+ "This restriction is because the parsing algorithm skips the overhead of "
+ "translating bytes to characters.");
return new CsvReaderSettings(timeColumn, valueColumn, (byte) separator, ignoreColumnNames);
return new CsvReaderSettings(timeColumn, valueColumn, (byte) separator, columnDefinitions);
}
public String getTimeColumn() {
@@ -87,18 +211,6 @@ public class CsvReaderSettings {
this.comment = comment;
}
public Set<String> getIgnoreColumnNames() {
return ignoreColumnNames;
}
public void setIgnoreColumnNames(final Set<String> ignoreColumnNames) {
this.ignoreColumnNames = ignoreColumnNames;
}
public boolean isIgnoredColumn(final String columnName) {
return ignoreColumnNames.contains(columnName);
}
/**
 * Stores the given value for the given field in the additional tags,
 * replacing any value previously stored for that field.
 *
 * @param field the key under which the tag is stored
 * @param value the tag value
 */
public void putAdditionalTag(final String field, final String value) {
additionalTags.put(field, value);
}
@@ -107,4 +219,16 @@ public class CsvReaderSettings {
return Map.copyOf(additionalTags);
}
/**
 * Replaces the additional tags with a copy of the given map.
 * <p>
 * A mutable copy is stored instead of the passed reference because
 * {@code putAdditionalTag} modifies the map: storing an immutable map (for
 * example one returned by {@code Map.copyOf}) would make later puts fail, and
 * storing the caller's map would let this object modify it.
 *
 * @param additionalTags the new tags; {@code null} is treated as no tags
 */
public void setAdditionalTags(final Map<String, String> additionalTags) {
    this.additionalTags = additionalTags == null ? new HashMap<>() : new HashMap<>(additionalTags);
}
/**
 * @return the column definitions held by these settings (the stored
 *         instance itself, not a copy)
 */
public ColumnDefinitions getColumnDefinitions() {
return columnDefinitions;
}
/**
 * Replaces the column definitions held by these settings. The given
 * instance is stored as is, without copying or null check.
 *
 * @param columnDefinitions the new column definitions
 */
public void setColumnDefinitions(final ColumnDefinitions columnDefinitions) {
this.columnDefinitions = columnDefinitions;
}
}