Add support for renaming and post-processing of CSV columns
This commit is contained in:
@@ -1,21 +1,155 @@
|
||||
package org.lucares.pdbui;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.lucares.utils.Preconditions;
|
||||
|
||||
public class CsvReaderSettings {
|
||||
public final class CsvReaderSettings {
|
||||
|
||||
/**
 * Transformations that can be applied to a string value.
 *
 * <p>
 * Each constant wraps a {@code Function<String, String>}. Several constants
 * can be combined into a single function with {@link #toFunction(EnumSet)}.
 */
public enum PostProcessors {
    /**
     * Converts the value to lower case. The conversion uses
     * {@link java.util.Locale#ROOT} so that the result does not depend on the
     * default locale of the JVM (e.g. "I" must not become a dotless "ı" on a
     * Turkish system).
     */
    LOWER_CASE(value -> value.toLowerCase(java.util.Locale.ROOT)),

    /**
     * Removes leading and trailing whitespace as defined by
     * {@link String#trim()}.
     */
    STRIP(String::trim);

    private final Function<String, String> function;

    PostProcessors(final Function<String, String> function) {
        this.function = function;
    }

    /**
     * @return the transformation represented by this constant
     */
    public Function<String, String> getFunction() {
        return function;
    }

    /**
     * Combines the given post processors into a single function.
     *
     * <p>
     * The functions are chained in the iteration order of the {@link EnumSet},
     * which is the declaration order of the constants.
     *
     * @param postProcessors the post processors to combine; may be
     *                       {@code null} or empty
     * @return the chained function, or the identity function if no post
     *         processor was given
     */
    public static Function<String, String> toFunction(final EnumSet<PostProcessors> postProcessors) {
        Function<String, String> result = Function.identity();
        if (postProcessors == null) {
            return result;
        }
        // An empty set leaves the identity function untouched.
        for (final PostProcessors postProcessor : postProcessors) {
            result = result.andThen(postProcessor.getFunction());
        }
        return result;
    }
}
|
||||
|
||||
public static final class ColumnDefinitions {
|
||||
Map<String, ColumnDefinition> columnDefinitions = new HashMap<>();
|
||||
|
||||
public Map<String, ColumnDefinition> getColumnDefinitions() {
|
||||
return columnDefinitions;
|
||||
}
|
||||
|
||||
public void setColumnDefinitions(final Map<String, ColumnDefinition> columnDefinitions) {
|
||||
this.columnDefinitions = columnDefinitions;
|
||||
}
|
||||
|
||||
public void ignoreColumn(final String csvColumnHeader) {
|
||||
columnDefinitions.putIfAbsent(csvColumnHeader, new ColumnDefinition());
|
||||
columnDefinitions.get(csvColumnHeader).setIgnore(true);
|
||||
}
|
||||
|
||||
public void rename(final String csvColumnHeader, final String renameTo) {
|
||||
columnDefinitions.putIfAbsent(csvColumnHeader, new ColumnDefinition());
|
||||
columnDefinitions.get(csvColumnHeader).setRenameTo(renameTo);
|
||||
}
|
||||
|
||||
public void postProcess(final String csvColumnHeader, final EnumSet<PostProcessors> postProcessors) {
|
||||
columnDefinitions.putIfAbsent(csvColumnHeader, new ColumnDefinition());
|
||||
columnDefinitions.get(csvColumnHeader).setPostProcessors(postProcessors);
|
||||
}
|
||||
|
||||
public boolean isIgnoredColumn(final String csvColumnHeader) {
|
||||
return columnDefinitions.getOrDefault(csvColumnHeader, new ColumnDefinition()).isIgnore();
|
||||
}
|
||||
|
||||
public String getRenameTo(final String csvColumnHeader) {
|
||||
return columnDefinitions.getOrDefault(csvColumnHeader, new ColumnDefinition()).getRenameTo();
|
||||
}
|
||||
|
||||
public EnumSet<PostProcessors> getPostProcessors(final String csvColumnHeader) {
|
||||
return columnDefinitions.getOrDefault(csvColumnHeader, new ColumnDefinition()).getPostProcessors();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder result = new StringBuilder();
|
||||
|
||||
for (final String col : columnDefinitions.keySet()) {
|
||||
result.append(col);
|
||||
result.append(":");
|
||||
result.append(columnDefinitions.get(col));
|
||||
result.append("\n");
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static final class ColumnDefinition {
|
||||
private boolean ignore;
|
||||
|
||||
private String renameTo;
|
||||
|
||||
private EnumSet<PostProcessors> postProcessors = EnumSet.noneOf(PostProcessors.class);
|
||||
|
||||
public ColumnDefinition() {
|
||||
super();
|
||||
}
|
||||
|
||||
public boolean isIgnore() {
|
||||
return ignore;
|
||||
}
|
||||
|
||||
public void setIgnore(final boolean ignore) {
|
||||
this.ignore = ignore;
|
||||
}
|
||||
|
||||
public String getRenameTo() {
|
||||
return renameTo;
|
||||
}
|
||||
|
||||
public void setRenameTo(final String renameTo) {
|
||||
this.renameTo = renameTo;
|
||||
}
|
||||
|
||||
public EnumSet<PostProcessors> getPostProcessors() {
|
||||
return postProcessors != null ? postProcessors : EnumSet.noneOf(PostProcessors.class);
|
||||
}
|
||||
|
||||
public void setPostProcessors(final EnumSet<PostProcessors> postProcessors) {
|
||||
this.postProcessors = postProcessors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
if (ignore) {
|
||||
builder.append(" ignore=");
|
||||
builder.append(ignore);
|
||||
}
|
||||
if (renameTo != null) {
|
||||
builder.append(" renameTo=");
|
||||
builder.append(renameTo);
|
||||
}
|
||||
if (postProcessors != null && !postProcessors.isEmpty()) {
|
||||
builder.append(" postProcess=");
|
||||
builder.append(postProcessors);
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private byte separator;
|
||||
|
||||
private Set<String> ignoreColumnNames = new HashSet<String>();
|
||||
private ColumnDefinitions columnDefinitions = new ColumnDefinitions();
|
||||
|
||||
private final Map<String, String> additionalTags = new HashMap<String, String>();
|
||||
private Map<String, String> additionalTags = new HashMap<String, String>();
|
||||
|
||||
private String timeColumn;
|
||||
|
||||
@@ -24,35 +158,25 @@ public class CsvReaderSettings {
|
||||
private byte comment = '#';
|
||||
|
||||
public CsvReaderSettings() {
|
||||
this("@timestamp", "duration", (byte) ',', Collections.emptyList());
|
||||
this("@timestamp", "duration", (byte) ',', new ColumnDefinitions());
|
||||
}
|
||||
|
||||
private CsvReaderSettings(final String timeColumn, final String valueColumn, final byte separator,
|
||||
final Collection<String> ignoreColumns) {
|
||||
final ColumnDefinitions columnDefinitions) {
|
||||
|
||||
this.timeColumn = timeColumn;
|
||||
this.valueColumn = valueColumn;
|
||||
this.separator = separator;
|
||||
this.ignoreColumnNames.addAll(ignoreColumns);
|
||||
}
|
||||
|
||||
public static CsvReaderSettings create(final String timeColumn, final String valueColumn, final byte separator,
|
||||
final String... ignoreColumnNames) {
|
||||
return new CsvReaderSettings(timeColumn, valueColumn, separator, List.of(ignoreColumnNames));
|
||||
this.columnDefinitions = columnDefinitions;
|
||||
}
|
||||
|
||||
public static CsvReaderSettings create(final String timeColumn, final String valueColumn, final char separator,
|
||||
final String... ignoreColumnNames) {
|
||||
return CsvReaderSettings.create(timeColumn, valueColumn, separator, List.of(ignoreColumnNames));
|
||||
}
|
||||
|
||||
public static CsvReaderSettings create(final String timeColumn, final String valueColumn, final char separator,
|
||||
final Collection<String> ignoreColumnNames) {
|
||||
final ColumnDefinitions columnDefinitions) {
|
||||
Preconditions.checkTrue(separator == (byte) separator,
|
||||
"Only separators that fulfill separator == (byte)separator are supported. "
|
||||
+ "This restriction is because the parsing algorithm skips the overhead of "
|
||||
+ "translating bytes to characters.");
|
||||
return new CsvReaderSettings(timeColumn, valueColumn, (byte) separator, ignoreColumnNames);
|
||||
return new CsvReaderSettings(timeColumn, valueColumn, (byte) separator, columnDefinitions);
|
||||
}
|
||||
|
||||
public String getTimeColumn() {
|
||||
@@ -87,18 +211,6 @@ public class CsvReaderSettings {
|
||||
this.comment = comment;
|
||||
}
|
||||
|
||||
public Set<String> getIgnoreColumnNames() {
|
||||
return ignoreColumnNames;
|
||||
}
|
||||
|
||||
public void setIgnoreColumnNames(final Set<String> ignoreColumnNames) {
|
||||
this.ignoreColumnNames = ignoreColumnNames;
|
||||
}
|
||||
|
||||
public boolean isIgnoredColumn(final String columnName) {
|
||||
return ignoreColumnNames.contains(columnName);
|
||||
}
|
||||
|
||||
public void putAdditionalTag(final String field, final String value) {
|
||||
additionalTags.put(field, value);
|
||||
}
|
||||
@@ -107,4 +219,16 @@ public class CsvReaderSettings {
|
||||
return Map.copyOf(additionalTags);
|
||||
}
|
||||
|
||||
public void setAdditionalTags(final Map<String, String> additionalTags) {
|
||||
this.additionalTags = additionalTags;
|
||||
}
|
||||
|
||||
public ColumnDefinitions getColumnDefinitions() {
|
||||
return columnDefinitions;
|
||||
}
|
||||
|
||||
public void setColumnDefinitions(final ColumnDefinitions columnDefinitions) {
|
||||
this.columnDefinitions = columnDefinitions;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user