add CsvReaderSettings

Preparation to add more complex CSV parsing rules.
This commit is contained in:
2019-11-30 18:32:34 +01:00
parent 08b1be5334
commit ffe5ae8652
4 changed files with 37 additions and 6 deletions

View File

@@ -0,0 +1,23 @@
package org.lucares.pdbui;
import org.lucares.utils.Preconditions;
/**
 * Settings for reading CSV input. At present the only setting is the column
 * separator, which is held as a single byte.
 */
public class CsvReaderSettings {

    /** Message handed to the precondition check when a char separator does not fit into one byte. */
    private static final String UNSUPPORTED_SEPARATOR_MESSAGE =
            "Only separators that fulfill separator == (byte)separator are supported. "
                    + "This restriction is because the parsing algorithm skips the overhead of "
                    + "translating bytes to characters.";

    private final byte separator;

    /**
     * Creates settings from a separator that is already a byte.
     *
     * @param separator the separator byte
     */
    public CsvReaderSettings(final byte separator) {
        this.separator = separator;
    }

    /**
     * Creates settings from a separator character. The character must survive
     * the narrowing cast to {@code byte} unchanged (i.e. its value is in the
     * range 0..127); any other character is passed to
     * {@code Preconditions.checkTrue} as a failed condition.
     *
     * @param separator the separator character
     */
    public CsvReaderSettings(final char separator) {
        this(asSingleByte(separator));
    }

    /**
     * Narrows the candidate to a byte after checking that the narrowing does
     * not change its numeric value.
     */
    private static byte asSingleByte(final char candidate) {
        final byte narrowed = (byte) candidate;
        // Both operands are promoted to int, so this holds only for values 0..127.
        Preconditions.checkTrue(candidate == narrowed, UNSUPPORTED_SEPARATOR_MESSAGE);
        return narrowed;
    }

    /**
     * @return the separator byte
     */
    public byte getSeparator() {
        return separator;
    }
}

View File

@@ -17,14 +17,21 @@ class CsvToEntryTransformer {
* Column header names starting with "-" will be ignored.
*/
static final String COLUM_IGNORE_PREFIX = "-";
private static final int IGNORE_COLUMN = 0;
static final int IGNORE_COLUMN = 0;
private final ArrayBlockingQueue<Entries> queue;
private final CsvReaderSettings settings;
void readCSV(final InputStream in, final ArrayBlockingQueue<Entries> queue)
throws IOException, InterruptedException {
/**
 * Creates a transformer bound to the given queue and CSV reader settings.
 *
 * @param queue    the queue of {@code Entries} kept by this transformer
 * @param settings the CSV reader settings; {@code readCSV} takes the separator byte from them
 */
public CsvToEntryTransformer(final ArrayBlockingQueue<Entries> queue, final CsvReaderSettings settings) {
this.queue = queue;
this.settings = settings;
}
void readCSV(final InputStream in) throws IOException, InterruptedException {
final int chunksize = 1000;
Entries entries = new Entries(chunksize);
final byte newline = '\n';
final byte separator = settings.getSeparator();
final byte[] line = new byte[64 * 1024]; // max line length
int offsetInLine = 0;
int offsetInBuffer = 0;
@@ -68,7 +75,7 @@ class CsvToEntryTransformer {
offsetInLine = 0;
bytesInLine = 0;
separatorPositions.clear();
} else if (buffer[i] == ',') {
} else if (buffer[i] == separator) {
separatorPositions.add(offsetInLine + i - offsetInBuffer);
}
}

View File

@@ -65,8 +65,8 @@ public final class IngestionHandler implements Callable<Void> {
handleInputStream(gzip);
} else {
in.reset();
final CsvToEntryTransformer csvTransformer = new CsvToEntryTransformer();
csvTransformer.readCSV(in, queue);
final CsvToEntryTransformer csvTransformer = new CsvToEntryTransformer(queue, new CsvReaderSettings(','));
csvTransformer.readCSV(in);
}
}