add CsvReaderSettings
Preparation to add more complex CSV parsing rules.
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
package org.lucares.pdbui;
|
||||
|
||||
import org.lucares.utils.Preconditions;
|
||||
|
||||
public class CsvReaderSettings {
|
||||
private final byte separator;
|
||||
|
||||
public CsvReaderSettings(final byte separator) {
|
||||
this.separator = separator;
|
||||
}
|
||||
|
||||
public CsvReaderSettings(final char separator) {
|
||||
Preconditions.checkTrue(separator == (byte) separator,
|
||||
"Only separators that fulfill separator == (byte)separator are supported. "
|
||||
+ "This restriction is because the parsing algorithm skips the overhead of "
|
||||
+ "translating bytes to characters.");
|
||||
this.separator = (byte) separator;
|
||||
}
|
||||
|
||||
public byte getSeparator() {
|
||||
return separator;
|
||||
}
|
||||
}
|
||||
@@ -17,14 +17,21 @@ class CsvToEntryTransformer {
|
||||
* Column header names starting with "-" will be ignored.
|
||||
*/
|
||||
static final String COLUM_IGNORE_PREFIX = "-";
|
||||
private static final int IGNORE_COLUMN = 0;
|
||||
static final int IGNORE_COLUMN = 0;
|
||||
private final ArrayBlockingQueue<Entries> queue;
|
||||
private final CsvReaderSettings settings;
|
||||
|
||||
void readCSV(final InputStream in, final ArrayBlockingQueue<Entries> queue)
|
||||
throws IOException, InterruptedException {
|
||||
/**
 * Creates a transformer bound to the given queue and reader settings.
 *
 * @param queue    the queue of {@code Entries} kept by this transformer
 * @param settings the CSV reader settings; {@code readCSV} takes the column
 *                 separator from them
 */
public CsvToEntryTransformer(final ArrayBlockingQueue<Entries> queue, final CsvReaderSettings settings) {
    this.settings = settings;
    this.queue = queue;
}
|
||||
|
||||
void readCSV(final InputStream in) throws IOException, InterruptedException {
|
||||
final int chunksize = 1000;
|
||||
Entries entries = new Entries(chunksize);
|
||||
|
||||
final byte newline = '\n';
|
||||
final byte separator = settings.getSeparator();
|
||||
final byte[] line = new byte[64 * 1024]; // max line length
|
||||
int offsetInLine = 0;
|
||||
int offsetInBuffer = 0;
|
||||
@@ -68,7 +75,7 @@ class CsvToEntryTransformer {
|
||||
offsetInLine = 0;
|
||||
bytesInLine = 0;
|
||||
separatorPositions.clear();
|
||||
} else if (buffer[i] == ',') {
|
||||
} else if (buffer[i] == separator) {
|
||||
separatorPositions.add(offsetInLine + i - offsetInBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,8 +65,8 @@ public final class IngestionHandler implements Callable<Void> {
|
||||
handleInputStream(gzip);
|
||||
} else {
|
||||
in.reset();
|
||||
final CsvToEntryTransformer csvTransformer = new CsvToEntryTransformer();
|
||||
csvTransformer.readCSV(in, queue);
|
||||
final CsvToEntryTransformer csvTransformer = new CsvToEntryTransformer(queue, new CsvReaderSettings(','));
|
||||
csvTransformer.readCSV(in);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user