performance improvement: ingest data directly from tmp file
This commit is contained in:
@@ -1,18 +1,14 @@
|
|||||||
package org.lucares.pdbui;
|
package org.lucares.pdbui;
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.io.InputStream;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.Future;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
@@ -22,7 +18,6 @@ import org.lucares.utils.file.FileUtils;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.beans.factory.DisposableBean;
|
import org.springframework.beans.factory.DisposableBean;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
@@ -31,56 +26,32 @@ public class CsvUploadHandler implements PropertyKeys, DisposableBean {
|
|||||||
|
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(CsvUploadHandler.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(CsvUploadHandler.class);
|
||||||
|
|
||||||
private final Path tmpDir;
|
|
||||||
|
|
||||||
private final ExecutorService threadPool = Executors.newFixedThreadPool(2);
|
private final ExecutorService threadPool = Executors.newFixedThreadPool(2);
|
||||||
|
|
||||||
private final PerformanceDb performanceDb;
|
private final PerformanceDb performanceDb;
|
||||||
|
|
||||||
public CsvUploadHandler(@Value("${" + TMP_DIR + "}") final String tmpDir, final PerformanceDb performanceDb)
|
public CsvUploadHandler(final PerformanceDb performanceDb) throws IOException {
|
||||||
throws IOException {
|
|
||||||
this.tmpDir = Paths.get(tmpDir).resolve("uploads");
|
|
||||||
Files.createDirectories(this.tmpDir);
|
|
||||||
this.performanceDb = performanceDb;
|
this.performanceDb = performanceDb;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void ingest(final List<MultipartFile> files, final CsvReaderSettings settings,
|
public void ingest(final List<MultipartFile> files, final CsvReaderSettings settings)
|
||||||
final boolean waitUntilFinished)
|
|
||||||
throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException {
|
throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException {
|
||||||
|
|
||||||
final List<Path> tmpFiles = new ArrayList<Path>();
|
final List<Path> tmpFiles = new ArrayList<Path>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (final MultipartFile file : files) {
|
|
||||||
final Path tmpFile = tmpDir.resolve(UUID.randomUUID().toString());
|
|
||||||
tmpFiles.add(tmpFile);
|
|
||||||
LOGGER.debug("writing uploaded file to {}", tmpFile);
|
|
||||||
file.transferTo(tmpFile);
|
|
||||||
}
|
|
||||||
} catch (RuntimeException | IOException e) {
|
|
||||||
FileUtils.deleteSilently(tmpFiles);
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
|
|
||||||
final Future<?> future = threadPool.submit(() -> {
|
|
||||||
final ArrayBlockingQueue<Entries> queue = performanceDb.getQueue();
|
final ArrayBlockingQueue<Entries> queue = performanceDb.getQueue();
|
||||||
for (final Path tmpFile : tmpFiles) {
|
for (final MultipartFile file : files) {
|
||||||
try {
|
|
||||||
final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings);
|
final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings);
|
||||||
try (FileInputStream in = new FileInputStream(tmpFile.toFile())) {
|
try (InputStream in = file.getInputStream()) {
|
||||||
csvToEntryTransformer.readCSV(in);
|
csvToEntryTransformer.readCSV(in);
|
||||||
}
|
|
||||||
|
|
||||||
LOGGER.debug("delete uploaded file {}", tmpFile);
|
|
||||||
Files.delete(tmpFile);
|
|
||||||
|
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
LOGGER.error("csv ingestion failed", e);
|
LOGGER.error("csv ingestion failed", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
} catch (final RuntimeException e) {
|
||||||
if (waitUntilFinished) {
|
FileUtils.deleteSilently(tmpFiles);
|
||||||
future.get(1, TimeUnit.HOURS);
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -298,11 +298,10 @@ public class PdbController implements HardcodedValues, PropertyKeys {
|
|||||||
@ResponseBody
|
@ResponseBody
|
||||||
@ResponseStatus(code = HttpStatus.CREATED)
|
@ResponseStatus(code = HttpStatus.CREATED)
|
||||||
public String handleCsvFileUpload(@RequestParam("file") final MultipartFile[] files,
|
public String handleCsvFileUpload(@RequestParam("file") final MultipartFile[] files,
|
||||||
@RequestPart("settings") final CsvReaderSettings csvReaderSettings,
|
@RequestPart("settings") final CsvReaderSettings csvReaderSettings)
|
||||||
@RequestParam(name = "waitUntilFinished", defaultValue = "false") final boolean waitUntilFinished)
|
|
||||||
throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException {
|
throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException {
|
||||||
|
|
||||||
csvUploadHandler.ingest(List.of(files), csvReaderSettings, waitUntilFinished);
|
csvUploadHandler.ingest(List.of(files), csvReaderSettings);
|
||||||
return ""; // return value might become a job id that can be used to cancel, or observe
|
return ""; // return value might become a job id that can be used to cancel, or observe
|
||||||
// status
|
// status
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -216,13 +216,12 @@ def send_csv(zip, file_in_zip, csvSettings):
|
|||||||
'settings': ('csvReaderSettings.json', csvSettings , 'application/json')
|
'settings': ('csvReaderSettings.json', csvSettings , 'application/json')
|
||||||
})
|
})
|
||||||
|
|
||||||
r = requests.post('http://localhost:17333/api/data?waitUntilFinished=true', data=m,
|
r = requests.post('http://localhost:17333/api/data', data=m,
|
||||||
headers={
|
headers={
|
||||||
'Accept': 'text/plain, application/json, application/*+json, */*',
|
'Accept': 'text/plain, application/json, application/*+json, */*',
|
||||||
'Content-Type': m.content_type.replace("form-data", "mixed;charset=UTF-8")
|
'Content-Type': m.content_type.replace("form-data", "mixed;charset=UTF-8")
|
||||||
})
|
})
|
||||||
print("response:")
|
print("response: %s" % r)
|
||||||
print(r)
|
|
||||||
finally:
|
finally:
|
||||||
shutil.rmtree(tmp_folder)
|
shutil.rmtree(tmp_folder)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user