performance improvement: ingest data directly from tmp file

This commit is contained in:
2020-11-24 10:04:21 +01:00
parent 11beda5432
commit 08111e0d69
3 changed files with 14 additions and 45 deletions

View File

@@ -1,18 +1,14 @@
package org.lucares.pdbui; package org.lucares.pdbui;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.io.InputStream;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.UUID;
import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeoutException;
@@ -22,7 +18,6 @@ import org.lucares.utils.file.FileUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.DisposableBean; import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
@@ -31,56 +26,32 @@ public class CsvUploadHandler implements PropertyKeys, DisposableBean {
private static final Logger LOGGER = LoggerFactory.getLogger(CsvUploadHandler.class); private static final Logger LOGGER = LoggerFactory.getLogger(CsvUploadHandler.class);
private final Path tmpDir;
private final ExecutorService threadPool = Executors.newFixedThreadPool(2); private final ExecutorService threadPool = Executors.newFixedThreadPool(2);
private final PerformanceDb performanceDb; private final PerformanceDb performanceDb;
public CsvUploadHandler(@Value("${" + TMP_DIR + "}") final String tmpDir, final PerformanceDb performanceDb) public CsvUploadHandler(final PerformanceDb performanceDb) throws IOException {
throws IOException {
this.tmpDir = Paths.get(tmpDir).resolve("uploads");
Files.createDirectories(this.tmpDir);
this.performanceDb = performanceDb; this.performanceDb = performanceDb;
} }
public void ingest(final List<MultipartFile> files, final CsvReaderSettings settings, public void ingest(final List<MultipartFile> files, final CsvReaderSettings settings)
final boolean waitUntilFinished)
throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException { throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException {
final List<Path> tmpFiles = new ArrayList<Path>(); final List<Path> tmpFiles = new ArrayList<Path>();
try { try {
for (final MultipartFile file : files) {
final Path tmpFile = tmpDir.resolve(UUID.randomUUID().toString());
tmpFiles.add(tmpFile);
LOGGER.debug("writing uploaded file to {}", tmpFile);
file.transferTo(tmpFile);
}
} catch (RuntimeException | IOException e) {
FileUtils.deleteSilently(tmpFiles);
throw e;
}
final Future<?> future = threadPool.submit(() -> {
final ArrayBlockingQueue<Entries> queue = performanceDb.getQueue(); final ArrayBlockingQueue<Entries> queue = performanceDb.getQueue();
for (final Path tmpFile : tmpFiles) { for (final MultipartFile file : files) {
try {
final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings); final CsvToEntryTransformer csvToEntryTransformer = new CsvToEntryTransformer(queue, settings);
try (FileInputStream in = new FileInputStream(tmpFile.toFile())) { try (InputStream in = file.getInputStream()) {
csvToEntryTransformer.readCSV(in); csvToEntryTransformer.readCSV(in);
}
LOGGER.debug("delete uploaded file {}", tmpFile);
Files.delete(tmpFile);
} catch (final Exception e) { } catch (final Exception e) {
LOGGER.error("csv ingestion failed", e); LOGGER.error("csv ingestion failed", e);
} }
} }
}); } catch (final RuntimeException e) {
if (waitUntilFinished) { FileUtils.deleteSilently(tmpFiles);
future.get(1, TimeUnit.HOURS); throw e;
} }
} }

View File

@@ -298,11 +298,10 @@ public class PdbController implements HardcodedValues, PropertyKeys {
@ResponseBody @ResponseBody
@ResponseStatus(code = HttpStatus.CREATED) @ResponseStatus(code = HttpStatus.CREATED)
public String handleCsvFileUpload(@RequestParam("file") final MultipartFile[] files, public String handleCsvFileUpload(@RequestParam("file") final MultipartFile[] files,
@RequestPart("settings") final CsvReaderSettings csvReaderSettings, @RequestPart("settings") final CsvReaderSettings csvReaderSettings)
@RequestParam(name = "waitUntilFinished", defaultValue = "false") final boolean waitUntilFinished)
throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException { throws IllegalStateException, IOException, InterruptedException, ExecutionException, TimeoutException {
csvUploadHandler.ingest(List.of(files), csvReaderSettings, waitUntilFinished); csvUploadHandler.ingest(List.of(files), csvReaderSettings);
return ""; // return value might become a job id that can be used to cancel, or observe return ""; // return value might become a job id that can be used to cancel, or observe
// status // status
} }

View File

@@ -216,13 +216,12 @@ def send_csv(zip, file_in_zip, csvSettings):
'settings': ('csvReaderSettings.json', csvSettings , 'application/json') 'settings': ('csvReaderSettings.json', csvSettings , 'application/json')
}) })
r = requests.post('http://localhost:17333/api/data?waitUntilFinished=true', data=m, r = requests.post('http://localhost:17333/api/data', data=m,
headers={ headers={
'Accept': 'text/plain, application/json, application/*+json, */*', 'Accept': 'text/plain, application/json, application/*+json, */*',
'Content-Type': m.content_type.replace("form-data", "mixed;charset=UTF-8") 'Content-Type': m.content_type.replace("form-data", "mixed;charset=UTF-8")
}) })
print("response:") print("response: %s" % r)
print(r)
finally: finally:
shutil.rmtree(tmp_folder) shutil.rmtree(tmp_folder)