use listing.csv instead of iterating through all folders
The hope is that reading a single file is faster than listing hundreds of folders.
This commit is contained in:
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
|
||||
public class DataStore {
|
||||
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
|
||||
.getLogger("org.lucares.metrics.dataStore.executeQuery");
|
||||
private static final Logger INITIALIZE = LoggerFactory.getLogger("org.lucares.metrics.dataStore.init");
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
|
||||
|
||||
private static final String SUBDIR_STORAGE = "storage";
|
||||
@@ -50,6 +51,7 @@ public class DataStore {
|
||||
|
||||
private void init(final FolderStorage folderStorage) throws IOException {
|
||||
|
||||
final long start = System.nanoTime();
|
||||
final Stream<Path> files = folderStorage.list();
|
||||
files.parallel().forEach(path -> {
|
||||
|
||||
@@ -63,6 +65,7 @@ public class DataStore {
|
||||
synchronized (docIdToDoc) {
|
||||
((ArrayList<Doc>) docIdToDoc).trimToSize();
|
||||
}
|
||||
INITIALIZE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
|
||||
}
|
||||
|
||||
private void cacheTagToFileMapping(final Tags tags, final Path path) {
|
||||
|
||||
@@ -1,14 +1,27 @@
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.Iterator;
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class FolderStorage {
|
||||
|
||||
static final String LISTING_FILE_NAME = "listing.csv";
|
||||
private final static Logger LOGGER = LoggerFactory.getLogger(FolderStorage.class);
|
||||
// Metrics logger timing the creation of the listing file (see createNewListingFile).
// NOTE(review): "fodlerStorage" below looks like a typo for "folderStorage";
// renaming would change the emitted metrics logger name, so confirm with the
// consumers of these metrics before fixing it.
private final static Logger METRICS_CREATE_LISTING_FILE = LoggerFactory
        .getLogger("org.lucares.metrics.fodlerStorage.createListingFile");
|
||||
|
||||
private final Path storageBaseDirectory;
|
||||
|
||||
private int firstLevel = 0;
|
||||
@@ -19,8 +32,11 @@ public class FolderStorage {
|
||||
|
||||
private final int maxFilesPerFolder;
|
||||
|
||||
private final Path listingFile;
|
||||
|
||||
/**
 * Creates a folder storage rooted at {@code storageBaseDirectory}, keeping at
 * most {@code maxFilesPerFolder} files per leaf folder, and initializes the
 * on-disk directory layout immediately.
 *
 * @param storageBaseDirectory root directory of the storage tree
 * @param maxFilesPerFolder    upper bound of files per second-level folder
 * @throws IOException if the directory structure cannot be created
 */
public FolderStorage(final Path storageBaseDirectory, final int maxFilesPerFolder) throws IOException {
    this.maxFilesPerFolder = maxFilesPerFolder;
    this.storageBaseDirectory = storageBaseDirectory;
    // the listing file lives directly under the storage root
    this.listingFile = storageBaseDirectory.resolve(LISTING_FILE_NAME);
    init();
}
|
||||
@@ -29,12 +45,12 @@ public class FolderStorage {
|
||||
|
||||
Files.createDirectories(storageBaseDirectory);
|
||||
|
||||
firstLevel = Math.max((int) Files.list(storageBaseDirectory).count() - 1, 0);
|
||||
firstLevel = Math.max((int) Files.list(storageBaseDirectory).filter(Files::isDirectory).count() - 1, 0);
|
||||
|
||||
final Path firstLevelDirectory = storageBaseDirectory.resolve(String.valueOf(firstLevel));
|
||||
Files.createDirectories(firstLevelDirectory);
|
||||
|
||||
secondLevel = Math.max((int) Files.list(firstLevelDirectory).count() - 1, 0);
|
||||
secondLevel = Math.max((int) Files.list(firstLevelDirectory).filter(Files::isDirectory).count() - 1, 0);
|
||||
currentDirectory = firstLevelDirectory.resolve(String.valueOf(secondLevel));
|
||||
Files.createDirectories(currentDirectory);
|
||||
|
||||
@@ -55,9 +71,19 @@ public class FolderStorage {
|
||||
Files.createFile(newFile);
|
||||
filesInSecondLevel++;
|
||||
|
||||
updateListingFile(newFile);
|
||||
|
||||
return newFile;
|
||||
}
|
||||
|
||||
private synchronized void updateListingFile(final Path newFile) throws IOException {
|
||||
try (Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
|
||||
StandardOpenOption.APPEND)) {
|
||||
out.write(newFile.toString());
|
||||
out.write("\n");
|
||||
}
|
||||
}
|
||||
|
||||
private void ensureCapacity() throws IOException {
|
||||
if (filesInSecondLevel >= maxFilesPerFolder) {
|
||||
secondLevel++;
|
||||
@@ -78,9 +104,30 @@ public class FolderStorage {
|
||||
}
|
||||
|
||||
/**
 * Returns a stream of all file paths known to this storage, read from the
 * listing file. On first use (listing file absent) the listing is built by
 * walking the storage tree, which is the slow path this cache avoids later.
 * <p>
 * NOTE(review): the returned stream wraps an open file handle
 * ({@code Files.lines}); callers must close it (try-with-resources) or the
 * handle leaks.
 *
 * @return lazily-read stream of stored file paths; the caller must close it
 * @throws IOException if the listing file cannot be created or opened
 */
public Stream<Path> list() throws IOException {

    if (!Files.exists(listingFile)) {
        final long start = System.nanoTime();
        LOGGER.info("listing file not found -> creating a new one");
        createNewListingFile();
        // timing metric in milliseconds, same format as the other metrics loggers
        METRICS_CREATE_LISTING_FILE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
    }
    return Files.lines(listingFile, StandardCharsets.UTF_8).map(Paths::get);
}
|
||||
|
||||
private void createNewListingFile() throws IOException {
|
||||
final int maxDepth = Integer.MAX_VALUE;
|
||||
final BiPredicate<Path, BasicFileAttributes> matchRegularFiles = (path, attr) -> Files.isRegularFile(path);
|
||||
|
||||
return Files.find(storageBaseDirectory, maxDepth, matchRegularFiles);
|
||||
try (final Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
|
||||
StandardOpenOption.APPEND);
|
||||
final Stream<Path> stream = Files.find(storageBaseDirectory, maxDepth, matchRegularFiles)) {
|
||||
|
||||
final Iterator<Path> iterator = stream.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
final Path path = iterator.next();
|
||||
out.write(path.toString());
|
||||
out.write("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user