use listing.csv instead of iterating through all folders

The hope is that reading a single file is faster than listing
hundreds of folders.
This commit is contained in:
2018-05-05 10:46:16 +02:00
parent bda2de672e
commit e3102c01d4
4 changed files with 95 additions and 5 deletions

View File

@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
public class DataStore {
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
.getLogger("org.lucares.metrics.dataStore.executeQuery");
private static final Logger INITIALIZE = LoggerFactory.getLogger("org.lucares.metrics.dataStore.init");
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
private static final String SUBDIR_STORAGE = "storage";
@@ -50,6 +51,7 @@ public class DataStore {
private void init(final FolderStorage folderStorage) throws IOException {
final long start = System.nanoTime();
final Stream<Path> files = folderStorage.list();
files.parallel().forEach(path -> {
@@ -63,6 +65,7 @@ public class DataStore {
synchronized (docIdToDoc) {
((ArrayList<Doc>) docIdToDoc).trimToSize();
}
INITIALIZE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
}
private void cacheTagToFileMapping(final Tags tags, final Path path) {

View File

@@ -1,14 +1,27 @@
package org.lucares.pdb.datastore.internal;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Iterator;
import java.util.function.BiPredicate;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FolderStorage {
static final String LISTING_FILE_NAME = "listing.csv";
private final static Logger LOGGER = LoggerFactory.getLogger(FolderStorage.class);
// Metrics logger that reports how long (re-)creating the listing file took.
// The category previously read "fodlerStorage" (typo); it now matches the class name.
// NOTE(review): any logging configuration that references the misspelled category must be updated.
private final static Logger METRICS_CREATE_LISTING_FILE = LoggerFactory
        .getLogger("org.lucares.metrics.folderStorage.createListingFile");
private final Path storageBaseDirectory;
private int firstLevel = 0;
@@ -19,8 +32,11 @@ public class FolderStorage {
private final int maxFilesPerFolder;
private final Path listingFile;
/**
 * Creates a folder storage rooted at the given base directory.
 * <p>
 * The location of the listing file is derived from the base directory and
 * {@link #LISTING_FILE_NAME}. After the fields are set, {@code init()} is
 * invoked.
 *
 * @param storageBaseDirectory
 *            root directory of the storage
 * @param maxFilesPerFolder
 *            limit that is compared against the number of files in the
 *            current folder
 * @throws IOException
 *             presumably propagated from {@code init()} (its declaration
 *             is not visible here)
 */
public FolderStorage(final Path storageBaseDirectory, final int maxFilesPerFolder) throws IOException {
    this.maxFilesPerFolder = maxFilesPerFolder;
    this.storageBaseDirectory = storageBaseDirectory;
    this.listingFile = this.storageBaseDirectory.resolve(LISTING_FILE_NAME);
    init();
}
@@ -29,12 +45,12 @@ public class FolderStorage {
Files.createDirectories(storageBaseDirectory);
firstLevel = Math.max((int) Files.list(storageBaseDirectory).count() - 1, 0);
firstLevel = Math.max((int) Files.list(storageBaseDirectory).filter(Files::isDirectory).count() - 1, 0);
final Path firstLevelDirectory = storageBaseDirectory.resolve(String.valueOf(firstLevel));
Files.createDirectories(firstLevelDirectory);
secondLevel = Math.max((int) Files.list(firstLevelDirectory).count() - 1, 0);
secondLevel = Math.max((int) Files.list(firstLevelDirectory).filter(Files::isDirectory).count() - 1, 0);
currentDirectory = firstLevelDirectory.resolve(String.valueOf(secondLevel));
Files.createDirectories(currentDirectory);
@@ -55,9 +71,19 @@ public class FolderStorage {
Files.createFile(newFile);
filesInSecondLevel++;
updateListingFile(newFile);
return newFile;
}
/**
 * Appends the path of the given file as a single line to the listing file.
 * <p>
 * The listing file is opened with {@code CREATE} and {@code APPEND}, so it
 * is created on first use and existing content is kept. Lines are
 * terminated with {@code "\n"} regardless of the platform.
 *
 * @param newFile
 *            the path to record
 * @throws IOException
 *             if the listing file cannot be opened or written
 */
private synchronized void updateListingFile(final Path newFile) throws IOException {
    try (Writer listingWriter = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8,
            StandardOpenOption.CREATE, StandardOpenOption.APPEND)) {
        final String line = newFile.toString() + "\n";
        listingWriter.write(line);
    }
}
private void ensureCapacity() throws IOException {
if (filesInSecondLevel >= maxFilesPerFolder) {
secondLevel++;
@@ -78,9 +104,30 @@ public class FolderStorage {
}
/**
 * Returns the paths recorded in the listing file, one path per line.
 * <p>
 * If the listing file does not exist yet, it is created first and the time
 * that took is reported via the metrics logger.
 * <p>
 * The returned stream is backed by {@link Files#lines}, which keeps the
 * listing file open until the stream is closed; callers should close it
 * (e.g. with try-with-resources).
 *
 * @return a stream with one {@link Path} per line of the listing file
 * @throws IOException
 *             if the listing file cannot be created or opened
 */
public Stream<Path> list() throws IOException {
    final boolean listingMissing = !Files.exists(listingFile);
    if (listingMissing) {
        final long start = System.nanoTime();
        LOGGER.info("listing file not found -> creating a new one");
        createNewListingFile();
        final double elapsedMillis = (System.nanoTime() - start) / 1_000_000.0;
        METRICS_CREATE_LISTING_FILE.info(elapsedMillis + "ms");
    }

    final Stream<String> lines = Files.lines(listingFile, StandardCharsets.UTF_8);
    return lines.map(Paths::get);
}
/**
 * Builds the listing file by walking the storage base directory
 * recursively and writing the path of every regular file found, one path
 * per line (terminated with {@code "\n"}).
 * <p>
 * The listing file itself is excluded from the result.
 *
 * @throws IOException
 *             if the directory walk or writing the listing file fails
 */
private void createNewListingFile() throws IOException {
    final int maxDepth = Integer.MAX_VALUE;
    // The listing file lives inside the storage base directory and is
    // created by the writer below before the walk is consumed. Without the
    // explicit exclusion it would be matched as a regular file and end up
    // listing itself as if it were a data file.
    final BiPredicate<Path, BasicFileAttributes> matchRegularFiles = (path, attr) -> Files.isRegularFile(path)
            && !path.equals(listingFile);

    try (final Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
            StandardOpenOption.APPEND);
            final Stream<Path> stream = Files.find(storageBaseDirectory, maxDepth, matchRegularFiles)) {
        // Iterate explicitly instead of using forEach, because writing can
        // throw the checked IOException.
        final Iterator<Path> iterator = stream.iterator();
        while (iterator.hasNext()) {
            final Path path = iterator.next();
            out.write(path.toString());
            out.write("\n");
        }
    }
}
}