use listing.csv instead of iterating through all folders

The hope is that reading a single file is faster than listing
hundreds of folders.
This commit is contained in:
2018-05-05 10:46:16 +02:00
parent bda2de672e
commit e3102c01d4
4 changed files with 95 additions and 5 deletions

View File

@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
public class DataStore {
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
.getLogger("org.lucares.metrics.dataStore.executeQuery");
private static final Logger INITIALIZE = LoggerFactory.getLogger("org.lucares.metrics.dataStore.init");
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
private static final String SUBDIR_STORAGE = "storage";
@@ -50,6 +51,7 @@ public class DataStore {
private void init(final FolderStorage folderStorage) throws IOException {
final long start = System.nanoTime();
final Stream<Path> files = folderStorage.list();
files.parallel().forEach(path -> {
@@ -63,6 +65,7 @@ public class DataStore {
synchronized (docIdToDoc) {
((ArrayList<Doc>) docIdToDoc).trimToSize();
}
INITIALIZE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
}
private void cacheTagToFileMapping(final Tags tags, final Path path) {

View File

@@ -1,14 +1,27 @@
package org.lucares.pdb.datastore.internal;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Iterator;
import java.util.function.BiPredicate;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FolderStorage {
// Name of the listing file. The constructor resolves it against the storage base
// directory; it is package-private so that tests can reference it.
static final String LISTING_FILE_NAME = "listing.csv";
// General-purpose logger of this class.
private final static Logger LOGGER = LoggerFactory.getLogger(FolderStorage.class);
// Metrics logger that receives the time (in ms) spent creating a missing listing file.
// NOTE(review): "fodlerStorage" looks like a misspelling of "folderStorage". The logger
// name is a runtime configuration key, so rename it only together with the logging config.
private final static Logger METRICS_CREATE_LISTING_FILE = LoggerFactory
.getLogger("org.lucares.metrics.fodlerStorage.createListingFile");
private final Path storageBaseDirectory;
private int firstLevel = 0;
@@ -19,8 +32,11 @@ public class FolderStorage {
private final int maxFilesPerFolder;
private final Path listingFile;
/**
 * Creates a folder storage rooted at the given base directory.
 * <p>
 * The listing file location is derived from the base directory by resolving
 * {@link #LISTING_FILE_NAME} against it. After the fields are set, {@code init()} is run.
 *
 * @param storageBaseDirectory root directory of the storage
 * @param maxFilesPerFolder    value stored as the per-folder file limit
 * @throws IOException if {@code init()} fails
 */
public FolderStorage(final Path storageBaseDirectory, final int maxFilesPerFolder) throws IOException {
    this.maxFilesPerFolder = maxFilesPerFolder;
    this.storageBaseDirectory = storageBaseDirectory;
    this.listingFile = this.storageBaseDirectory.resolve(LISTING_FILE_NAME);
    // All fields must be assigned before init() runs, because init() works on the base directory.
    init();
}
@@ -29,12 +45,12 @@ public class FolderStorage {
Files.createDirectories(storageBaseDirectory);
firstLevel = Math.max((int) Files.list(storageBaseDirectory).count() - 1, 0);
firstLevel = Math.max((int) Files.list(storageBaseDirectory).filter(Files::isDirectory).count() - 1, 0);
final Path firstLevelDirectory = storageBaseDirectory.resolve(String.valueOf(firstLevel));
Files.createDirectories(firstLevelDirectory);
secondLevel = Math.max((int) Files.list(firstLevelDirectory).count() - 1, 0);
secondLevel = Math.max((int) Files.list(firstLevelDirectory).filter(Files::isDirectory).count() - 1, 0);
currentDirectory = firstLevelDirectory.resolve(String.valueOf(secondLevel));
Files.createDirectories(currentDirectory);
@@ -55,9 +71,19 @@ public class FolderStorage {
Files.createFile(newFile);
filesInSecondLevel++;
updateListingFile(newFile);
return newFile;
}
/**
 * Appends one entry for {@code newFile} to the listing file.
 * <p>
 * The listing file is created if it does not exist yet. Each entry is the string form of
 * the path followed by a single {@code \n}. The file is opened and closed on every call.
 *
 * @param newFile path to record in the listing
 * @throws IOException if the listing file cannot be opened or written
 */
private synchronized void updateListingFile(final Path newFile) throws IOException {
    try (Writer listingWriter = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8,
            StandardOpenOption.CREATE, StandardOpenOption.APPEND)) {
        listingWriter.append(newFile.toString()).append('\n');
    }
}
private void ensureCapacity() throws IOException {
if (filesInSecondLevel >= maxFilesPerFolder) {
secondLevel++;
@@ -78,9 +104,30 @@ public class FolderStorage {
}
/**
 * Returns the paths recorded in the listing file, one per line.
 * <p>
 * If the listing file does not exist, it is created first and the time that took is
 * reported to the metrics logger.
 * <p>
 * The returned stream is backed by {@link Files#lines(Path, java.nio.charset.Charset)} and
 * therefore keeps the listing file open until the stream is closed; callers should close it,
 * e.g. with try-with-resources.
 *
 * @return a lazily populated stream of the listed paths
 * @throws IOException if the listing file cannot be created or opened
 */
public Stream<Path> list() throws IOException {
    final boolean listingMissing = !Files.exists(listingFile);
    if (listingMissing) {
        final long startNanos = System.nanoTime();
        LOGGER.info("listing file not found -> creating a new one");
        createNewListingFile();
        final double elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000.0;
        METRICS_CREATE_LISTING_FILE.info(elapsedMillis + "ms");
    }

    final Stream<String> lines = Files.lines(listingFile, StandardCharsets.UTF_8);
    return lines.map(Paths::get);
}
/**
 * Builds the listing file by walking the whole storage tree and writing the path of every
 * regular file, one per line, each terminated by {@code \n}.
 * <p>
 * The listing file itself lives inside the storage base directory and is created by the
 * writer before the walk starts, so it is explicitly excluded; otherwise it would be listed
 * as if it were a stored file.
 *
 * @throws IOException if the tree cannot be walked or the listing file cannot be written
 */
private void createNewListingFile() throws IOException {
    final int maxDepth = Integer.MAX_VALUE;
    final BiPredicate<Path, BasicFileAttributes> matchStoredFiles = (path, attr) -> Files.isRegularFile(path)
            && !path.equals(listingFile);

    try (final Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
            StandardOpenOption.APPEND);
            final Stream<Path> stream = Files.find(storageBaseDirectory, maxDepth, matchStoredFiles)) {

        // A plain iterator loop is used instead of forEach because Writer.write throws the
        // checked IOException, which a Consumer lambda cannot propagate.
        final Iterator<Path> iterator = stream.iterator();
        while (iterator.hasNext()) {
            final Path path = iterator.next();
            out.write(path.toString());
            out.write("\n");
        }
    }
}
}

View File

@@ -7,6 +7,7 @@ import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.lucares.utils.CollectionUtils;
import org.lucares.utils.file.FileUtils;
@@ -15,6 +16,7 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@Test
public class FolderStorageTest {
private static final String SUFFIX = ".txt";
private Path dataDirectory;
@@ -74,8 +76,46 @@ public class FolderStorageTest {
Assert.assertEquals(actualFiles, expectedFiles);
}
/**
 * Verifies that the listing is written when files are inserted, survives re-opening the
 * storage, and is extended by later inserts.
 */
@Test
public void testCreateAndUpdateFileListing() throws Exception {
    final int maxFilesPerFolder = 10;

    final Path storageLeafFolder = dataDirectory.resolve("0").resolve("0");

    // initial creation
    {
        final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);
        storage.insert("abc", ".txt");
        storage.insert("def", ".txt");

        final List<Path> initialListing = listAndClose(storage);
        Assert.assertEquals(initialListing,
                Arrays.asList(storageLeafFolder.resolve("abc$.txt"), storageLeafFolder.resolve("def$.txt")));
    }

    // load existing storage
    {
        final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);

        // files inserted previously are still there
        final List<Path> existingListing = listAndClose(storage);
        Assert.assertEquals(existingListing,
                Arrays.asList(storageLeafFolder.resolve("abc$.txt"), storageLeafFolder.resolve("def$.txt")));

        // add new file
        storage.insert("ghi", ".txt");

        // listing is updated
        final List<Path> updatedListing = listAndClose(storage);
        Assert.assertEquals(updatedListing, Arrays.asList(storageLeafFolder.resolve("abc$.txt"),
                storageLeafFolder.resolve("def$.txt"), storageLeafFolder.resolve("ghi$.txt")));
    }
}

/**
 * Collects the storage listing into a list and closes the underlying stream, so that no
 * handle on the listing file stays open after the call.
 */
private static List<Path> listAndClose(final FolderStorage storage) throws IOException {
    // Fully qualified to avoid depending on an import this test file may not have.
    try (java.util.stream.Stream<Path> paths = storage.list()) {
        return paths.collect(Collectors.toList());
    }
}
private List<Path> getPathsRelativeToDataDirectory() throws IOException {
final List<Path> actualFiles = FileUtils.listRecursively(dataDirectory);
List<Path> actualFiles = FileUtils.listRecursively(dataDirectory);
actualFiles = CollectionUtils.filter(actualFiles,
p -> !p.getFileName().toString().equals(FolderStorage.LISTING_FILE_NAME));
CollectionUtils.mapInPlace(actualFiles, p -> dataDirectory.relativize(p));
Collections.sort(actualFiles);
return actualFiles;

View File

@@ -113,7 +113,7 @@ public class PerformanceDbTest {
final List<Path> filesInStorage = FileUtils.listRecursively(DataStore.storageDirectory(dataDirectory));
Assert.assertEquals(filesInStorage.size(), 1);
Assert.assertEquals(filesInStorage.size(), 2, "the created file and the listing.csv");
final Path tagSpecificFile = filesInStorage.get(0);