use listing.csv instead of iterating through all folders
The hope is that reading a single file is faster than listing hundreds of folders.
This commit is contained in:
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
|
|||||||
public class DataStore {
|
public class DataStore {
|
||||||
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
|
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
|
||||||
.getLogger("org.lucares.metrics.dataStore.executeQuery");
|
.getLogger("org.lucares.metrics.dataStore.executeQuery");
|
||||||
|
private static final Logger INITIALIZE = LoggerFactory.getLogger("org.lucares.metrics.dataStore.init");
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(DataStore.class);
|
||||||
|
|
||||||
private static final String SUBDIR_STORAGE = "storage";
|
private static final String SUBDIR_STORAGE = "storage";
|
||||||
@@ -50,6 +51,7 @@ public class DataStore {
|
|||||||
|
|
||||||
private void init(final FolderStorage folderStorage) throws IOException {
|
private void init(final FolderStorage folderStorage) throws IOException {
|
||||||
|
|
||||||
|
final long start = System.nanoTime();
|
||||||
final Stream<Path> files = folderStorage.list();
|
final Stream<Path> files = folderStorage.list();
|
||||||
files.parallel().forEach(path -> {
|
files.parallel().forEach(path -> {
|
||||||
|
|
||||||
@@ -63,6 +65,7 @@ public class DataStore {
|
|||||||
synchronized (docIdToDoc) {
|
synchronized (docIdToDoc) {
|
||||||
((ArrayList<Doc>) docIdToDoc).trimToSize();
|
((ArrayList<Doc>) docIdToDoc).trimToSize();
|
||||||
}
|
}
|
||||||
|
INITIALIZE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void cacheTagToFileMapping(final Tags tags, final Path path) {
|
private void cacheTagToFileMapping(final Tags tags, final Path path) {
|
||||||
|
|||||||
@@ -1,14 +1,27 @@
|
|||||||
package org.lucares.pdb.datastore.internal;
|
package org.lucares.pdb.datastore.internal;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
import java.nio.file.attribute.BasicFileAttributes;
|
import java.nio.file.attribute.BasicFileAttributes;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.function.BiPredicate;
|
import java.util.function.BiPredicate;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class FolderStorage {
|
public class FolderStorage {
|
||||||
|
|
||||||
|
static final String LISTING_FILE_NAME = "listing.csv";
|
||||||
|
private final static Logger LOGGER = LoggerFactory.getLogger(FolderStorage.class);
|
||||||
|
private final static Logger METRICS_CREATE_LISTING_FILE = LoggerFactory
|
||||||
|
.getLogger("org.lucares.metrics.fodlerStorage.createListingFile");
|
||||||
|
|
||||||
private final Path storageBaseDirectory;
|
private final Path storageBaseDirectory;
|
||||||
|
|
||||||
private int firstLevel = 0;
|
private int firstLevel = 0;
|
||||||
@@ -19,8 +32,11 @@ public class FolderStorage {
|
|||||||
|
|
||||||
private final int maxFilesPerFolder;
|
private final int maxFilesPerFolder;
|
||||||
|
|
||||||
|
private final Path listingFile;
|
||||||
|
|
||||||
public FolderStorage(final Path storageBaseDirectory, final int maxFilesPerFolder) throws IOException {
|
public FolderStorage(final Path storageBaseDirectory, final int maxFilesPerFolder) throws IOException {
|
||||||
this.storageBaseDirectory = storageBaseDirectory;
|
this.storageBaseDirectory = storageBaseDirectory;
|
||||||
|
this.listingFile = storageBaseDirectory.resolve(LISTING_FILE_NAME);
|
||||||
this.maxFilesPerFolder = maxFilesPerFolder;
|
this.maxFilesPerFolder = maxFilesPerFolder;
|
||||||
init();
|
init();
|
||||||
}
|
}
|
||||||
@@ -29,12 +45,12 @@ public class FolderStorage {
|
|||||||
|
|
||||||
Files.createDirectories(storageBaseDirectory);
|
Files.createDirectories(storageBaseDirectory);
|
||||||
|
|
||||||
firstLevel = Math.max((int) Files.list(storageBaseDirectory).count() - 1, 0);
|
firstLevel = Math.max((int) Files.list(storageBaseDirectory).filter(Files::isDirectory).count() - 1, 0);
|
||||||
|
|
||||||
final Path firstLevelDirectory = storageBaseDirectory.resolve(String.valueOf(firstLevel));
|
final Path firstLevelDirectory = storageBaseDirectory.resolve(String.valueOf(firstLevel));
|
||||||
Files.createDirectories(firstLevelDirectory);
|
Files.createDirectories(firstLevelDirectory);
|
||||||
|
|
||||||
secondLevel = Math.max((int) Files.list(firstLevelDirectory).count() - 1, 0);
|
secondLevel = Math.max((int) Files.list(firstLevelDirectory).filter(Files::isDirectory).count() - 1, 0);
|
||||||
currentDirectory = firstLevelDirectory.resolve(String.valueOf(secondLevel));
|
currentDirectory = firstLevelDirectory.resolve(String.valueOf(secondLevel));
|
||||||
Files.createDirectories(currentDirectory);
|
Files.createDirectories(currentDirectory);
|
||||||
|
|
||||||
@@ -55,9 +71,19 @@ public class FolderStorage {
|
|||||||
Files.createFile(newFile);
|
Files.createFile(newFile);
|
||||||
filesInSecondLevel++;
|
filesInSecondLevel++;
|
||||||
|
|
||||||
|
updateListingFile(newFile);
|
||||||
|
|
||||||
return newFile;
|
return newFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private synchronized void updateListingFile(final Path newFile) throws IOException {
|
||||||
|
try (Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
|
||||||
|
StandardOpenOption.APPEND)) {
|
||||||
|
out.write(newFile.toString());
|
||||||
|
out.write("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void ensureCapacity() throws IOException {
|
private void ensureCapacity() throws IOException {
|
||||||
if (filesInSecondLevel >= maxFilesPerFolder) {
|
if (filesInSecondLevel >= maxFilesPerFolder) {
|
||||||
secondLevel++;
|
secondLevel++;
|
||||||
@@ -78,9 +104,30 @@ public class FolderStorage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Stream<Path> list() throws IOException {
|
public Stream<Path> list() throws IOException {
|
||||||
|
|
||||||
|
if (!Files.exists(listingFile)) {
|
||||||
|
final long start = System.nanoTime();
|
||||||
|
LOGGER.info("listing file not found -> creating a new one");
|
||||||
|
createNewListingFile();
|
||||||
|
METRICS_CREATE_LISTING_FILE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
|
||||||
|
}
|
||||||
|
return Files.lines(listingFile, StandardCharsets.UTF_8).map(Paths::get);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createNewListingFile() throws IOException {
|
||||||
final int maxDepth = Integer.MAX_VALUE;
|
final int maxDepth = Integer.MAX_VALUE;
|
||||||
final BiPredicate<Path, BasicFileAttributes> matchRegularFiles = (path, attr) -> Files.isRegularFile(path);
|
final BiPredicate<Path, BasicFileAttributes> matchRegularFiles = (path, attr) -> Files.isRegularFile(path);
|
||||||
|
|
||||||
return Files.find(storageBaseDirectory, maxDepth, matchRegularFiles);
|
try (final Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
|
||||||
|
StandardOpenOption.APPEND);
|
||||||
|
final Stream<Path> stream = Files.find(storageBaseDirectory, maxDepth, matchRegularFiles)) {
|
||||||
|
|
||||||
|
final Iterator<Path> iterator = stream.iterator();
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
final Path path = iterator.next();
|
||||||
|
out.write(path.toString());
|
||||||
|
out.write("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import java.nio.file.Paths;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.lucares.utils.CollectionUtils;
|
import org.lucares.utils.CollectionUtils;
|
||||||
import org.lucares.utils.file.FileUtils;
|
import org.lucares.utils.file.FileUtils;
|
||||||
@@ -15,6 +16,7 @@ import org.testng.annotations.AfterMethod;
|
|||||||
import org.testng.annotations.BeforeMethod;
|
import org.testng.annotations.BeforeMethod;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
@Test
|
||||||
public class FolderStorageTest {
|
public class FolderStorageTest {
|
||||||
private static final String SUFFIX = ".txt";
|
private static final String SUFFIX = ".txt";
|
||||||
private Path dataDirectory;
|
private Path dataDirectory;
|
||||||
@@ -74,8 +76,46 @@ public class FolderStorageTest {
|
|||||||
Assert.assertEquals(actualFiles, expectedFiles);
|
Assert.assertEquals(actualFiles, expectedFiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateAndUpdateFileListing() throws Exception {
|
||||||
|
final int maxFilesPerFolder = 10;
|
||||||
|
final Path storageLeafFolder = dataDirectory.resolve("0").resolve("0");
|
||||||
|
// initial creation
|
||||||
|
{
|
||||||
|
final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);
|
||||||
|
storage.insert("abc", ".txt");
|
||||||
|
storage.insert("def", ".txt");
|
||||||
|
|
||||||
|
final List<Path> initialListing = storage.list().collect(Collectors.toList());
|
||||||
|
Assert.assertEquals(initialListing,
|
||||||
|
Arrays.asList(storageLeafFolder.resolve("abc$.txt"), storageLeafFolder.resolve("def$.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
// load existing storage
|
||||||
|
{
|
||||||
|
final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);
|
||||||
|
|
||||||
|
// files inserted previously are still there
|
||||||
|
final List<Path> initialListing = storage.list().collect(Collectors.toList());
|
||||||
|
|
||||||
|
Assert.assertEquals(initialListing,
|
||||||
|
Arrays.asList(storageLeafFolder.resolve("abc$.txt"), storageLeafFolder.resolve("def$.txt")));
|
||||||
|
|
||||||
|
// add new file
|
||||||
|
storage.insert("ghi", ".txt");
|
||||||
|
|
||||||
|
// listing is updated
|
||||||
|
final List<Path> updatedListing = storage.list().collect(Collectors.toList());
|
||||||
|
Assert.assertEquals(updatedListing, Arrays.asList(storageLeafFolder.resolve("abc$.txt"),
|
||||||
|
storageLeafFolder.resolve("def$.txt"), storageLeafFolder.resolve("ghi$.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private List<Path> getPathsRelativeToDataDirectory() throws IOException {
|
private List<Path> getPathsRelativeToDataDirectory() throws IOException {
|
||||||
final List<Path> actualFiles = FileUtils.listRecursively(dataDirectory);
|
List<Path> actualFiles = FileUtils.listRecursively(dataDirectory);
|
||||||
|
actualFiles = CollectionUtils.filter(actualFiles,
|
||||||
|
p -> !p.getFileName().toString().equals(FolderStorage.LISTING_FILE_NAME));
|
||||||
CollectionUtils.mapInPlace(actualFiles, p -> dataDirectory.relativize(p));
|
CollectionUtils.mapInPlace(actualFiles, p -> dataDirectory.relativize(p));
|
||||||
Collections.sort(actualFiles);
|
Collections.sort(actualFiles);
|
||||||
return actualFiles;
|
return actualFiles;
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ public class PerformanceDbTest {
|
|||||||
|
|
||||||
final List<Path> filesInStorage = FileUtils.listRecursively(DataStore.storageDirectory(dataDirectory));
|
final List<Path> filesInStorage = FileUtils.listRecursively(DataStore.storageDirectory(dataDirectory));
|
||||||
|
|
||||||
Assert.assertEquals(filesInStorage.size(), 1);
|
Assert.assertEquals(filesInStorage.size(), 2, "the created file and the listing.csv");
|
||||||
|
|
||||||
final Path tagSpecificFile = filesInStorage.get(0);
|
final Path tagSpecificFile = filesInStorage.get(0);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user