use RandomAccessFile in FolderStorage.getPathByOffset()

The old implementation opened a new buffered reader every time
getPathByOffset was called. This took 1/20th of a second or
longer. For queries that visited thousands of files this could
take a long time.
We are now using a RandomAccessFile that is opened once. The
average time spent in getPathByOffset is now down to 0.11ms.
This commit is contained in:
2018-05-10 10:22:25 +02:00
parent 82b8a8a932
commit 911062e26b
18 changed files with 215 additions and 146 deletions

View File

@@ -10,6 +10,7 @@ import org.lucares.pdb.datastore.internal.DataStore;
public class Doc {
private final Tags tags;
private final long offsetInListingFile;
private final Path storageBasePath;
private byte[] path;
/**
@@ -24,21 +25,25 @@ public class Doc {
* @param tags
* @param offsetInListingFile
* must be set if {@code path} is {@code null}
* @param path
* optional, can be {@code null}
* @param storageBasePath
* the storage base path.
* @param relativePath
* optional, can be {@code null}. This path is relative to
* {@code storageBasePath}
*/
public Doc(final Tags tags, final long offsetInListingFile, final Path path) {
public Doc(final Tags tags, final long offsetInListingFile, final Path storageBasePath, final Path relativePath) {
super();
this.tags = tags;
this.offsetInListingFile = offsetInListingFile;
setPath(path);
this.storageBasePath = storageBasePath;
setRelativePath(relativePath);
}
public Tags getTags() {
return tags;
}
public void setPath(final Path path) {
public void setRelativePath(final Path path) {
if (path != null) {
this.path = path.toString().getBytes(StandardCharsets.UTF_8);
} else {
@@ -54,18 +59,18 @@ public class Doc {
*
* @return the path
*/
public Path getPath(final FolderStoragePathResolver resolver) {
public Path getAbsolutePath(final FolderStoragePathResolver resolver) {
if (path == null) {
final Path resolvedPath = resolver.getPath(offsetInListingFile);
setPath(resolvedPath);
setRelativePath(resolvedPath);
}
final Path result = Paths.get(new String(path, StandardCharsets.UTF_8));
return result;
final Path relativePath = Paths.get(new String(path, StandardCharsets.UTF_8));
return storageBasePath.resolve(relativePath);
}
private Path getPathNullable() {
return getPath(FolderStoragePathResolver.NULL);
private Path getAbsolutePathNullable() {
return getAbsolutePath(FolderStoragePathResolver.NULL);
}
public long getOffsetInListingFile() {
@@ -74,7 +79,7 @@ public class Doc {
@Override
public String toString() {
return "Doc [tags=" + tags + ", offsetInListingFile=" + offsetInListingFile + ", path=" + getPathNullable()
return "Doc [tags=" + tags + ", offsetInListingFile=" + offsetInListingFile + ", path=" + getAbsolutePathNullable()
+ "]";
}

View File

@@ -9,7 +9,7 @@ import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.pdb.datastore.internal.Proposer;
public class PdbDB {
public class PdbDB implements AutoCloseable {
private final DataStore dataStore;
private final Proposer proposer;
@@ -47,4 +47,13 @@ public class PdbDB {
public FolderStoragePathResolver getFolderStoragePathResolver() {
return dataStore.getFolderStoragePathResolver();
}
/**
 * Returns the storage base path of the underlying {@link DataStore}.
 *
 * @return the value of {@code dataStore.getStorageBasePath()}
 */
public Path getStorageBasePath() {
return dataStore.getStorageBasePath();
}
/**
 * Closes this database by closing the underlying {@link DataStore}.
 *
 * @throws IOException
 *             if {@code dataStore.close()} throws it
 */
@Override
public void close() throws IOException {
dataStore.close();
}
}

View File

@@ -25,7 +25,7 @@ import org.lucares.pdb.datastore.lang.QueryLanguageParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DataStore {
public class DataStore implements AutoCloseable {
private static final Logger EXECUTE_QUERY_LOGGER = LoggerFactory
.getLogger("org.lucares.metrics.dataStore.executeQuery");
private static final Logger INITIALIZE = LoggerFactory.getLogger("org.lucares.metrics.dataStore.init");
@@ -43,11 +43,13 @@ public class DataStore {
private final FolderStorage folderStorage;
private final FolderStoragePathResolver folderStoragePathResolver;
private final Path storageBasePath;
public DataStore(final Path dataDirectory) throws IOException {
Tags.STRING_COMPRESSOR = StringCompressor.create(keyCompressionFile(dataDirectory));
folderStorage = new FolderStorage(storageDirectory(dataDirectory), 1000);
storageBasePath = storageDirectory(dataDirectory);
folderStorage = new FolderStorage(storageBasePath, 1000);
init(folderStorage);
folderStoragePathResolver = folderStorage::getPathByOffset;
@@ -60,7 +62,7 @@ public class DataStore {
files// .parallel()
.forEach(listingFileEntry -> {
listingFileEntry.unsetPath(); // unset the path, so that we don't store it for every document (will
listingFileEntry.unsetRelativePath(); // unset the path, so that we don't store it for every document (will
// be
// initialized lazily if needed)
@@ -80,7 +82,8 @@ public class DataStore {
private void cacheTagToFileMapping(final Tags tags, final ListingFileEntry listingFileEntry) {
final int docId;
final Doc newDoc = new Doc(tags, listingFileEntry.getOffsetInListingFile(), listingFileEntry.getPath());
final Doc newDoc = new Doc(tags, listingFileEntry.getOffsetInListingFile(), storageBasePath,
listingFileEntry.getPath());
synchronized (docIdToDoc) {
docId = docIdToDoc.size();
docIdToDoc.add(newDoc);
@@ -245,4 +248,13 @@ public class DataStore {
public FolderStoragePathResolver getFolderStoragePathResolver() {
return folderStoragePathResolver;
}
/**
 * Returns the storage base path, i.e. the directory that was handed to the
 * {@link FolderStorage} when this data store was constructed. Relative paths
 * of documents are resolved against this path.
 *
 * @return the storage base path
 */
public Path getStorageBasePath() {
return storageBasePath;
}
/**
 * Closes this data store by closing the underlying {@link FolderStorage}.
 *
 * @throws IOException
 *             if {@code folderStorage.close()} throws it
 */
@Override
public void close() throws IOException {
folderStorage.close();
}
}

View File

@@ -1,7 +1,5 @@
package org.lucares.pdb.datastore.internal;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.Writer;
@@ -22,12 +20,15 @@ import org.lucares.pdb.api.RuntimeIOException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FolderStorage {
public class FolderStorage implements AutoCloseable {
private static final byte[] NEWLINE = "\n".getBytes(StandardCharsets.US_ASCII);
static final String LISTING_FILE_NAME = "listing.csv";
private final static Logger LOGGER = LoggerFactory.getLogger(FolderStorage.class);
private final static Logger METRICS_CREATE_LISTING_FILE = LoggerFactory
.getLogger("org.lucares.metrics.fodlerStorage.createListingFile");
.getLogger("org.lucares.metrics.folderStorage.createListingFile");
private final static Logger METRICS_GET_PATH_BY_OFFSET = LoggerFactory
.getLogger("org.lucares.metrics.folderStorage.getPathByOffset");
private final Path storageBaseDirectory;
@@ -39,14 +40,21 @@ public class FolderStorage {
private final int maxFilesPerFolder;
private final Path listingFile;
private final Path listingFilePath;
private final RandomAccessFile listingFile;
public FolderStorage(final Path storageBaseDirectory, final int maxFilesPerFolder) throws IOException {
this.storageBaseDirectory = storageBaseDirectory;
this.listingFile = storageBaseDirectory.resolve(LISTING_FILE_NAME);
this.listingFilePath = storageBaseDirectory.resolve(LISTING_FILE_NAME);
this.maxFilesPerFolder = maxFilesPerFolder;
init();
initListingFileIfNotExists();
listingFile = new RandomAccessFile(listingFilePath.toFile(), "rws");
}
/**
 * Closes the {@link RandomAccessFile} on the listing file that was opened by
 * the constructor.
 * <p>
 * NOTE(review): {@code getPathByOffset} reads through the same handle, so it
 * presumably must not be called after this method - confirm with callers.
 *
 * @throws IOException
 *             if closing the listing file fails
 */
@Override
public void close() throws IOException {
listingFile.close();
}
private void init() throws IOException {
@@ -85,23 +93,19 @@ public class FolderStorage {
}
private synchronized ListingFileEntry updateListingFile(final Path newFile) throws IOException {
final long offsetInListingFile = getFilePointer();
try (Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
StandardOpenOption.APPEND)) {
out.write(newFile.toString());
out.write("\n");
final long offsetInListingFile = Files.size(listingFilePath);
// remember: all paths within storageBaseDirectory use only ascii characters
try (Writer out = Files.newBufferedWriter(listingFilePath, StandardCharsets.US_ASCII, StandardOpenOption.CREATE,
StandardOpenOption.APPEND, StandardOpenOption.SYNC)) {
final Path relativePath = storageBaseDirectory.relativize(newFile);
listingFile.seek(offsetInListingFile);
listingFile.write(relativePath.toString().getBytes(StandardCharsets.US_ASCII));
listingFile.write(NEWLINE);
}
final String filename = newFile.getFileName().toString();
return new ListingFileEntry(filename, offsetInListingFile, newFile);
}
private long getFilePointer() throws FileNotFoundException, IOException {
final RandomAccessFile randomAccessFile = new RandomAccessFile(listingFile.toFile(), "r");
try {
return randomAccessFile.getFilePointer();
} finally {
randomAccessFile.close();
}
final Path relativePath = storageBaseDirectory.relativize(newFile);
return new ListingFileEntry(filename, offsetInListingFile, relativePath);
}
private void ensureCapacity() throws IOException {
@@ -125,21 +129,15 @@ public class FolderStorage {
/**
 * Returns the entries of the listing file as a stream.
 *
 * @return the stream produced by {@code readListingFile()}
 * @throws IOException
 *             if {@code readListingFile()} throws it
 */
public Stream<ListingFileEntry> list() throws IOException {
return readListingFile();
}
private Stream<ListingFileEntry> readListingFile() throws IOException {
try (final ListingFileIterator iterator = new ListingFileIterator(listingFile)) {
final Spliterator<ListingFileEntry> spliterator = Spliterators.spliteratorUnknownSize(iterator,
Spliterator.ORDERED);
final Stream<ListingFileEntry> stream = StreamSupport.stream(spliterator, false);
return stream;
}
final ListingFileIterator iterator = new ListingFileIterator(listingFilePath);
final Spliterator<ListingFileEntry> spliterator = Spliterators.spliteratorUnknownSize(iterator,
Spliterator.ORDERED);
final Stream<ListingFileEntry> stream = StreamSupport.stream(spliterator, false);
return stream;
}
private void initListingFileIfNotExists() throws IOException {
if (!Files.exists(listingFile)) {
if (!Files.exists(listingFilePath)) {
final long start = System.nanoTime();
LOGGER.info("listing file not found -> creating a new one");
createNewListingFile();
@@ -151,29 +149,38 @@ public class FolderStorage {
final int maxDepth = Integer.MAX_VALUE;
final BiPredicate<Path, BasicFileAttributes> matchRegularFiles = (path, attr) -> Files.isRegularFile(path);
try (final Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
StandardOpenOption.APPEND);
// remember: all paths within storageBaseDirectory use only ascii characters
try (final Writer out = Files.newBufferedWriter(listingFilePath, StandardCharsets.US_ASCII,
StandardOpenOption.CREATE, StandardOpenOption.APPEND);
final Stream<Path> stream = Files.find(storageBaseDirectory, maxDepth, matchRegularFiles)) {
final Iterator<Path> iterator = stream.iterator();
while (iterator.hasNext()) {
final Path path = iterator.next();
if (!path.getFileName().toString().equals(LISTING_FILE_NAME)) {
out.write(path.toString());
final Path relativePath = storageBaseDirectory.relativize(path);
out.write(relativePath.toString());
out.write("\n");
}
}
}
}
public Path getPathByOffset(final long offsetInListingFile) throws RuntimeIOException {
public synchronized Path getPathByOffset(final long offsetInListingFile) throws RuntimeIOException {
try (BufferedReader reader = Files.newBufferedReader(listingFile, StandardCharsets.UTF_8)) {
reader.skip(offsetInListingFile);
final String line = reader.readLine();
final long start = System.nanoTime();
try {
listingFile.seek(offsetInListingFile);
// remember: all paths within storageBaseDirectory use only ascii characters
final String line = listingFile.readLine();
return Paths.get(line);
} catch (final IOException e) {
throw new RuntimeIOException(e);
} finally {
METRICS_GET_PATH_BY_OFFSET.debug(((System.nanoTime() - start) / 1_000_000.0) + "ms");
}
}

View File

@@ -4,15 +4,29 @@ import java.nio.file.Path;
import javax.annotation.Nullable;
import org.lucares.pdb.datastore.Doc;
public class ListingFileEntry {
private final String filename;
private final long offsetInListingFile;
private Path path;
private Path relativePath;
public ListingFileEntry(final String filename, final long offsetInListingFile, final Path path) {
/**
* Create a new {@link ListingFileEntry}.
* <p>
* The {@code path} is optional. When the {@link ListingFileEntry} is read from
* the listing file, then the {@code path} is set to {@code null}. This is done
* to save memory. See {@link Doc} for more information on its usage.
*
* @param filename
* @param offsetInListingFile
* @param relativePath
* optional, see {@link Doc}
*/
public ListingFileEntry(final String filename, final long offsetInListingFile, final Path relativePath) {
this.filename = filename;
this.offsetInListingFile = offsetInListingFile;
this.path = path;
this.relativePath = relativePath;
}
public String getFilename() {
@@ -23,19 +37,19 @@ public class ListingFileEntry {
return offsetInListingFile;
}
public void unsetPath() {
path = null;
public void unsetRelativePath() {
relativePath = null;
}
@Nullable
public Path getPath() {
return path;
return relativePath;
}
@Override
public String toString() {
return "ListingFileEntry [filename=" + filename + ", offsetInListingFile=" + offsetInListingFile + ", path="
+ path + "]";
return "ListingFileEntry [filename=" + filename + ", offsetInListingFile=" + offsetInListingFile
+ ", relativePath=" + relativePath + "]";
}
@Override
@@ -44,7 +58,7 @@ public class ListingFileEntry {
int result = 1;
result = prime * result + ((filename == null) ? 0 : filename.hashCode());
result = prime * result + (int) (offsetInListingFile ^ (offsetInListingFile >>> 32));
result = prime * result + ((path == null) ? 0 : path.hashCode());
result = prime * result + ((relativePath == null) ? 0 : relativePath.hashCode());
return result;
}
@@ -64,10 +78,10 @@ public class ListingFileEntry {
return false;
if (offsetInListingFile != other.offsetInListingFile)
return false;
if (path == null) {
if (other.path != null)
if (relativePath == null) {
if (other.relativePath != null)
return false;
} else if (!path.equals(other.path))
} else if (!relativePath.equals(other.relativePath))
return false;
return true;
}

View File

@@ -42,14 +42,16 @@ public class DataStoreTest {
final Path path;
{
final DataStore dataStore = new DataStore(dataDirectory);
final Path storageBasePath = dataStore.getStorageBasePath();
final Tags tags = Tags.create("key1", "value1", "key2", "value2");
path = dataStore.createNewFile(tags);
assertSearch(dataStore, "key1=value1", path);
assertSearch(dataStore, "key1=value1", storageBasePath.resolve(path));
}
{
final DataStore dataStore = new DataStore(dataDirectory);
assertSearch(dataStore, "key1=value1", path);
final Path storageBasePath = dataStore.getStorageBasePath();
assertSearch(dataStore, "key1=value1", storageBasePath.resolve(path));
}
}
@@ -124,9 +126,11 @@ public class DataStoreTest {
private void assertSearch(final String query, final Tags... tags) {
final List<Doc> actualDocs = dataStore.search(query);
final List<Path> actual = CollectionUtils.map(actualDocs,
doc -> doc.getPath(dataStore.getFolderStoragePathResolver()));
doc -> doc.getAbsolutePath(dataStore.getFolderStoragePathResolver()));
final List<Path> expectedPaths = CollectionUtils.map(tags, tagsToPath::get);
final Path storageBasePath = dataStore.getStorageBasePath();
final List<Path> expectedPaths = CollectionUtils.map(CollectionUtils.map(tags, tagsToPath::get),
storageBasePath::resolve);
Assert.assertEquals(actual, expectedPaths, "Query: " + query + " Found: " + getTagsForPaths(actual));
}
@@ -154,7 +158,7 @@ public class DataStoreTest {
private void assertSearch(final DataStore dataStore, final String query, final Path... paths) {
final List<Doc> actualDocs = dataStore.search(query);
final List<Path> actual = CollectionUtils.map(actualDocs,
doc -> doc.getPath(dataStore.getFolderStoragePathResolver()));
doc -> doc.getAbsolutePath(dataStore.getFolderStoragePathResolver()));
Assert.assertEquals(actual, Arrays.asList(paths));
}

View File

@@ -79,40 +79,40 @@ public class FolderStorageTest {
@Test
public void testCreateAndUpdateFileListing() throws Exception {
final int maxFilesPerFolder = 10;
final Path storageLeafFolder = dataDirectory.resolve("0").resolve("0");
final int storageLeafFolderLength = storageLeafFolder.toString().length();
// initial creation
{
final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);
storage.insert("abc", ".txt");
storage.insert("def", ".txt");
try (final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);) {
storage.insert("abc", ".txt");
storage.insert("def", ".txt");
final List<ListingFileEntry> initialListing = storage.list().collect(Collectors.toList());
Assert.assertEquals(initialListing, Arrays.asList(//
new ListingFileEntry("abc$.txt", 0, null), //
new ListingFileEntry("def$.txt", storageLeafFolderLength + 10, null)));
final List<ListingFileEntry> initialListing = storage.list().collect(Collectors.toList());
Assert.assertEquals(initialListing, Arrays.asList(//
new ListingFileEntry("abc$.txt", 0, null), //
new ListingFileEntry("def$.txt", 13, null)));
}
}
// load existing storage
{
final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);
try (final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);) {
// files inserted previously are still there
final List<ListingFileEntry> initialListing = storage.list().collect(Collectors.toList());
// files inserted previously are still there
final List<ListingFileEntry> initialListing = storage.list().collect(Collectors.toList());
Assert.assertEquals(initialListing, Arrays.asList(//
new ListingFileEntry("abc$.txt", 0, null), //
new ListingFileEntry("def$.txt", storageLeafFolderLength + 10, null)));
Assert.assertEquals(initialListing, Arrays.asList(//
new ListingFileEntry("abc$.txt", 0, null), //
new ListingFileEntry("def$.txt", 13, null)));
// add new file
storage.insert("ghi", ".txt");
// add new file
storage.insert("ghi", ".txt");
// listing is updated
final List<ListingFileEntry> updatedListing = storage.list().collect(Collectors.toList());
Assert.assertEquals(updatedListing, Arrays.asList(//
new ListingFileEntry("abc$.txt", 0, null), //
new ListingFileEntry("def$.txt", storageLeafFolderLength + 10, null), //
new ListingFileEntry("ghi$.txt", 2 * storageLeafFolderLength + 20, null)));
// listing is updated
final List<ListingFileEntry> updatedListing = storage.list().collect(Collectors.toList());
Assert.assertEquals(updatedListing, Arrays.asList(//
new ListingFileEntry("abc$.txt", 0, null), //
new ListingFileEntry("def$.txt", 13, null), //
new ListingFileEntry("ghi$.txt", 26, null)));
}
}
}
@@ -127,10 +127,11 @@ public class FolderStorageTest {
}
private void storeFiles(final int maxFilesPerFolder, final String... filenames) throws IOException {
final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder);
try (final FolderStorage storage = new FolderStorage(dataDirectory, maxFilesPerFolder)) {
for (final String filename : filenames) {
storage.insert(filename, SUFFIX);
for (final String filename : filenames) {
storage.insert(filename, SUFFIX);
}
}
}
}

View File

@@ -34,6 +34,7 @@ public class ProposerTest {
@AfterClass
public void afterClass() throws IOException {
FileUtils.delete(dataDirectory);
db.close();
db = null;
tagsToPath = null;
Tags.STRING_COMPRESSOR = null;