reduce memory footprint by lazily initializing the path in Doc
The path in Doc is now optional. This reduces memory consumption, because we only have to store a long (the offset in the listing file). This assumes that only a small percentage of Docs is requested.
This commit is contained in:
@@ -5,27 +5,77 @@ import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import org.lucares.pdb.api.Tags;
|
||||
import org.lucares.pdb.datastore.internal.DataStore;
|
||||
|
||||
public class Doc {
|
||||
private final Tags tags;
|
||||
private final byte[] path;
|
||||
private final long offsetInListingFile;
|
||||
private byte[] path;
|
||||
|
||||
public Doc(final Tags tags, final Path path) {
|
||||
/**
|
||||
* Initializes a new document.
|
||||
* <p>
|
||||
* The path can be {@code null}. If path is {@code null}, then
|
||||
* {@code offsetInListingFile} must be set. The path will be initialized lazily
|
||||
* when needed.
|
||||
* <p>
|
||||
* This is used to reduce the memory footprint.
|
||||
*
|
||||
* @param tags
|
||||
* @param offsetInListingFile
|
||||
* must be set if {@code path} is {@code null}
|
||||
* @param path
|
||||
* optional, can be {@code null}
|
||||
*/
|
||||
public Doc(final Tags tags, final long offsetInListingFile, final Path path) {
|
||||
super();
|
||||
this.tags = tags;
|
||||
this.path = path.toString().getBytes(StandardCharsets.UTF_8);
|
||||
this.offsetInListingFile = offsetInListingFile;
|
||||
setPath(path);
|
||||
}
|
||||
|
||||
public Tags getTags() {
|
||||
return tags;
|
||||
}
|
||||
|
||||
public Path getPath() {
|
||||
return Paths.get(new String(path, StandardCharsets.UTF_8));
|
||||
public void setPath(final Path path) {
|
||||
if (path != null) {
|
||||
this.path = path.toString().getBytes(StandardCharsets.UTF_8);
|
||||
} else {
|
||||
this.path = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The path to the storage file.
|
||||
* <p>
|
||||
* This value is lazily initialized. Callers have to provide a resolver. See
|
||||
* {@link DataStore#getFolderStoragePathResolver()}.
|
||||
*
|
||||
* @return the path
|
||||
*/
|
||||
public Path getPath(final FolderStoragePathResolver resolver) {
|
||||
|
||||
if (path == null) {
|
||||
final Path resolvedPath = resolver.getPath(offsetInListingFile);
|
||||
setPath(resolvedPath);
|
||||
}
|
||||
final Path result = Paths.get(new String(path, StandardCharsets.UTF_8));
|
||||
return result;
|
||||
}
|
||||
|
||||
private Path getPathNullable() {
|
||||
return getPath(FolderStoragePathResolver.NULL);
|
||||
}
|
||||
|
||||
public long getOffsetInListingFile() {
|
||||
return offsetInListingFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Doc [tags=" + tags + ", path=" + getPath() + "]";
|
||||
return "Doc [tags=" + tags + ", offsetInListingFile=" + offsetInListingFile + ", path=" + getPathNullable()
|
||||
+ "]";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
package org.lucares.pdb.datastore;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
public interface FolderStoragePathResolver {
|
||||
FolderStoragePathResolver NULL = offset -> null;
|
||||
|
||||
public Path getPath(long offsetInListingFile);
|
||||
}
|
||||
@@ -39,8 +39,12 @@ public class PdbDB {
|
||||
return proposer.propose(query, caretIndex);
|
||||
}
|
||||
|
||||
public List<Doc> getByTags(Tags tags) {
|
||||
|
||||
public List<Doc> getByTags(final Tags tags) {
|
||||
|
||||
return dataStore.getByTags(tags);
|
||||
}
|
||||
|
||||
public FolderStoragePathResolver getFolderStoragePathResolver() {
|
||||
return dataStore.getFolderStoragePathResolver();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ import org.lucares.collections.IntList;
|
||||
import org.lucares.pdb.api.StringCompressor;
|
||||
import org.lucares.pdb.api.Tags;
|
||||
import org.lucares.pdb.datastore.Doc;
|
||||
import org.lucares.pdb.datastore.FolderStoragePathResolver;
|
||||
import org.lucares.pdb.datastore.lang.Expression;
|
||||
import org.lucares.pdb.datastore.lang.ExpressionToDocIdVisitor;
|
||||
import org.lucares.pdb.datastore.lang.ExpressionToDocIdVisitor.AllDocIds;
|
||||
@@ -41,25 +42,33 @@ public class DataStore {
|
||||
private final ConcurrentHashMap<String, Map<String, IntList>> keyToValueToDocId = new ConcurrentHashMap<>();
|
||||
|
||||
private final FolderStorage folderStorage;
|
||||
private final FolderStoragePathResolver folderStoragePathResolver;
|
||||
|
||||
public DataStore(final Path dataDirectory) throws IOException {
|
||||
Tags.STRING_COMPRESSOR = StringCompressor.create(keyCompressionFile(dataDirectory));
|
||||
|
||||
folderStorage = new FolderStorage(storageDirectory(dataDirectory), 1000);
|
||||
init(folderStorage);
|
||||
|
||||
folderStoragePathResolver = folderStorage::getPathByOffset;
|
||||
}
|
||||
|
||||
private void init(final FolderStorage folderStorage) throws IOException {
|
||||
|
||||
final long start = System.nanoTime();
|
||||
final Stream<Path> files = folderStorage.list();
|
||||
files.parallel().forEach(path -> {
|
||||
final Stream<ListingFileEntry> files = folderStorage.list();
|
||||
files// .parallel()
|
||||
.forEach(listingFileEntry -> {
|
||||
|
||||
final String filename = path.getFileName().toString();
|
||||
final Tags tags = toTags(filename);
|
||||
cacheTagToFileMapping(tags, path);
|
||||
listingFileEntry.unsetPath(); // unset the path, so that we don't store it for every document (will
|
||||
// be
|
||||
// initialized lazily if needed)
|
||||
|
||||
});
|
||||
final String filename = listingFileEntry.getFilename();
|
||||
final Tags tags = toTags(filename);
|
||||
cacheTagToFileMapping(tags, listingFileEntry);
|
||||
|
||||
});
|
||||
trimIntLists();
|
||||
sortIntLists();
|
||||
synchronized (docIdToDoc) {
|
||||
@@ -68,10 +77,10 @@ public class DataStore {
|
||||
INITIALIZE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
|
||||
}
|
||||
|
||||
private void cacheTagToFileMapping(final Tags tags, final Path path) {
|
||||
private void cacheTagToFileMapping(final Tags tags, final ListingFileEntry listingFileEntry) {
|
||||
|
||||
final int docId;
|
||||
final Doc newDoc = new Doc(tags, path);
|
||||
final Doc newDoc = new Doc(tags, listingFileEntry.getOffsetInListingFile(), listingFileEntry.getPath());
|
||||
synchronized (docIdToDoc) {
|
||||
docId = docIdToDoc.size();
|
||||
docIdToDoc.add(newDoc);
|
||||
@@ -140,11 +149,11 @@ public class DataStore {
|
||||
public Path createNewFile(final Tags tags) throws IOException {
|
||||
|
||||
final String filename = tags.getFilename();
|
||||
final Path result = folderStorage.insert(filename, PDB_EXTENSION);
|
||||
final ListingFileEntry listingFileEntry = folderStorage.insert(filename, PDB_EXTENSION);
|
||||
|
||||
cacheTagToFileMapping(tags, result);
|
||||
cacheTagToFileMapping(tags, listingFileEntry);
|
||||
|
||||
return result;
|
||||
return listingFileEntry.getPath();
|
||||
}
|
||||
|
||||
private Tags toTags(final String filename) {
|
||||
@@ -232,4 +241,8 @@ public class DataStore {
|
||||
final List<Doc> result = tagsToDocs.getOrDefault(tags, new ArrayList<>(0));
|
||||
return result;
|
||||
}
|
||||
|
||||
public FolderStoragePathResolver getFolderStoragePathResolver() {
|
||||
return folderStoragePathResolver;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
@@ -9,9 +12,13 @@ import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.Iterator;
|
||||
import java.util.Spliterator;
|
||||
import java.util.Spliterators;
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.lucares.pdb.api.RuntimeIOException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -39,6 +46,7 @@ public class FolderStorage {
|
||||
this.listingFile = storageBaseDirectory.resolve(LISTING_FILE_NAME);
|
||||
this.maxFilesPerFolder = maxFilesPerFolder;
|
||||
init();
|
||||
initListingFileIfNotExists();
|
||||
}
|
||||
|
||||
private void init() throws IOException {
|
||||
@@ -57,7 +65,7 @@ public class FolderStorage {
|
||||
filesInSecondLevel = (int) Files.list(currentDirectory).count();
|
||||
}
|
||||
|
||||
public Path insert(final String filenamePrefix, final String filenameSuffix) throws IOException {
|
||||
public ListingFileEntry insert(final String filenamePrefix, final String filenameSuffix) throws IOException {
|
||||
|
||||
ensureCapacity();
|
||||
|
||||
@@ -71,17 +79,29 @@ public class FolderStorage {
|
||||
Files.createFile(newFile);
|
||||
filesInSecondLevel++;
|
||||
|
||||
updateListingFile(newFile);
|
||||
final ListingFileEntry result = updateListingFile(newFile);
|
||||
|
||||
return newFile;
|
||||
return result;
|
||||
}
|
||||
|
||||
private synchronized void updateListingFile(final Path newFile) throws IOException {
|
||||
private synchronized ListingFileEntry updateListingFile(final Path newFile) throws IOException {
|
||||
final long offsetInListingFile = getFilePointer();
|
||||
try (Writer out = Files.newBufferedWriter(listingFile, StandardCharsets.UTF_8, StandardOpenOption.CREATE,
|
||||
StandardOpenOption.APPEND)) {
|
||||
out.write(newFile.toString());
|
||||
out.write("\n");
|
||||
}
|
||||
final String filename = newFile.getFileName().toString();
|
||||
return new ListingFileEntry(filename, offsetInListingFile, newFile);
|
||||
}
|
||||
|
||||
private long getFilePointer() throws FileNotFoundException, IOException {
|
||||
final RandomAccessFile randomAccessFile = new RandomAccessFile(listingFile.toFile(), "r");
|
||||
try {
|
||||
return randomAccessFile.getFilePointer();
|
||||
} finally {
|
||||
randomAccessFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void ensureCapacity() throws IOException {
|
||||
@@ -103,15 +123,28 @@ public class FolderStorage {
|
||||
Files.createDirectories(currentDirectory);
|
||||
}
|
||||
|
||||
public Stream<Path> list() throws IOException {
|
||||
public Stream<ListingFileEntry> list() throws IOException {
|
||||
|
||||
return readListingFile();
|
||||
}
|
||||
|
||||
private Stream<ListingFileEntry> readListingFile() throws IOException {
|
||||
|
||||
try (final ListingFileIterator iterator = new ListingFileIterator(listingFile)) {
|
||||
final Spliterator<ListingFileEntry> spliterator = Spliterators.spliteratorUnknownSize(iterator,
|
||||
Spliterator.ORDERED);
|
||||
final Stream<ListingFileEntry> stream = StreamSupport.stream(spliterator, false);
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
|
||||
private void initListingFileIfNotExists() throws IOException {
|
||||
if (!Files.exists(listingFile)) {
|
||||
final long start = System.nanoTime();
|
||||
LOGGER.info("listing file not found -> creating a new one");
|
||||
createNewListingFile();
|
||||
METRICS_CREATE_LISTING_FILE.info(((System.nanoTime() - start) / 1_000_000.0) + "ms");
|
||||
}
|
||||
return Files.lines(listingFile, StandardCharsets.UTF_8).map(Paths::get);
|
||||
}
|
||||
|
||||
private void createNewListingFile() throws IOException {
|
||||
@@ -125,9 +158,23 @@ public class FolderStorage {
|
||||
final Iterator<Path> iterator = stream.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
final Path path = iterator.next();
|
||||
out.write(path.toString());
|
||||
out.write("\n");
|
||||
if (!path.getFileName().toString().equals(LISTING_FILE_NAME)) {
|
||||
out.write(path.toString());
|
||||
out.write("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Path getPathByOffset(final long offsetInListingFile) throws RuntimeIOException {
|
||||
|
||||
try (BufferedReader reader = Files.newBufferedReader(listingFile, StandardCharsets.UTF_8)) {
|
||||
reader.skip(offsetInListingFile);
|
||||
final String line = reader.readLine();
|
||||
return Paths.get(line);
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
public class ListingFileEntry {
|
||||
private final String filename;
|
||||
private final long offsetInListingFile;
|
||||
private Path path;
|
||||
|
||||
public ListingFileEntry(final String filename, final long offsetInListingFile, final Path path) {
|
||||
this.filename = filename;
|
||||
this.offsetInListingFile = offsetInListingFile;
|
||||
this.path = path;
|
||||
}
|
||||
|
||||
public String getFilename() {
|
||||
return filename;
|
||||
}
|
||||
|
||||
public long getOffsetInListingFile() {
|
||||
return offsetInListingFile;
|
||||
}
|
||||
|
||||
public void unsetPath() {
|
||||
path = null;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public Path getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ListingFileEntry [filename=" + filename + ", offsetInListingFile=" + offsetInListingFile + ", path="
|
||||
+ path + "]";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((filename == null) ? 0 : filename.hashCode());
|
||||
result = prime * result + (int) (offsetInListingFile ^ (offsetInListingFile >>> 32));
|
||||
result = prime * result + ((path == null) ? 0 : path.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
final ListingFileEntry other = (ListingFileEntry) obj;
|
||||
if (filename == null) {
|
||||
if (other.filename != null)
|
||||
return false;
|
||||
} else if (!filename.equals(other.filename))
|
||||
return false;
|
||||
if (offsetInListingFile != other.offsetInListingFile)
|
||||
return false;
|
||||
if (path == null) {
|
||||
if (other.path != null)
|
||||
return false;
|
||||
} else if (!path.equals(other.path))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
package org.lucares.pdb.datastore.internal;
|
||||
|
||||
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Optional;

import org.lucares.pdb.api.RuntimeIOException;

import com.google.common.io.CountingInputStream;
|
||||
|
||||
public class ListingFileIterator implements Iterator<ListingFileEntry>, AutoCloseable {
|
||||
|
||||
private final CountingInputStream is;
|
||||
|
||||
private Optional<ListingFileEntry> next = null;
|
||||
|
||||
public ListingFileIterator(final Path listingFile) throws FileNotFoundException {
|
||||
is = new CountingInputStream(new BufferedInputStream(new FileInputStream(listingFile.toFile())));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
|
||||
if (next == null) {
|
||||
next = Optional.ofNullable(getNext());
|
||||
}
|
||||
|
||||
return next.isPresent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ListingFileEntry next() {
|
||||
|
||||
final ListingFileEntry result = next.orElseGet(() -> getNext());
|
||||
if (result == null) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
next = Optional.ofNullable(getNext());
|
||||
return result;
|
||||
}
|
||||
|
||||
public ListingFileEntry getNext() {
|
||||
final StringBuilder line = new StringBuilder();
|
||||
try {
|
||||
final long offsetInListingFile = is.getCount();
|
||||
|
||||
int codePoint;
|
||||
while ((codePoint = is.read()) >= 0) {
|
||||
if (codePoint == '\n') {
|
||||
break;
|
||||
}
|
||||
line.appendCodePoint(codePoint);
|
||||
}
|
||||
|
||||
if (codePoint < 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final int lastSeparatorPosition = line.lastIndexOf(File.separator);
|
||||
final String filename = line.substring(lastSeparatorPosition + 1);
|
||||
return new ListingFileEntry(filename, offsetInListingFile, null);
|
||||
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user