introduce indexes

This commit is contained in:
2021-05-09 10:33:28 +02:00
parent ae545e602c
commit 36ccc57db6
34 changed files with 721 additions and 758 deletions

View File

@@ -1,84 +0,0 @@
package org.lucares.pdb.datastore;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
* Wrapper for chunk of {@link Entry}s.
* <p>
* This class is supposed to be provided to the queue returned by
* PerformanceDb.getQueue(). Processing {@link Entry}s in chunks is more
* efficient than processing each one individually.
* <p>
* Optionally, you can request that the entries will be flushed to disk by
* calling {@link #forceFlush()} before adding it to the queue.
* <p>
* Optionally, this class can act like a future. This is useful if you have to
* wait until the entries have been processed. Use {@link #forceFlush()} and
* {@link #waitUntilFlushed(long, TimeUnit)}.
*/
public class Entries implements Iterable<Entry> {

    /**
     * A special {@link Entries} instance that can be used as poison object for
     * blocking queues.
     */
    public static final Entries POISON = new Entries(0);

    private final List<Entry> entries;

    private boolean forceFlush = false;

    /**
     * Latch that is released by {@link #notifyFlushed()}. It is {@code null}
     * until {@link #forceFlush()} has been called for the first time.
     */
    private CountDownLatch flushLatch = null;

    /**
     * Creates an empty chunk.
     *
     * @param initialSize initial capacity of the backing list
     */
    public Entries(final int initialSize) {
        entries = new ArrayList<>(initialSize);
    }

    /**
     * Creates a chunk holding a copy of the given entries.
     *
     * @param entries the initial entries
     */
    public Entries(final Entry... entries) {
        this.entries = new ArrayList<>(Arrays.asList(entries));
    }

    /**
     * Creates a chunk holding a copy of the given entries.
     *
     * @param entries the initial entries
     */
    public Entries(final Collection<Entry> entries) {
        this.entries = new ArrayList<>(entries);
    }

    /**
     * Appends an entry to this chunk.
     *
     * @param entry the entry to append
     */
    public void add(final Entry entry) {
        entries.add(entry);
    }

    /**
     * @return an iterator over the entries in insertion order
     */
    @Override
    public Iterator<Entry> iterator() {
        return entries.iterator();
    }

    /**
     * @return the number of entries in this chunk
     */
    public int size() {
        return entries.size();
    }

    /**
     * @return {@code true} if {@link #forceFlush()} has been called
     */
    public boolean isForceFlush() {
        return forceFlush;
    }

    /**
     * Marks this chunk as one that must be flushed and prepares the latch used
     * by {@link #waitUntilFlushed(long, TimeUnit)}.
     * <p>
     * Calling this method more than once keeps the existing latch, so that
     * threads already waiting on it are still released by
     * {@link #notifyFlushed()}.
     */
    public void forceFlush() {
        forceFlush = true;
        if (flushLatch == null) {
            flushLatch = new CountDownLatch(1);
        }
    }

    /**
     * Blocks until {@link #notifyFlushed()} has been called or the timeout
     * elapsed.
     *
     * @param timeout maximum time to wait
     * @param unit    unit of {@code timeout}
     * @throws InterruptedException  if the waiting thread is interrupted
     * @throws TimeoutException      if the timeout elapsed before the flush was
     *                               signalled
     * @throws IllegalStateException if {@link #forceFlush()} has not been
     *                               called, because then nobody will ever
     *                               signal the flush
     */
    public void waitUntilFlushed(final long timeout, final TimeUnit unit)
            throws InterruptedException, TimeoutException {
        final CountDownLatch latch = flushLatch;
        if (latch == null) {
            throw new IllegalStateException("forceFlush() must be called before waitUntilFlushed()");
        }
        if (!latch.await(timeout, unit)) {
            throw new TimeoutException("entries were not flushed within " + timeout + " " + unit);
        }
    }

    /**
     * Signals that the entries have been flushed and releases all threads
     * blocked in {@link #waitUntilFlushed(long, TimeUnit)}. Does nothing if
     * {@link #forceFlush()} was never called.
     */
    public void notifyFlushed() {
        final CountDownLatch latch = flushLatch;
        if (latch != null) {
            latch.countDown();
        }
    }
}

View File

@@ -4,6 +4,7 @@ import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Objects;
import org.lucares.pdb.api.Tags;
@@ -42,12 +43,7 @@ public class Entry {
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + (int) (epochMilli ^ (epochMilli >>> 32));
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
result = prime * result + (int) (value ^ (value >>> 32));
return result;
return Objects.hash(epochMilli, tags, value);
}
@Override
@@ -59,15 +55,7 @@ public class Entry {
if (getClass() != obj.getClass())
return false;
final Entry other = (Entry) obj;
if (epochMilli != other.epochMilli)
return false;
if (tags == null) {
if (other.tags != null)
return false;
} else if (!tags.equals(other.tags))
return false;
if (value != other.value)
return false;
return true;
return epochMilli == other.epochMilli && Objects.equals(tags, other.tags) && value == other.value;
}
}

View File

@@ -0,0 +1,17 @@
package org.lucares.pdb.datastore;
public class IndexNotFoundException extends RuntimeException {
private static final long serialVersionUID = 360217229200302323L;
private final String id;
public IndexNotFoundException(final PdbIndexId id) {
super(id.getId());
this.id = id.getId();
}
public String getId() {
return id;
}
}

View File

@@ -0,0 +1,69 @@
package org.lucares.pdb.datastore;
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.lucares.pdb.api.RuntimeIOException;
import org.lucares.pdb.datastore.internal.DataStore;
import org.lucares.utils.cache.HotEntryCache;
/**
 * Registry of the indexes stored below one data directory. Hands out the
 * {@link DataStore} belonging to an index and keeps the opened stores in a
 * {@link HotEntryCache}.
 */
public class Indexes implements Closeable {

    // NOTE(review): the two arguments are presumably the expiry duration and
    // the maximum number of cached stores - confirm against HotEntryCache.
    private final HotEntryCache<PdbIndexId, DataStore> dataStores = new HotEntryCache<PdbIndexId, DataStore>(
            Duration.ofMinutes(1), 10);

    private final Path dataDirectory;

    /**
     * @param dataDirectory the directory that contains the index folders
     */
    public Indexes(final Path dataDirectory) {
        this.dataDirectory = dataDirectory;
    }

    /**
     * Returns the data store for the index with the given id, going through
     * the cache.
     *
     * @param id the id of the index
     * @return the data store of that index
     * @throws IndexNotFoundException if the data directory does not contain a
     *                                valid index with that id
     */
    public DataStore getOrCreateDataStore(final PdbIndexId id) {
        return dataStores.putIfAbsent(id, idx -> {
            final PdbIndex pdbIndex = getIndexById(idx);
            return new DataStore(pdbIndex.getPath());
        });
    }

    /**
     * Loads the index description for the given id.
     *
     * @throws IndexNotFoundException if {@link PdbIndex#create(Path, PdbIndexId)}
     *                                yields no index
     */
    private PdbIndex getIndexById(final PdbIndexId id) {
        return PdbIndex//
                .create(dataDirectory, id)//
                .orElseThrow(() -> new IndexNotFoundException(id));
    }

    /**
     * Returns the data store for an already resolved index, going through the
     * cache.
     *
     * @param pdbIndex the index whose data store is requested
     * @return the data store located at {@link PdbIndex#getPath()}
     */
    public DataStore getOrCreateDataStore(final PdbIndex pdbIndex) {
        return dataStores.putIfAbsent(pdbIndex.getId(), idx -> new DataStore(pdbIndex.getPath()));
    }

    /**
     * Lists all valid indexes directly below the data directory. Children for
     * which {@link PdbIndex#create(Path)} yields nothing are skipped.
     *
     * @return the available indexes
     * @throws RuntimeIOException if the data directory cannot be listed
     */
    public List<PdbIndex> getAvailableIndexes() {
        // Files.list keeps the directory open until the stream is closed, so it
        // has to be used in a try-with-resources block. The type is written
        // fully qualified to avoid needing an additional import.
        try (final java.util.stream.Stream<Path> children = Files.list(dataDirectory)) {
            return children//
                    .map(PdbIndex::create)//
                    .filter(Optional::isPresent)//
                    .map(Optional::get)//
                    .collect(Collectors.toList());
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Creates the folder and metadata file for a new index.
     *
     * @param id          the id of the new index
     * @param name        the display name stored in the metadata
     * @param description the description stored in the metadata
     */
    public void create(final PdbIndexId id, final String name, final String description) {
        PdbIndex.init(dataDirectory, id, name, description);
    }

    /**
     * Closes every data store currently held by the cache.
     */
    @Override
    public void close() {
        dataStores.forEach(DataStore::close);
    }

    /**
     * Flushes every data store currently held by the cache.
     */
    public void flush() {
        dataStores.forEach(DataStore::flush);
    }
}

View File

@@ -0,0 +1,164 @@
package org.lucares.pdb.datastore;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import org.lucares.pdb.api.RuntimeIOException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Description of a single index: its id, the folder it lives in and the name
 * and description read from the {@code meta.properties} file in that folder.
 * <p>
 * An index folder is a directory named {@code index_<id>} that contains a
 * readable {@code meta.properties} file.
 */
public class PdbIndex {

    private static final String META_PROPERTIES = "meta.properties";

    private static final String INDEX_PREFIX = "index_";

    private static final Logger LOGGER = LoggerFactory.getLogger(PdbIndex.class);

    private final Path path;

    private final PdbIndexId id;

    private final String name;

    private final String description;

    /**
     * @param id          the id of the index
     * @param path        the folder of the index
     * @param name        the display name
     * @param description the description
     */
    public PdbIndex(final PdbIndexId id, final Path path, final String name, final String description) {
        this.id = id;
        this.path = path;
        this.name = name;
        this.description = description;
    }

    /**
     * Loads the index with the given id from below the data directory.
     *
     * @param dataDirectory the directory that contains the index folders
     * @param id            the id of the index
     * @return the index, or empty if {@link #create(Path)} rejects the folder
     */
    public static Optional<PdbIndex> create(final Path dataDirectory, final PdbIndexId id) {
        // use getId() like init() does, so both sides build the folder name the same way
        final Path indexPath = dataDirectory.resolve(INDEX_PREFIX + id.getId());
        return create(indexPath);
    }

    /**
     * Loads the index stored in the given folder.
     *
     * @param path the candidate index folder
     * @return the index, or empty if {@code path} is not a directory, its name
     *         does not start with {@code index_}, or it has no readable
     *         {@code meta.properties} file
     * @throws RuntimeIOException if reading the metadata file fails
     */
    public static Optional<PdbIndex> create(final Path path) {
        if (!Files.isDirectory(path)) {
            return Optional.empty();
        }

        // getFileName() is null for paths without name elements, e.g. a root directory
        final Path fileName = path.getFileName();
        if (fileName == null) {
            return Optional.empty();
        }
        final String folderName = fileName.toString();
        if (!folderName.startsWith(INDEX_PREFIX)) {
            return Optional.empty();
        }

        final Path metadataPath = path.resolve(META_PROPERTIES);
        if (!Files.isRegularFile(metadataPath)) {
            LOGGER.warn("index folder {} is ignored, because it does not contain a meta.properties file", path);
            return Optional.empty();
        }
        if (!Files.isReadable(metadataPath)) {
            // the placeholder is required, otherwise the path argument is silently dropped
            LOGGER.warn("meta.properties file {} is not readable", metadataPath);
            return Optional.empty();
        }

        final PdbIndexId indexId = new PdbIndexId(folderName.substring(INDEX_PREFIX.length()));

        final Properties properties = readProperties(metadataPath);
        final String name = properties.getProperty("name", "no name");
        final String description = properties.getProperty("description", "");

        return Optional.of(new PdbIndex(indexId, path, name, description));
    }

    /**
     * Reads the UTF-8 encoded properties file at the given location.
     *
     * @throws RuntimeIOException if the file cannot be read
     */
    private static Properties readProperties(final Path metadataPath) {
        final Properties properties = new Properties();
        try (final Reader r = new FileReader(metadataPath.toFile(), StandardCharsets.UTF_8)) {
            properties.load(r);
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
        return properties;
    }

    /**
     * Writes name and description as a UTF-8 encoded properties file.
     *
     * @throws RuntimeIOException if the file cannot be written
     */
    private static void writeProperties(final Path metadataPath, final String name, final String description) {
        final Properties properties = new Properties();
        properties.setProperty("name", name);
        properties.setProperty("description", description);
        try (final Writer w = new FileWriter(metadataPath.toFile(), StandardCharsets.UTF_8)) {
            properties.store(w, "");
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * @return the id of this index
     */
    public PdbIndexId getId() {
        return id;
    }

    /**
     * @return the folder of this index
     */
    public Path getPath() {
        return path;
    }

    /**
     * @return the display name of this index
     */
    public String getName() {
        return name;
    }

    /**
     * @return the description of this index
     */
    public String getDescription() {
        return description;
    }

    /**
     * Custom hash code implementation: only the id is taken into account.
     */
    @Override
    public int hashCode() {
        return Objects.hash(id);
    }

    /**
     * Custom equals implementation: two indexes are equal if they are of the
     * same class and have equal ids; path, name and description are ignored.
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final PdbIndex other = (PdbIndex) obj;
        return Objects.equals(id, other.id);
    }

    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append("PdbIndex [path=");
        builder.append(path);
        builder.append(", name=");
        builder.append(name);
        builder.append(", description=");
        builder.append(description);
        builder.append("]");
        return builder.toString();
    }

    /**
     * Creates the folder {@code index_<id>} below the data directory (including
     * missing parent directories) and writes its {@code meta.properties} file.
     *
     * @param dataDirectory the directory that contains the index folders
     * @param id            the id of the new index
     * @param name          the display name to store
     * @param description   the description to store
     * @throws RuntimeIOException if the folder or the metadata file cannot be
     *                            created
     */
    public static void init(final Path dataDirectory, final PdbIndexId id, final String name,
            final String description) {
        try {
            final Path path = dataDirectory.resolve(INDEX_PREFIX + id.getId());
            Files.createDirectories(path);

            final Path metadataPath = path.resolve(META_PROPERTIES);
            writeProperties(metadataPath, name, description);
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }
}

View File

@@ -0,0 +1,39 @@
package org.lucares.pdb.datastore;
import java.util.Objects;
public class PdbIndexId {
private final String id;
public PdbIndexId(final String id) {
super();
this.id = id;
}
public String getId() {
return id;
}
@Override
public int hashCode() {
return Objects.hash(id);
}
@Override
public boolean equals(final Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
final PdbIndexId other = (PdbIndexId) obj;
return Objects.equals(id, other.id);
}
@Override
public String toString() {
return id;
}
}

View File

@@ -121,7 +121,7 @@ public class DataStore implements AutoCloseable {
private final PartitionDiskStore diskStorage;
private final Path storageBasePath;
public DataStore(final Path dataDirectory) throws IOException {
public DataStore(final Path dataDirectory) {
storageBasePath = storageDirectory(dataDirectory);
Tags.STRING_COMPRESSOR = StringCompressor.create(keyCompressionFile(storageBasePath));
@@ -147,11 +147,11 @@ public class DataStore implements AutoCloseable {
writerCache.addListener((key, value) -> value.close());
}
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
private Path keyCompressionFile(final Path dataDirectory) {
return dataDirectory.resolve("keys.csv");
}
public static Path storageDirectory(final Path dataDirectory) throws IOException {
public static Path storageDirectory(final Path dataDirectory) {
return dataDirectory.resolve(SUBDIR_STORAGE);
}

View File

@@ -301,7 +301,7 @@ public class QueryCompletionIndex implements AutoCloseable {
private final PartitionPersistentMap<Tag, Empty, Empty> fieldToValueIndex;
private final PartitionPersistentMap<String, Empty, Empty> fieldIndex;
public QueryCompletionIndex(final Path basePath) throws IOException {
public QueryCompletionIndex(final Path basePath) {
tagToTagIndex = new PartitionPersistentMap<>(basePath, "queryCompletionTagToTagIndex.bs", new EncoderTwoTags(),
PartitionAwareWrapper.wrap(PersistentMap.EMPTY_ENCODER));

View File

@@ -23,6 +23,7 @@ import javax.swing.JTextArea;
import javax.swing.JTextField;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
@@ -36,7 +37,6 @@ import org.lucares.pdb.api.Tags;
import org.lucares.pdb.blockstorage.BSFile;
import org.lucares.pdb.datastore.Doc;
import org.lucares.pdb.datastore.Proposal;
import org.junit.jupiter.api.Assertions;
import org.lucares.utils.CollectionUtils;
import org.lucares.utils.DateUtils;
import org.lucares.utils.file.FileUtils;
@@ -261,7 +261,7 @@ public class DataStoreTest {
final String query = input.getText();
final int caretIndex = input.getCaretPosition();
final QueryWithCaretMarker q = new QueryWithCaretMarker(query, dateRange, caretIndex,
ResultMode.CUT_AT_DOT);
ResultMode.CUT_AT_DOT, null);
final List<Proposal> proposals = dataStore.propose(q);
@@ -284,7 +284,8 @@ public class DataStoreTest {
}
});
final List<Doc> docs = dataStore.search(Query.createQuery("", DateTimeRange.relative(1, ChronoUnit.DAYS)));
final List<Doc> docs = dataStore
.search(Query.createQuery("", DateTimeRange.relative(1, ChronoUnit.DAYS), null));
final StringBuilder out = new StringBuilder();
out.append("info\n");
for (final Doc doc : docs) {
@@ -304,7 +305,7 @@ public class DataStoreTest {
final String query = queryWithCaret.replace("|", "");
final int caretIndex = queryWithCaret.indexOf("|");
final List<Proposal> proposals = dataStore
.propose(new QueryWithCaretMarker(query, dateRange, caretIndex, ResultMode.CUT_AT_DOT));
.propose(new QueryWithCaretMarker(query, dateRange, caretIndex, ResultMode.CUT_AT_DOT, null));
System.out.println(
"proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList()));
@@ -317,12 +318,12 @@ public class DataStoreTest {
}
private void assertQueryFindsResults(final DateTimeRange dateRange, final String query) {
final List<Doc> result = dataStore.search(new Query(query, dateRange));
final List<Doc> result = dataStore.search(new Query(query, dateRange, null));
Assertions.assertFalse(result.isEmpty(), "The query '" + query + "' must return a result, but didn't.");
}
private void assertSearch(final DateTimeRange dateRange, final String queryString, final Tags... tags) {
final Query query = new Query(queryString, dateRange);
final Query query = new Query(queryString, dateRange, null);
final List<Doc> actualDocs = dataStore.search(query);
final List<Long> actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber);

View File

@@ -8,6 +8,7 @@ import java.util.Collections;
import java.util.List;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.lucares.pdb.api.DateTimeRange;
@@ -15,7 +16,6 @@ import org.lucares.pdb.api.QueryWithCaretMarker;
import org.lucares.pdb.api.QueryWithCaretMarker.ResultMode;
import org.lucares.pdb.api.Tags;
import org.lucares.pdb.datastore.Proposal;
import org.junit.jupiter.api.Assertions;
import org.lucares.utils.CollectionUtils;
import org.lucares.utils.file.FileUtils;
@@ -25,6 +25,8 @@ public class ProposerTest {
private static DataStore dataStore;
private static DateTimeRange dateRange;
private static final String INDEX = "no used";
@BeforeAll
public static void beforeClass() throws Exception {
dataDirectory = Files.createTempDirectory("pdb");
@@ -293,7 +295,7 @@ public class ProposerTest {
final Proposal... expected) throws InterruptedException {
final List<Proposal> actual = dataStore
.propose(new QueryWithCaretMarker(query, dateRange, caretIndex, resultMode));
.propose(new QueryWithCaretMarker(query, dateRange, caretIndex, resultMode, INDEX));
final List<Proposal> expectedList = Arrays.asList(expected);
Collections.sort(expectedList);