rewrite query completion
The old implementation searched for all possible values and then executed each query to see what matches. The new implementation uses several indices to find only the matching values.
This commit is contained in:
@@ -0,0 +1,107 @@
|
|||||||
|
package org.lucares.pdb.map;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.lucares.collections.LongList;
|
||||||
|
import org.lucares.pdb.blockstorage.BSFile;
|
||||||
|
import org.lucares.pdb.diskstorage.DiskStorage;
|
||||||
|
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
|
||||||
|
import org.lucares.utils.Preconditions;
|
||||||
|
import org.lucares.utils.cache.HotEntryCache;
|
||||||
|
import org.lucares.utils.cache.HotEntryCache.Event;
|
||||||
|
import org.lucares.utils.cache.HotEntryCache.EventListener;
|
||||||
|
import org.lucares.utils.cache.HotEntryCache.EventType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Combines {@link PersistentMap} and {@link BSFile} to represent a map where
|
||||||
|
* the values are lists of longs.
|
||||||
|
*/
|
||||||
|
public class PersistentMapOfListsOfLongs<K> implements AutoCloseable {
|
||||||
|
|
||||||
|
private static final class RemovalListener<KEY> implements EventListener<KEY, BSFile> {
|
||||||
|
@Override
|
||||||
|
public void onEvent(final Event<KEY, BSFile> event) {
|
||||||
|
event.getValue().close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final PersistentMap<K, Long> map;
|
||||||
|
private final Path mapPath;
|
||||||
|
private final DiskStorage diskStore;
|
||||||
|
private final Path diskStorePath;
|
||||||
|
|
||||||
|
private final HotEntryCache<K, BSFile> writerCache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new map that stores indexed streams/lists of longs.
|
||||||
|
* <p>
|
||||||
|
* This class creates two files on disk. One for the index and one for the lists
|
||||||
|
* of longs.
|
||||||
|
*
|
||||||
|
* @param path the folder where to store the map
|
||||||
|
* @param filePrefix prefix of the files
|
||||||
|
* @param keyEncoder {@link EncoderDecoder} for the key
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public PersistentMapOfListsOfLongs(final Path path, final String filePrefix, final EncoderDecoder<K> keyEncoder)
|
||||||
|
throws IOException {
|
||||||
|
Preconditions.checkTrue(Files.isDirectory(path), "must be a directory {0}", path);
|
||||||
|
mapPath = path.resolve(filePrefix + "_index.bs");
|
||||||
|
diskStorePath = path.resolve(filePrefix + "_data.bs");
|
||||||
|
map = new PersistentMap<>(mapPath, keyEncoder, PersistentMap.LONG_CODER);
|
||||||
|
diskStore = new DiskStorage(diskStorePath);
|
||||||
|
|
||||||
|
writerCache = new HotEntryCache<>(Duration.ofMinutes(10), filePrefix + "Cache");
|
||||||
|
writerCache.addListener(new RemovalListener<K>(), EventType.EVICTED, EventType.REMOVED);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void appendLong(final K key, final long value) throws IOException {
|
||||||
|
|
||||||
|
BSFile cachedWriter = writerCache.get(key);
|
||||||
|
if (cachedWriter == null) {
|
||||||
|
final Long bsFileBlockNumber = map.getValue(key);
|
||||||
|
|
||||||
|
if (bsFileBlockNumber == null) {
|
||||||
|
cachedWriter = BSFile.newFile(diskStore);
|
||||||
|
map.putValue(key, cachedWriter.getRootBlockOffset());
|
||||||
|
} else {
|
||||||
|
cachedWriter = BSFile.existingFile(bsFileBlockNumber, diskStore);
|
||||||
|
}
|
||||||
|
writerCache.put(key, cachedWriter);
|
||||||
|
}
|
||||||
|
cachedWriter.append(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized boolean hasKey(final K key) throws IOException {
|
||||||
|
return map.getValue(key) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized Stream<LongList> getLongs(final K key) throws IOException {
|
||||||
|
final Long bsFileBlockNumber = map.getValue(key);
|
||||||
|
if (bsFileBlockNumber == null) {
|
||||||
|
throw new NoSuchElementException("the map at '" + mapPath + "' does not contain the key '" + key + "'");
|
||||||
|
}
|
||||||
|
|
||||||
|
final BSFile bsFile = BSFile.existingFile(bsFileBlockNumber, diskStore);
|
||||||
|
|
||||||
|
return bsFile.streamOfLongLists();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
try {
|
||||||
|
try {
|
||||||
|
writerCache.forEach(bsFile -> bsFile.close());
|
||||||
|
} finally {
|
||||||
|
map.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
diskStore.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
package org.lucares.pdb.map;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import org.lucares.collections.LongList;
|
||||||
|
import org.lucares.utils.file.FileUtils;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public class PersistentMapOfListsOfLongsTest {
|
||||||
|
|
||||||
|
private Path dataDirectory;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void beforeMethod() throws IOException {
|
||||||
|
dataDirectory = Files.createTempDirectory("pdb");
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterMethod
|
||||||
|
public void afterMethod() throws IOException {
|
||||||
|
FileUtils.delete(dataDirectory);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test() throws IOException {
|
||||||
|
|
||||||
|
final String mapFilePrefix = "test";
|
||||||
|
final String keyA = "a";
|
||||||
|
final String keyB = "b";
|
||||||
|
|
||||||
|
final int size = 10;
|
||||||
|
final LongList a = LongList.range(0, size);
|
||||||
|
a.shuffle();
|
||||||
|
final LongList b = LongList.range(0, size);
|
||||||
|
b.shuffle();
|
||||||
|
|
||||||
|
try (PersistentMapOfListsOfLongs<String> map = new PersistentMapOfListsOfLongs<>(dataDirectory, mapFilePrefix,
|
||||||
|
PersistentMap.STRING_CODER)) {
|
||||||
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
map.appendLong(keyA, a.get(i));
|
||||||
|
map.appendLong(keyB, b.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try (PersistentMapOfListsOfLongs<String> map = new PersistentMapOfListsOfLongs<>(dataDirectory, mapFilePrefix,
|
||||||
|
PersistentMap.STRING_CODER)) {
|
||||||
|
|
||||||
|
final LongList actualA = new LongList();
|
||||||
|
map.getLongs(keyA).forEachOrdered(actualA::addAll);
|
||||||
|
Assert.assertEquals(actualA, a);
|
||||||
|
|
||||||
|
final LongList actualB = new LongList();
|
||||||
|
map.getLongs(keyB).forEachOrdered(actualB::addAll);
|
||||||
|
Assert.assertEquals(actualB, b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -51,7 +51,7 @@ fragment
|
|||||||
JavaLetter
|
JavaLetter
|
||||||
: [a-zA-Z0-9$_] // these are the "java letters" below 0x7F
|
: [a-zA-Z0-9$_] // these are the "java letters" below 0x7F
|
||||||
| [\u002a] // asterisk, used for wildcards
|
| [\u002a] // asterisk, used for wildcards
|
||||||
| [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area)
|
| [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area)
|
||||||
| // covers all characters above 0x7F which are not a surrogate
|
| // covers all characters above 0x7F which are not a surrogate
|
||||||
~[\u0000-\u007F\uD800-\uDBFF]
|
~[\u0000-\u007F\uD800-\uDBFF]
|
||||||
{Character.isJavaIdentifierStart(_input.LA(-1))}?
|
{Character.isJavaIdentifierStart(_input.LA(-1))}?
|
||||||
@@ -64,7 +64,7 @@ fragment
|
|||||||
JavaLetterOrDigit
|
JavaLetterOrDigit
|
||||||
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
|
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
|
||||||
| [\u002a] // asterisk, used for wildcards
|
| [\u002a] // asterisk, used for wildcards
|
||||||
| [\ue001] // used to help parser identify empty identifiers (character is the second in the private use area)
|
| [\ue001] // used to help parser identify empty identifiers and to find the caret position when searching for proposals (character is the second in the private use area)
|
||||||
| '.'
|
| '.'
|
||||||
| '/'
|
| '/'
|
||||||
| '-'
|
| '-'
|
||||||
|
|||||||
@@ -161,7 +161,6 @@ public class DataStore implements AutoCloseable {
|
|||||||
private final DiskStorage diskStorage;
|
private final DiskStorage diskStorage;
|
||||||
private final Path diskStorageFilePath;
|
private final Path diskStorageFilePath;
|
||||||
private final Path storageBasePath;
|
private final Path storageBasePath;
|
||||||
private final Path queryCompletionIndexFile;
|
|
||||||
|
|
||||||
public DataStore(final Path dataDirectory) throws IOException {
|
public DataStore(final Path dataDirectory) throws IOException {
|
||||||
storageBasePath = storageDirectory(dataDirectory);
|
storageBasePath = storageDirectory(dataDirectory);
|
||||||
@@ -183,8 +182,7 @@ public class DataStore implements AutoCloseable {
|
|||||||
final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs");
|
final Path docIdToDocIndexPath = storageBasePath.resolve("docIdToDocIndex.bs");
|
||||||
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC);
|
docIdToDoc = new PersistentMap<>(docIdToDocIndexPath, PersistentMap.LONG_CODER, ENCODER_DOC);
|
||||||
|
|
||||||
queryCompletionIndexFile = storageBasePath.resolve("queryCompletionIndex.bs");
|
queryCompletionIndex = new QueryCompletionIndex(storageBasePath);
|
||||||
queryCompletionIndex = new QueryCompletionIndex(queryCompletionIndexFile);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
private Path keyCompressionFile(final Path dataDirectory) throws IOException {
|
||||||
@@ -195,8 +193,13 @@ public class DataStore implements AutoCloseable {
|
|||||||
return dataDirectory.resolve(SUBDIR_STORAGE);
|
return dataDirectory.resolve(SUBDIR_STORAGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public long createNewFile(final Tags tags) throws IOException {
|
// visible for test
|
||||||
|
QueryCompletionIndex getQueryCompletionIndex() {
|
||||||
|
return queryCompletionIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long createNewFile(final Tags tags) {
|
||||||
|
try {
|
||||||
final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
|
final long newFilesRootBlockOffset = diskStorage.allocateBlock(BSFile.BLOCK_SIZE);
|
||||||
|
|
||||||
final long docId = createUniqueDocId();
|
final long docId = createUniqueDocId();
|
||||||
@@ -228,6 +231,9 @@ public class DataStore implements AutoCloseable {
|
|||||||
queryCompletionIndex.addTags(tags);
|
queryCompletionIndex.addTags(tags);
|
||||||
|
|
||||||
return newFilesRootBlockOffset;
|
return newFilesRootBlockOffset;
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new RuntimeIOException(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private long createUniqueDocId() {
|
private long createUniqueDocId() {
|
||||||
@@ -357,11 +363,10 @@ public class DataStore implements AutoCloseable {
|
|||||||
|
|
||||||
public List<Proposal> propose(final String query, final int caretIndex) {
|
public List<Proposal> propose(final String query, final int caretIndex) {
|
||||||
|
|
||||||
final NewProposerParser newProposerParser = new NewProposerParser(this);
|
final NewProposerParser newProposerParser = new NewProposerParser(queryCompletionIndex);
|
||||||
final List<Proposal> proposals = newProposerParser.propose(query, caretIndex);
|
final List<Proposal> proposals = newProposerParser.propose(query, caretIndex);
|
||||||
System.out.println(proposals);
|
LOGGER.debug("Proposals for query {}: {}", query, proposals);
|
||||||
|
return proposals;
|
||||||
return new Proposer(this).propose(query, caretIndex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public DiskStorage getDiskStorage() {
|
public DiskStorage getDiskStorage() {
|
||||||
|
|||||||
@@ -7,11 +7,13 @@ import java.util.SortedSet;
|
|||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.lucares.collections.LongList;
|
import org.lucares.collections.LongList;
|
||||||
|
import org.lucares.pdb.api.RuntimeIOException;
|
||||||
import org.lucares.pdb.api.Tag;
|
import org.lucares.pdb.api.Tag;
|
||||||
import org.lucares.pdb.api.Tags;
|
import org.lucares.pdb.api.Tags;
|
||||||
import org.lucares.pdb.map.Empty;
|
import org.lucares.pdb.map.Empty;
|
||||||
import org.lucares.pdb.map.PersistentMap;
|
import org.lucares.pdb.map.PersistentMap;
|
||||||
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
|
import org.lucares.pdb.map.PersistentMap.EncoderDecoder;
|
||||||
|
import org.lucares.utils.Preconditions;
|
||||||
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
import org.lucares.utils.byteencoder.VariableByteEncoder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -22,30 +24,42 @@ import org.lucares.utils.byteencoder.VariableByteEncoder;
|
|||||||
* <br>
|
* <br>
|
||||||
* The expensive way is to execute the query for all available lastnames and
|
* The expensive way is to execute the query for all available lastnames and
|
||||||
* keep those that return at least one result.<br>
|
* keep those that return at least one result.<br>
|
||||||
* A more effiecient way uses an index that lists all lastnames that occurr with
|
* A more efficient way uses an index that lists all lastnames that occur with
|
||||||
* firstname=John. If we write this as table, then it looks like this:
|
* firstname=John. If we write this as table, then it looks like this:
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
*┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
|
*┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
|
||||||
*┃ fieldA ┃ valueA ┃ fieldB ┃ valueB ┃
|
*┃ fieldB ┃ fieldA ┃ valueA ┃ valueB ┃
|
||||||
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
*┃firstname┃ John ┃lastname ┃ Connor ┃
|
*┃lastname ┃firstname┃ John ┃ Connor ┃
|
||||||
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
*┃firstname┃ John ┃lastname ┃Carpenter┃
|
*┃lastname ┃firstname┃ John ┃Carpenter┃
|
||||||
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
*┃firstname┃ John ┃country ┃ Germany ┃
|
*┃country ┃firstname┃ John ┃ Germany ┃
|
||||||
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
*┃firstname┃ John ┃lastname ┃ Nash ┃
|
*┃lastname ┃firstname┃ John ┃ Nash ┃
|
||||||
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
*┃firstname┃ Rick ┃lastname ┃ Meyer ┃
|
*┃lastname ┃firstname┃ Rick ┃ Meyer ┃
|
||||||
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
*┣━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━╋━━━━━━━━━┫
|
||||||
*┃firstname┃ Rick ┃lastname ┃ Castle ┃
|
*┃lastname ┃firstname┃ Rick ┃ Castle ┃
|
||||||
*┗━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┛
|
*┗━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┻━━━━━━━━━┛
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* The lastnames where firstname=John are Connor, Carpenter and Nash. Given such
|
* The lastnames where firstname=John are: Connor, Carpenter and Nash. Given
|
||||||
* a table we can just for all rows with fieldA=firstname and valueA=John and
|
* such a table we can just search for all rows with fieldA=firstname and valueA=John
|
||||||
* fieldB = lastname.
|
* and fieldB = lastname.
|
||||||
|
* <p>
|
||||||
|
* Please note, that the columns for fieldA and fieldB come first. This is to
|
||||||
|
* make this index more suitable for IN-expressions and wildcard expressions of
|
||||||
|
* fieldA. Because we can now find all values for lastname where firstname=J*n*
|
||||||
|
* by searching for fieldA=firstname and fieldB=lastname, then do the wildcard
|
||||||
|
* evaluation while iterating over those hits. We do not have to expand the
|
||||||
|
* wildcard and then do hundreds or thousands of queries.
|
||||||
|
* <p>
|
||||||
|
* Please note, that fieldB comes before fieldA. This is, so that we can run
|
||||||
|
* inverse searches more efficiently. E.g. finding all values for
|
||||||
|
* fieldB=lastname where fieldA=firstname has a value != Connor. This is used
|
||||||
|
* for queries like 'NOT (firstname=Connor) and lastname=|'
|
||||||
* <p>
|
* <p>
|
||||||
* The values in this index represent such a table.
|
* The values in this index represent such a table.
|
||||||
* <p>
|
* <p>
|
||||||
@@ -63,6 +77,12 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
this.tagB = tagB;
|
this.tagB = tagB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a pair of tags from plain strings.
 * <p>
 * Mind the parameter order: the field of tagB comes first, then field and
 * value of tagA, and the value of tagB comes last.
 *
 * @param fieldB field of tagB
 * @param fieldA field of tagA
 * @param valueA value of tagA
 * @param valueB value of tagB
 */
public TwoTags(final String fieldB, final String fieldA, final String valueA, final String valueB) {
    // tagA is created before tagB, same order as before
    this.tagA = new Tag(fieldA, valueA);
    this.tagB = new Tag(fieldB, valueB);
}
|
||||||
|
|
||||||
public Tag getTagA() {
|
public Tag getTagA() {
|
||||||
return tagA;
|
return tagA;
|
||||||
}
|
}
|
||||||
@@ -75,7 +95,29 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
public String toString() {
|
public String toString() {
|
||||||
return tagA + "::" + tagB;
|
return tagA + "::" + tagB;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class FieldField {
|
||||||
|
private final int fieldA;
|
||||||
|
private final int fieldB;
|
||||||
|
|
||||||
|
public FieldField(final int fieldA, final int fieldB) {
|
||||||
|
this.fieldA = fieldA;
|
||||||
|
this.fieldB = fieldB;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFieldA() {
|
||||||
|
return fieldA;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFieldB() {
|
||||||
|
return fieldB;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return fieldA + "::" + fieldB;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class EncoderTwoTags implements EncoderDecoder<TwoTags> {
|
private static final class EncoderTwoTags implements EncoderDecoder<TwoTags> {
|
||||||
@@ -86,18 +128,23 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
final Tag tagA = tagAndField.getTagA();
|
final Tag tagA = tagAndField.getTagA();
|
||||||
final Tag tagB = tagAndField.getTagB();
|
final Tag tagB = tagAndField.getTagB();
|
||||||
|
|
||||||
|
tmp.add(tagB.getKey());
|
||||||
tmp.add(tagA.getKey());
|
tmp.add(tagA.getKey());
|
||||||
|
|
||||||
|
if (tagA.getValue() >= 0) {
|
||||||
tmp.add(tagA.getValue());
|
tmp.add(tagA.getValue());
|
||||||
|
|
||||||
tmp.add(tagB.getKey());
|
|
||||||
|
|
||||||
// A query for tagA.key and tagA.value and tagB.key is done by setting
|
// A query for tagA.key and tagA.value and tagB.key is done by setting
|
||||||
// tagB.value==0.
|
// tagB.value==-1.
|
||||||
// The query is then executed as a prefix search. Thus tagB.value must not be
|
// The query is then executed as a prefix search. Thus tagB.value must not be
|
||||||
// part of the byte array that is returned.
|
// part of the byte array that is returned.
|
||||||
if (tagB.getValue() >= 0) {
|
if (tagB.getValue() >= 0) {
|
||||||
tmp.add(tagB.getValue());
|
tmp.add(tagB.getValue());
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
Preconditions.checkSmaller(tagB.getValue(), 0,
|
||||||
|
"if no value for tagA is given, then tagB must also be empty");
|
||||||
|
}
|
||||||
|
|
||||||
return VariableByteEncoder.encode(tmp);
|
return VariableByteEncoder.encode(tmp);
|
||||||
}
|
}
|
||||||
@@ -106,9 +153,9 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
public TwoTags decode(final byte[] bytes) {
|
public TwoTags decode(final byte[] bytes) {
|
||||||
|
|
||||||
final LongList tmp = VariableByteEncoder.decode(bytes);
|
final LongList tmp = VariableByteEncoder.decode(bytes);
|
||||||
final int tagAKey = (int) tmp.get(0);
|
final int tagBKey = (int) tmp.get(0);
|
||||||
final int tagAValue = (int) tmp.get(1);
|
final int tagAKey = (int) tmp.get(1);
|
||||||
final int tagBKey = (int) tmp.get(2);
|
final int tagAValue = (int) tmp.get(2);
|
||||||
final int tagBValue = (int) tmp.get(3);
|
final int tagBValue = (int) tmp.get(3);
|
||||||
|
|
||||||
final Tag tagA = new Tag(tagAKey, tagAValue);
|
final Tag tagA = new Tag(tagAKey, tagAValue);
|
||||||
@@ -118,23 +165,81 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final PersistentMap<TwoTags, Empty> tagToTagIndex;
|
/**
 * Encodes a {@link Tag} as a variable-byte encoded list of its key followed by
 * its value. A negative value is left out of the encoded bytes.
 */
private static final class EncoderTag implements EncoderDecoder<Tag> {
|
||||||
|
|
||||||
public QueryCompletionIndex(final Path indexFile) throws IOException {
|
@Override
|
||||||
tagToTagIndex = new PersistentMap<>(indexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER);
|
public byte[] encode(final Tag tag) {
|
||||||
|
|
||||||
|
final LongList longList = new LongList(2);
|
||||||
|
longList.add(tag.getKey());
|
||||||
|
|
||||||
|
// A negative value is not encoded, so the result consists of the key only.
// findAllValuesForField passes such a tag (value -1) as the key prefix.
if (tag.getValue() >= 0) {
|
||||||
|
longList.add(tag.getValue());
|
||||||
|
}
|
||||||
|
return VariableByteEncoder.encode(longList);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tag decode(final byte[] bytes) {
|
||||||
|
final LongList tmp = VariableByteEncoder.decode(bytes);
|
||||||
|
final int key = (int) tmp.get(0);
|
||||||
|
// reads a second element, i.e. expects bytes that contain key and value
final int value = (int) tmp.get(1);
|
||||||
|
return new Tag(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class EncoderField implements EncoderDecoder<String> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] encode(final String field) {
|
||||||
|
|
||||||
|
if (field.isEmpty()) {
|
||||||
|
return new byte[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
return VariableByteEncoder.encode(Tags.STRING_COMPRESSOR.put(field));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String decode(final byte[] bytes) {
|
||||||
|
final long compressedString = VariableByteEncoder.decodeFirstValue(bytes);
|
||||||
|
return Tags.STRING_COMPRESSOR.get((int) compressedString);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final PersistentMap<TwoTags, Empty> tagToTagIndex;
|
||||||
|
private final PersistentMap<Tag, Empty> fieldToValueIndex;
|
||||||
|
private final PersistentMap<String, Empty> fieldIndex;
|
||||||
|
|
||||||
|
public QueryCompletionIndex(final Path basePath) throws IOException {
|
||||||
|
final Path tagToTagIndexFile = basePath.resolve("queryCompletionTagToTagIndex.bs");
|
||||||
|
tagToTagIndex = new PersistentMap<>(tagToTagIndexFile, new EncoderTwoTags(), PersistentMap.EMPTY_ENCODER);
|
||||||
|
|
||||||
|
final Path fieldToValueIndexFile = basePath.resolve("queryCompletionFieldToValueIndex.bs");
|
||||||
|
fieldToValueIndex = new PersistentMap<>(fieldToValueIndexFile, new EncoderTag(), PersistentMap.EMPTY_ENCODER);
|
||||||
|
|
||||||
|
final Path fieldIndexFile = basePath.resolve("queryCompletionFieldIndex.bs");
|
||||||
|
fieldIndex = new PersistentMap<>(fieldIndexFile, new EncoderField(), PersistentMap.EMPTY_ENCODER);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addTags(final Tags tags) throws IOException {
|
public void addTags(final Tags tags) throws IOException {
|
||||||
final List<Tag> listOfTagsA = tags.toTags();
|
final List<Tag> listOfTagsA = tags.toTags();
|
||||||
final List<Tag> listOfTagsB = tags.toTags();
|
final List<Tag> listOfTagsB = tags.toTags();
|
||||||
|
|
||||||
// index all combinations of tagA and tagB
|
// index all combinations of tagA and tagB and fieldA to fieldB
|
||||||
for (final Tag tagA : listOfTagsA) {
|
for (final Tag tagA : listOfTagsA) {
|
||||||
for (final Tag tagB : listOfTagsB) {
|
for (final Tag tagB : listOfTagsB) {
|
||||||
final TwoTags key = new TwoTags(tagA, tagB);
|
final TwoTags key = new TwoTags(tagA, tagB);
|
||||||
tagToTagIndex.putValue(key, Empty.INSTANCE);
|
tagToTagIndex.putValue(key, Empty.INSTANCE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// create indices of all tags and all fields
|
||||||
|
for (final Tag tag : listOfTagsA) {
|
||||||
|
fieldToValueIndex.putValue(tag, Empty.INSTANCE);
|
||||||
|
fieldIndex.putValue(tag.getKeyAsString(), Empty.INSTANCE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -142,7 +247,13 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
tagToTagIndex.close();
|
tagToTagIndex.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public SortedSet<String> find(final Tag tag, final String field) throws IOException {
|
public SortedSet<String> find(final String property, final String value, final String field) {
|
||||||
|
final Tag tag = new Tag(property, value);
|
||||||
|
return find(tag, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SortedSet<String> find(final Tag tag, final String field) {
|
||||||
|
try {
|
||||||
final SortedSet<String> result = new TreeSet<>();
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
|
final int tagBKey = Tags.STRING_COMPRESSOR.put(field);
|
||||||
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
|
final Tag tagB = new Tag(tagBKey, -1); // the value must be negative for the prefix search to work. See
|
||||||
@@ -153,6 +264,59 @@ public class QueryCompletionIndex implements AutoCloseable {
|
|||||||
});
|
});
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new RuntimeIOException(e);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SortedSet<String> findAllValuesForField(final String field) {
|
||||||
|
try {
|
||||||
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
|
final int tagKey = Tags.STRING_COMPRESSOR.put(field);
|
||||||
|
final Tag keyPrefix = new Tag(tagKey, -1); // the value must be negative for the prefix search to work. See
|
||||||
|
|
||||||
|
fieldToValueIndex.visitValues(keyPrefix, (k, v) -> {
|
||||||
|
result.add(k.getValueAsString());
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new RuntimeIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SortedSet<String> findAllValuesNotForField(final Tag tag, final String field) {
|
||||||
|
try {
|
||||||
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
|
|
||||||
|
final TwoTags keyPrefix = new TwoTags(field, tag.getKeyAsString(), null, null);
|
||||||
|
|
||||||
|
final int negatedValueA = tag.getValue();
|
||||||
|
|
||||||
|
tagToTagIndex.visitValues(keyPrefix, (k, v) -> {
|
||||||
|
|
||||||
|
final int valueA = k.getTagA().getValue();
|
||||||
|
if (valueA != negatedValueA) {
|
||||||
|
result.add(k.getTagB().getValueAsString());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new RuntimeIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SortedSet<String> findAllFields() {
|
||||||
|
try {
|
||||||
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
|
fieldIndex.visitValues("", (k, v) -> {
|
||||||
|
result.add(k);
|
||||||
|
});
|
||||||
|
return result;
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new RuntimeIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import java.util.Arrays;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.lucares.utils.CollectionUtils;
|
import org.lucares.utils.CollectionUtils;
|
||||||
|
import org.lucares.utils.Preconditions;
|
||||||
|
|
||||||
abstract public class Expression {
|
abstract public class Expression {
|
||||||
|
|
||||||
@@ -12,30 +13,8 @@ abstract public class Expression {
|
|||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract static class UnaryExpression extends Expression {
|
boolean containsCaret() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
private final int line;
|
|
||||||
private final int startIndex;
|
|
||||||
private final int stopIndex;
|
|
||||||
|
|
||||||
public UnaryExpression(final int line, final int startIndex, final int stopIndex) {
|
|
||||||
super();
|
|
||||||
this.line = line;
|
|
||||||
this.startIndex = startIndex;
|
|
||||||
this.stopIndex = stopIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getLine() {
|
|
||||||
return line;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getStartIndex() {
|
|
||||||
return startIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getStopIndex() {
|
|
||||||
return stopIndex;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract static class TemporaryExpression extends Expression {
|
abstract static class TemporaryExpression extends Expression {
|
||||||
@@ -93,6 +72,11 @@ abstract public class Expression {
|
|||||||
return "!" + expression;
|
return "!" + expression;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
return expression.containsCaret();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
final int prime = 31;
|
final int prime = 31;
|
||||||
@@ -150,7 +134,12 @@ abstract public class Expression {
|
|||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
return " (" + left + " or " + right + ") ";
|
return "(" + left + " or " + right + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
return left.containsCaret() || right.containsCaret();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -191,7 +180,7 @@ abstract public class Expression {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Expression create(final List<Expression> or) {
|
public static Expression create(final List<? extends Expression> or) {
|
||||||
|
|
||||||
if (or.size() == 1) {
|
if (or.size() == 1) {
|
||||||
return or.get(0);
|
return or.get(0);
|
||||||
@@ -231,7 +220,12 @@ abstract public class Expression {
|
|||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
return " (" + left + " and " + right + ") ";
|
return "(" + left + " and " + right + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
return left.containsCaret() || right.containsCaret();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -294,11 +288,11 @@ abstract public class Expression {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class Terminal extends UnaryExpression {
|
static class Terminal extends Expression {
|
||||||
private final String value;
|
private final String value;
|
||||||
|
|
||||||
Terminal(final String value, final int line, final int startIndex, final int stopIndex) {
|
Terminal(final String value) {
|
||||||
super(line, startIndex, stopIndex);
|
|
||||||
this.value = value;
|
this.value = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -313,6 +307,11 @@ abstract public class Expression {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
return value.contains(NewProposerParser.CARET_MARKER);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
final int prime = 31;
|
final int prime = 31;
|
||||||
@@ -365,7 +364,24 @@ abstract public class Expression {
|
|||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
return " " + property + " = " + value.getValue() + " ";
|
return property + " = " + value.getValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
return value.containsCaret();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProperty() {
|
||||||
|
return property;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Terminal getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getValueAsString() {
|
||||||
|
return value.getValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -398,6 +414,7 @@ abstract public class Expression {
|
|||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static class Parentheses extends Expression {
|
static class Parentheses extends Expression {
|
||||||
@@ -419,7 +436,12 @@ abstract public class Expression {
|
|||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
return " [ " + expression + " ] ";
|
return "[ " + expression + " ]";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
return expression.containsCaret();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -473,6 +495,16 @@ abstract public class Expression {
|
|||||||
public String toString() {
|
public String toString() {
|
||||||
return "(" + String.join(", ", getValues()) + ")";
|
return "(" + String.join(", ", getValues()) + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
for (final Terminal terminal : propertyValues) {
|
||||||
|
if (terminal.containsCaret()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class InExpression extends Expression {
|
static class InExpression extends Expression {
|
||||||
@@ -506,6 +538,16 @@ abstract public class Expression {
|
|||||||
return values;
|
return values;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
for (final String value : values) {
|
||||||
|
if (value.contains(NewProposerParser.CARET_MARKER)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
final int prime = 31;
|
final int prime = 31;
|
||||||
@@ -537,4 +579,147 @@ abstract public class Expression {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final class AndCaretExpression extends Expression {
|
||||||
|
Property caretExpression;
|
||||||
|
Expression expression;
|
||||||
|
|
||||||
|
/**
 * Creates the conjunction of a property that holds the caret and another
 * expression that does not.
 * <p>
 * Both conditions are verified with {@link Preconditions} before the fields
 * are assigned.
 *
 * @param caretExpression the property whose value contains the caret marker
 * @param expression      the other operand; must not contain the caret marker
 */
public AndCaretExpression(final Property caretExpression, final Expression expression) {
    Preconditions.checkTrue(caretExpression.containsCaret(), "the expression '{0}' must contain the caret",
            caretExpression);
    // Report the operand that was actually checked (the original reported
    // caretExpression here, which made the failure message misleading).
    Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret",
            expression);
    this.caretExpression = caretExpression;
    this.expression = expression;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||||
|
return visitor.visit(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
|
||||||
|
return caretExpression.containsCaret();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Property getCaretExpression() {
|
||||||
|
return caretExpression;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Expression getExpression() {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "(" + caretExpression + " and " + expression + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
final int prime = 31;
|
||||||
|
int result = 1;
|
||||||
|
result = prime * result + ((caretExpression == null) ? 0 : caretExpression.hashCode());
|
||||||
|
result = prime * result + ((expression == null) ? 0 : expression.hashCode());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(final Object obj) {
|
||||||
|
if (this == obj)
|
||||||
|
return true;
|
||||||
|
if (obj == null)
|
||||||
|
return false;
|
||||||
|
if (getClass() != obj.getClass())
|
||||||
|
return false;
|
||||||
|
final AndCaretExpression other = (AndCaretExpression) obj;
|
||||||
|
if (caretExpression == null) {
|
||||||
|
if (other.caretExpression != null)
|
||||||
|
return false;
|
||||||
|
} else if (!caretExpression.equals(other.caretExpression))
|
||||||
|
return false;
|
||||||
|
if (expression == null) {
|
||||||
|
if (other.expression != null)
|
||||||
|
return false;
|
||||||
|
} else if (!expression.equals(other.expression))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class AndNotCaretExpression extends Expression {
|
||||||
|
Property negatedCaretExpression;
|
||||||
|
Expression expression;
|
||||||
|
|
||||||
|
public AndNotCaretExpression(final Property negatedCaretExpression, final Expression expression) {
|
||||||
|
Preconditions.checkTrue(negatedCaretExpression.containsCaret(),
|
||||||
|
"the expression '{0}' must contain the caret", negatedCaretExpression);
|
||||||
|
Preconditions.checkFalse(expression.containsCaret(), "the expression '{0}' must not contain the caret",
|
||||||
|
expression);
|
||||||
|
this.negatedCaretExpression = negatedCaretExpression;
|
||||||
|
this.expression = expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||||
|
return visitor.visit(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
|
||||||
|
return negatedCaretExpression.containsCaret();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Property getCaretExpression() {
|
||||||
|
return negatedCaretExpression;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Expression getExpression() {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "(!" + negatedCaretExpression + " and " + expression + ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class CaretAndExpression extends Expression {
|
||||||
|
|
||||||
|
private final Property caretExpression;
|
||||||
|
private final Property otherExpression;
|
||||||
|
|
||||||
|
public CaretAndExpression(final Property caretExpression, final Property otherExpression) {
|
||||||
|
this.caretExpression = caretExpression;
|
||||||
|
this.otherExpression = otherExpression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||||
|
return super.visit(visitor);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean containsCaret() {
|
||||||
|
Preconditions.checkTrue(caretExpression.containsCaret(),
|
||||||
|
"CaretAndExpression must contain the caret, but was: {0}", this);
|
||||||
|
return caretExpression.containsCaret();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "(caretAnd: " + caretExpression + " and " + otherExpression + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
public Property getCaretExpression() {
|
||||||
|
return caretExpression;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Property getOtherExpression() {
|
||||||
|
return otherExpression;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,4 +32,16 @@ public abstract class ExpressionVisitor<T> {
|
|||||||
public T visit(final Expression.Parentheses parentheses) {
|
public T visit(final Expression.Parentheses parentheses) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public T visit(final Expression.AndCaretExpression expression) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public T visit(final Expression.AndNotCaretExpression expression) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public T visit(final Expression.CaretAndExpression expression) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,225 @@
|
|||||||
|
package org.lucares.pdb.datastore.lang;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
import org.lucares.pdb.api.Tag;
|
||||||
|
import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.InExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Not;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Or;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Property;
|
||||||
|
import org.lucares.utils.CollectionUtils;
|
||||||
|
|
||||||
|
public class FindValuesForQueryCompletion extends ExpressionVisitor<SortedSet<String>> {
|
||||||
|
|
||||||
|
private static final class AndCaretExpressionVisitor extends ExpressionVisitor<SortedSet<String>> {
|
||||||
|
private final QueryCompletionIndex index;
|
||||||
|
private final String field;
|
||||||
|
|
||||||
|
public AndCaretExpressionVisitor(final QueryCompletionIndex queryCompletionIndex, final String field) {
|
||||||
|
index = queryCompletionIndex;
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final Property property) {
|
||||||
|
|
||||||
|
final String fieldA = property.getProperty();
|
||||||
|
final String valueA = property.getValue().getValue();
|
||||||
|
|
||||||
|
return index.find(fieldA, valueA, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final InExpression expression) {
|
||||||
|
|
||||||
|
final SortedSet<String> result = new TreeSet<>();
|
||||||
|
final String property = expression.getProperty();
|
||||||
|
final List<String> values = expression.getValues();
|
||||||
|
for (final String value : values) {
|
||||||
|
final SortedSet<String> candidates = index.find(property, value, field);
|
||||||
|
result.addAll(candidates);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final And expression) {
|
||||||
|
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
if (left instanceof Property && right instanceof Not) {
|
||||||
|
final Property leftProperty = (Property) left;
|
||||||
|
|
||||||
|
final SortedSet<String> allValuesForField = leftProperty.visit(this);
|
||||||
|
|
||||||
|
final Expression rightInnerExpression = ((Not) right).getExpression();
|
||||||
|
final SortedSet<String> rightResult = rightInnerExpression.visit(this);
|
||||||
|
|
||||||
|
return CollectionUtils.removeAll(allValuesForField, rightResult, TreeSet::new);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
final SortedSet<String> result = left.visit(this);
|
||||||
|
final SortedSet<String> rightResult = right.visit(this);
|
||||||
|
|
||||||
|
result.retainAll(rightResult);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final Or expression) {
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
final SortedSet<String> result = left.visit(this);
|
||||||
|
final SortedSet<String> rightResult = right.visit(this);
|
||||||
|
|
||||||
|
result.addAll(rightResult);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final Not expression) {
|
||||||
|
|
||||||
|
if (!(expression.getExpression() instanceof Property)) {
|
||||||
|
throw new UnsupportedOperationException("NOT expressions like '" + expression
|
||||||
|
+ "' are not supported. Only 'NOT property=value' expressions are supported.");
|
||||||
|
}
|
||||||
|
|
||||||
|
final Property property = (Property) expression.getExpression();
|
||||||
|
final Tag tag = new Tag(property.getProperty(), property.getValueAsString());
|
||||||
|
|
||||||
|
final SortedSet<String> valuesNotForField = index.findAllValuesNotForField(tag, field);
|
||||||
|
final SortedSet<String> valuesForField = index.find(tag, field);
|
||||||
|
final SortedSet<String> valuesOnlyAvailableInField = CollectionUtils.removeAll(valuesForField,
|
||||||
|
valuesNotForField, TreeSet::new);
|
||||||
|
|
||||||
|
final SortedSet<String> result = CollectionUtils.removeAll(valuesNotForField, valuesOnlyAvailableInField,
|
||||||
|
TreeSet::new);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final QueryCompletionIndex queryCompletionIndex;
|
||||||
|
|
||||||
|
public FindValuesForQueryCompletion(final QueryCompletionIndex queryCompletionIndex) {
|
||||||
|
this.queryCompletionIndex = queryCompletionIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final Property property) {
|
||||||
|
|
||||||
|
final String field = property.getProperty();
|
||||||
|
final String value = property.getValue().getValue();
|
||||||
|
|
||||||
|
final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(field);
|
||||||
|
|
||||||
|
final String valuePrefix = value.substring(0, value.indexOf(NewProposerParser.CARET_MARKER));
|
||||||
|
|
||||||
|
return GloblikePattern.filterValues(allValuesForField, valuePrefix, TreeSet::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final AndCaretExpression expression) {
|
||||||
|
|
||||||
|
final Property caretExpression = expression.getCaretExpression();
|
||||||
|
final String field = caretExpression.getProperty();
|
||||||
|
final String valueWithCaretMarker = caretExpression.getValue().getValue();
|
||||||
|
final String valuePrefix = valueWithCaretMarker.substring(0,
|
||||||
|
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));
|
||||||
|
|
||||||
|
final Expression rightHandExpression = expression.getExpression();
|
||||||
|
|
||||||
|
final SortedSet<String> candidateValues = rightHandExpression
|
||||||
|
.visit(new AndCaretExpressionVisitor(queryCompletionIndex, field));
|
||||||
|
|
||||||
|
return GloblikePattern.filterValues(candidateValues, valuePrefix, TreeSet::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final AndNotCaretExpression expression) {
|
||||||
|
|
||||||
|
final Property caretExpression = expression.getCaretExpression();
|
||||||
|
final String field = caretExpression.getProperty();
|
||||||
|
final String valueWithCaretMarker = caretExpression.getValue().getValue();
|
||||||
|
final String valuePattern = valueWithCaretMarker.substring(0,
|
||||||
|
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));
|
||||||
|
|
||||||
|
final SortedSet<String> allValuesForField = queryCompletionIndex
|
||||||
|
.findAllValuesForField(caretExpression.getProperty());
|
||||||
|
final SortedSet<String> valuesForFieldMatchingCaretExpression = GloblikePattern.filterValues(allValuesForField,
|
||||||
|
valuePattern, TreeSet::new);
|
||||||
|
|
||||||
|
final Expression rightHandExpression = expression.getExpression();
|
||||||
|
|
||||||
|
final SortedSet<String> rightHandValues = rightHandExpression
|
||||||
|
.visit(new AndCaretExpressionVisitor(queryCompletionIndex, field));
|
||||||
|
|
||||||
|
if (rightHandValues.size() == 1) {
|
||||||
|
// there is only one alternative and that one must not be chosen
|
||||||
|
return Collections.emptySortedSet();
|
||||||
|
}
|
||||||
|
final SortedSet<String> result = CollectionUtils.retainAll(rightHandValues,
|
||||||
|
valuesForFieldMatchingCaretExpression, TreeSet::new);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final Not expression) {
|
||||||
|
|
||||||
|
final String field;
|
||||||
|
final Expression innerExpression = expression.getExpression();
|
||||||
|
if (innerExpression instanceof Property) {
|
||||||
|
field = ((Property) innerExpression).getProperty();
|
||||||
|
final SortedSet<String> allValuesForField = queryCompletionIndex.findAllValuesForField(field);
|
||||||
|
final String valueWithCaretMarker = ((Property) innerExpression).getValue().getValue();
|
||||||
|
final String valuePrefix = valueWithCaretMarker.substring(0,
|
||||||
|
valueWithCaretMarker.indexOf(NewProposerParser.CARET_MARKER));
|
||||||
|
final TreeSet<String> result = GloblikePattern.filterValues(allValuesForField, valuePrefix + "*",
|
||||||
|
TreeSet::new);
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final Or expression) {
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
final SortedSet<String> result = left.visit(this);
|
||||||
|
final SortedSet<String> rightResult = right.visit(this);
|
||||||
|
|
||||||
|
result.addAll(rightResult);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSet<String> visit(final And expression) {
|
||||||
|
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
final SortedSet<String> result = left.visit(this);
|
||||||
|
final SortedSet<String> rightResult = right.visit(this);
|
||||||
|
|
||||||
|
result.retainAll(rightResult);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,11 +1,14 @@
|
|||||||
package org.lucares.pdb.datastore.lang;
|
package org.lucares.pdb.datastore.lang;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
class GloblikePattern {
|
public class GloblikePattern {
|
||||||
|
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(GloblikePattern.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(GloblikePattern.class);
|
||||||
|
|
||||||
@@ -25,4 +28,26 @@ class GloblikePattern {
|
|||||||
|
|
||||||
return Pattern.compile(valueRegex);
|
return Pattern.compile(valueRegex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static <T extends Collection<String>> T filterValues(final Collection<String> availableValues,
|
||||||
|
final String valuePattern, final Supplier<T> generator) {
|
||||||
|
final T result = generator.get();
|
||||||
|
|
||||||
|
return filterValues(result, availableValues, valuePattern);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends Collection<String>> T filterValues(final T result,
|
||||||
|
final Collection<String> availableValues, final String valuePattern) {
|
||||||
|
|
||||||
|
final Pattern pattern = GloblikePattern.globlikeToRegex(valuePattern);
|
||||||
|
|
||||||
|
for (final String value : availableValues) {
|
||||||
|
final Matcher matcher = pattern.matcher(value);
|
||||||
|
if (matcher.find() && !value.equals(valuePattern)) {
|
||||||
|
result.add(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,79 @@
|
|||||||
|
package org.lucares.pdb.datastore.lang;
|
||||||
|
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.CaretAndExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Not;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Or;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Parentheses;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Property;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Visitor that returns the expressions without any modifications. Can be used
|
||||||
|
* as base class for visitors that modify expressions.
|
||||||
|
*/
|
||||||
|
public abstract class IdentityExpressionVisitor extends ExpressionVisitor<Expression> {
|
||||||
|
@Override
|
||||||
|
public Expression visit(final And expression) {
|
||||||
|
|
||||||
|
final Expression left = expression.getLeft().visit(this);
|
||||||
|
final Expression right = expression.getRight().visit(this);
|
||||||
|
|
||||||
|
return new And(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Or expression) {
|
||||||
|
final Expression left = expression.getLeft().visit(this);
|
||||||
|
final Expression right = expression.getRight().visit(this);
|
||||||
|
|
||||||
|
return new Or(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Not expression) {
|
||||||
|
return new Not(expression.getExpression().visit(this));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Property expression) {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Expression.Terminal expression) {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Expression.MatchAll expression) {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Expression.InExpression expression) {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Parentheses parentheses) {
|
||||||
|
return new Parentheses(parentheses.getExpression().visit(this));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final AndCaretExpression expression) {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final AndNotCaretExpression expression) {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final CaretAndExpression expression) {
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,27 +1,195 @@
|
|||||||
package org.lucares.pdb.datastore.lang;
|
package org.lucares.pdb.datastore.lang;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.lucares.pdb.datastore.Proposal;
|
import org.lucares.pdb.datastore.Proposal;
|
||||||
import org.lucares.pdb.datastore.internal.DataStore;
|
import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
|
||||||
|
import org.lucares.utils.CollectionUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class NewProposerParser {
|
public class NewProposerParser {
|
||||||
|
|
||||||
private final static String CARET_MARKER = "\ue001"; // third character in the private use area
|
private static final Logger LOGGER = LoggerFactory.getLogger(NewProposerParser.class);
|
||||||
|
|
||||||
private final DataStore dataStore;
|
public final static String CARET_MARKER = "\ue001"; // character in the private use area
|
||||||
|
|
||||||
public NewProposerParser(final DataStore dataStore) {
|
/*
|
||||||
this.dataStore = dataStore;
|
* Regex matching a java identifier without a caret marker. We define it as a
|
||||||
|
* blacklist, because this is easier. The regex is only used <em>after</em> the
|
||||||
|
* query has already been validated with the proper grammar.
|
||||||
|
*/
|
||||||
|
private static final String REGEX_IDENTIFIER = "[^\\s,!\\(\\)=" + CARET_MARKER + "]*";
|
||||||
|
|
||||||
|
private final QueryCompletionIndex queryCompletionIndex;
|
||||||
|
|
||||||
|
public NewProposerParser(final QueryCompletionIndex queryCompletionIndex) {
|
||||||
|
this.queryCompletionIndex = queryCompletionIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Proposal> propose(final String query, final int caretIndex) {
|
public List<Proposal> propose(final String query, final int caretIndex) {
|
||||||
|
List<Proposal> proposals;
|
||||||
|
if (StringUtils.isBlank(query)) {
|
||||||
|
proposals = proposeForAllKeys();
|
||||||
|
} else {
|
||||||
|
|
||||||
final String queryString = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();
|
final List<Proposal> foundProposals = proposalsForValues(query, caretIndex);
|
||||||
|
if (foundProposals.isEmpty()) {
|
||||||
|
proposals = proposalsForNonValues(query, caretIndex);
|
||||||
|
} else {
|
||||||
|
proposals = foundProposals;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
final Expression expression = QueryLanguageParser.parse(queryString);
|
return proposals;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Proposal> proposalsForNonValues(final String query, final int caretIndex) {
|
||||||
|
final List<Proposal> proposals = new ArrayList<>();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This method is called when the query could not be parsed. It is likely that
|
||||||
|
* the next word is either a field or an operator. But it is also possible that
|
||||||
|
* the next word is a field-value, because the syntax error might be at another
|
||||||
|
* location in the query (not at the caret position).
|
||||||
|
*/
|
||||||
|
|
||||||
|
final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();
|
||||||
|
|
||||||
|
final List<String> tokens = QueryLanguage.getTokens(queryWithCaretMarker);
|
||||||
|
final int indexTokenWithCaret = CollectionUtils.indexOf(tokens, t -> t.contains(CARET_MARKER));
|
||||||
|
|
||||||
|
if (indexTokenWithCaret > 0) {
|
||||||
|
final String previousToken = tokens.get(indexTokenWithCaret - 1);
|
||||||
|
switch (previousToken) {
|
||||||
|
case "(":
|
||||||
|
case "and":
|
||||||
|
case "or":
|
||||||
|
case "!":
|
||||||
|
proposals.addAll(proposeForAllKeys(queryWithCaretMarker));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ")":
|
||||||
|
default:
|
||||||
|
// proposals.addAll(proposal);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if (indexTokenWithCaret == 0) {
|
||||||
|
proposals.addAll(proposeForAllKeys(queryWithCaretMarker));
|
||||||
|
}
|
||||||
|
|
||||||
|
return proposals;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Collection<? extends Proposal> proposeForAllKeys(final String queryWithCaretMarker) {
|
||||||
|
final List<Proposal> proposals = new ArrayList<>();
|
||||||
|
final String wordPrefix = wordPrefix(queryWithCaretMarker);
|
||||||
|
|
||||||
|
if (wordPrefix != null) {
|
||||||
|
final SortedSet<String> allFields = queryCompletionIndex.findAllFields();
|
||||||
|
for (final String field : allFields) {
|
||||||
|
|
||||||
|
if (!field.startsWith(wordPrefix)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
final String proposedQuery = queryWithCaretMarker
|
||||||
|
.replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, field + "=* ");
|
||||||
|
final String newQueryWithCaretMarker = queryWithCaretMarker
|
||||||
|
.replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, field + "=" + CARET_MARKER);
|
||||||
|
final String newQuery = newQueryWithCaretMarker.replace(CARET_MARKER, "");
|
||||||
|
final int newCaretPosition = newQueryWithCaretMarker.indexOf(CARET_MARKER);
|
||||||
|
final Proposal proposal = new Proposal(field, proposedQuery, true, newQuery, newCaretPosition);
|
||||||
|
proposals.add(proposal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return proposals;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String wordPrefix(final String queryWithCaretMarker) {
|
||||||
|
|
||||||
|
final Pattern pattern = Pattern.compile("(" + REGEX_IDENTIFIER + CARET_MARKER + ")");
|
||||||
|
final Matcher matcher = pattern.matcher(queryWithCaretMarker);
|
||||||
|
if (matcher.find()) {
|
||||||
|
final String group = matcher.group();
|
||||||
|
return group.replace(CARET_MARKER, "");
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<Proposal> proposeForAllKeys() {
|
||||||
|
final List<Proposal> proposals = new ArrayList<>();
|
||||||
|
|
||||||
|
final SortedSet<String> allFields = queryCompletionIndex.findAllFields();
|
||||||
|
for (final String field : allFields) {
|
||||||
|
final String proposedQuery = field + "=*";
|
||||||
|
final String newQuery = field + "=";
|
||||||
|
final int newCaretPosition = newQuery.length();
|
||||||
|
final Proposal proposal = new Proposal(field, proposedQuery, true, newQuery, newCaretPosition);
|
||||||
|
proposals.add(proposal);
|
||||||
|
}
|
||||||
|
|
||||||
|
return proposals;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<Proposal> proposalsForValues(final String query, final int caretIndex) {
|
||||||
|
try {
|
||||||
|
// Add caret marker, so that we know where the caret is.
|
||||||
|
// This also makes sure that a query like "name=|" ('|' is the caret) can be
|
||||||
|
// parsed.
|
||||||
|
// Without the caret marker the query would be "name=", which is not a valid
|
||||||
|
// expression.
|
||||||
|
final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, CARET_MARKER).toString();
|
||||||
|
|
||||||
|
// parse the query
|
||||||
|
final Expression expression = QueryLanguageParser.parse(queryWithCaretMarker);
|
||||||
|
|
||||||
|
// normalize it, so that we can use the queryCompletionIndex to search for
|
||||||
|
// candidate values
|
||||||
|
final QueryCompletionExpressionOptimizer optimizer = new QueryCompletionExpressionOptimizer();
|
||||||
|
final Expression normalizedExpression = optimizer.normalizeExpression(expression);
|
||||||
|
|
||||||
|
// find all candidate values
|
||||||
|
final SortedSet<String> candidateValues = normalizedExpression
|
||||||
|
.visit(new FindValuesForQueryCompletion(queryCompletionIndex));
|
||||||
|
|
||||||
|
// translate the candidate values to proposals
|
||||||
|
final List<Proposal> proposals = generateProposals(queryWithCaretMarker, expression, candidateValues);
|
||||||
|
|
||||||
|
return proposals;
|
||||||
|
} catch (final SyntaxException e) {
|
||||||
|
LOGGER.debug("Query ({}) is not valid. This is expected to happen "
|
||||||
|
+ "unless we are looking for proposals of values.", query, e);
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Proposal> generateProposals(final String queryWithCaretMarker, final Expression expression,
|
||||||
|
final SortedSet<String> candidateValues) {
|
||||||
|
final List<Proposal> proposals = new ArrayList<>();
|
||||||
|
|
||||||
|
for (final String proposedTag : candidateValues) {
|
||||||
|
|
||||||
|
final String proposedQueryWithCaretMarker = queryWithCaretMarker
|
||||||
|
.replaceAll(REGEX_IDENTIFIER + CARET_MARKER + REGEX_IDENTIFIER, proposedTag + CARET_MARKER);
|
||||||
|
|
||||||
|
final String proposedQuery = proposedQueryWithCaretMarker.replace(CARET_MARKER, "");
|
||||||
|
final int newCaretPosition = proposedQueryWithCaretMarker.indexOf(CARET_MARKER);
|
||||||
|
|
||||||
|
final Proposal proposal = new Proposal(proposedTag, proposedQuery, true, proposedQuery, newCaretPosition);
|
||||||
|
proposals.add(proposal);
|
||||||
|
}
|
||||||
|
|
||||||
|
return proposals;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,268 @@
|
|||||||
|
package org.lucares.pdb.datastore.lang;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.lucares.pdb.datastore.internal.QueryCompletionIndex;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.And;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.AndCaretExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.AndNotCaretExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.InExpression;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Not;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Or;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Property;
|
||||||
|
import org.lucares.pdb.datastore.lang.Expression.Terminal;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Query completion utilizes an index that contains all mappings of
|
||||||
|
* tags+fieldname to values. This index can be used to answer the question what
|
||||||
|
* the possible values for fields in simple AND-queries are.
|
||||||
|
* <p>
|
||||||
|
* E.g. Given the query "lastname=Doe and firstname=|" ('|' is the marker for
|
||||||
|
* the caret position). All possible values for firstname are in the index under
|
||||||
|
* "tagA.field=lastname and tagA.value=Doe and tagB.field=firstname". See also
|
||||||
|
* {@link QueryCompletionIndex}.
|
||||||
|
* <p>
|
||||||
|
* We can use this index for all boolean queries. But we have to normalize the
|
||||||
|
* queries first.
|
||||||
|
* <p>
|
||||||
|
* E.g. "(lastname=Doe or country=Atlantis) and firstname=|" will be normalized
|
||||||
|
* and split into two queries:
|
||||||
|
* <ol>
|
||||||
|
* <li>"lastname=Doe and firstname=|"
|
||||||
|
* <li>"country=Atlantis and firstname=|"
|
||||||
|
* </ol>
|
||||||
|
* Everything that is or'ed with the field for which we are doing the
|
||||||
|
* completion can be removed. E.g. "lastname=Doe or firstname=|" will be
|
||||||
|
* normalized to "firstname=|", because the expression lastname=Doe does not
|
||||||
|
* change which values are possible for firstname.
|
||||||
|
* <p>
|
||||||
|
* Consequently, IN-expressions are normalized to PROPERTY-expressions.
|
||||||
|
* <p>
|
||||||
|
* E.g. "firstname=John,|,Frank" will be normalized to "firstname=|".
|
||||||
|
*/
|
||||||
|
public class QueryCompletionExpressionOptimizer {
|
||||||
|
|
||||||
|
private static final class ReplaceINExpressionsWithPropertyExpressionsVisitor extends IdentityExpressionVisitor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final InExpression expression) {
|
||||||
|
if (expression.containsCaret() || expression.getValues().size() == 1) {
|
||||||
|
final String property = expression.getProperty();
|
||||||
|
final List<String> values = expression.getValues();
|
||||||
|
|
||||||
|
final List<Property> propertyExpressions = new ArrayList<>();
|
||||||
|
|
||||||
|
for (final String value : values) {
|
||||||
|
propertyExpressions.add(new Property(property, new Terminal(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
return Expression.Or.create(propertyExpressions);
|
||||||
|
} else {
|
||||||
|
return super.visit(expression);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class RemoveOrEdExpressions extends IdentityExpressionVisitor {
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Or expression) {
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
if (left.containsCaret() && !right.containsCaret()) {
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
if (!left.containsCaret() && right.containsCaret()) {
|
||||||
|
return right;
|
||||||
|
}
|
||||||
|
return super.visit(expression);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class DistributiveNormalization extends IdentityExpressionVisitor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression visit(final And expression) {
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
if (left instanceof Or) {
|
||||||
|
// (a or b) and c
|
||||||
|
// becomes
|
||||||
|
// a and c or b and c
|
||||||
|
final Expression ac = new And(((Or) left).getLeft(), right);
|
||||||
|
final Expression bc = new And(((Or) left).getRight(), right);
|
||||||
|
return new Or(ac, bc);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (right instanceof Or) {
|
||||||
|
// a and (b or c)
|
||||||
|
// becomes
|
||||||
|
// a and b or a and c
|
||||||
|
final Expression ab = new And(left, ((Or) right).getLeft());
|
||||||
|
final Expression ac = new And(left, ((Or) right).getRight());
|
||||||
|
return new Or(ab, ac);
|
||||||
|
}
|
||||||
|
return super.visit(expression);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class RotateAndExpressions extends IdentityExpressionVisitor {
|
||||||
|
@Override
|
||||||
|
public Expression visit(final And expression) {
|
||||||
|
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
// (| and a) and b => | and (a and b)
|
||||||
|
//
|
||||||
|
// The expression with the caret is moved up
|
||||||
|
if (left.containsCaret() && left instanceof And) {
|
||||||
|
final Expression leftLeft = ((And) left).getLeft();
|
||||||
|
final Expression leftRight = ((And) left).getRight();
|
||||||
|
|
||||||
|
if (leftLeft.containsCaret()) {
|
||||||
|
return new And(leftLeft, new And(leftRight, right));
|
||||||
|
} else {
|
||||||
|
return new And(new And(leftLeft, right), leftRight);
|
||||||
|
}
|
||||||
|
} else if (right.containsCaret() && right instanceof And) {
|
||||||
|
final Expression rightLeft = ((And) right).getLeft();
|
||||||
|
final Expression rightRight = ((And) right).getRight();
|
||||||
|
|
||||||
|
if (rightLeft.containsCaret()) {
|
||||||
|
return new And(rightLeft, new And(rightRight, left));
|
||||||
|
} else {
|
||||||
|
return new And(new And(rightLeft, left), rightRight);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return super.visit(expression);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class DoubleNegationExpressions extends IdentityExpressionVisitor {
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Not expression) {
|
||||||
|
if (expression instanceof Not) {
|
||||||
|
if (expression.getExpression() instanceof Not) {
|
||||||
|
return ((Not) expression.getExpression()).getExpression();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.visit(expression);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class DeMorgan extends IdentityExpressionVisitor {
|
||||||
|
@Override
|
||||||
|
public Expression visit(final Not expression) {
|
||||||
|
|
||||||
|
if (expression.getExpression() instanceof And) {
|
||||||
|
final And andExpression = (And) expression.getExpression();
|
||||||
|
final Expression left = andExpression.getLeft();
|
||||||
|
final Expression right = andExpression.getRight();
|
||||||
|
|
||||||
|
final Expression notLeft = new Not(left);
|
||||||
|
final Expression notRight = new Not(right);
|
||||||
|
|
||||||
|
return new Or(notLeft, notRight);
|
||||||
|
}
|
||||||
|
|
||||||
|
return super.visit(expression);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class ToAndCaretExpressions extends IdentityExpressionVisitor {
|
||||||
|
@Override
|
||||||
|
public Expression visit(final And expression) {
|
||||||
|
|
||||||
|
final Expression left = expression.getLeft();
|
||||||
|
final Expression right = expression.getRight();
|
||||||
|
|
||||||
|
if (left.containsCaret() && left instanceof Property) {
|
||||||
|
return new AndCaretExpression((Property) left, right);
|
||||||
|
}
|
||||||
|
if (right.containsCaret() && right instanceof Property) {
|
||||||
|
return new AndCaretExpression((Property) right, left);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (left.containsCaret()//
|
||||||
|
&& left instanceof Not//
|
||||||
|
&& ((Not) left).getExpression() instanceof Property) {
|
||||||
|
return new AndNotCaretExpression((Property) ((Not) left).getExpression(), right);
|
||||||
|
}
|
||||||
|
if (right.containsCaret()//
|
||||||
|
&& right instanceof Not//
|
||||||
|
&& ((Not) right).getExpression() instanceof Property) {
|
||||||
|
return new AndNotCaretExpression((Property) ((Not) right).getExpression(), left);
|
||||||
|
}
|
||||||
|
|
||||||
|
return super.visit(expression);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Expression normalizeExpression(final Expression expression) {
|
||||||
|
|
||||||
|
Expression normalizingExpression = expression;
|
||||||
|
Expression previousExpression = normalizingExpression;
|
||||||
|
do {
|
||||||
|
previousExpression = normalizingExpression;
|
||||||
|
// replace all IN-expression, because they are just syntactic sugar for
|
||||||
|
// OR-expressions, but only for those that include the caret
|
||||||
|
normalizingExpression = normalizingExpression
|
||||||
|
.visit(new ReplaceINExpressionsWithPropertyExpressionsVisitor());
|
||||||
|
|
||||||
|
// Remove expressions that are OR'ed with the one that contains the caret.
|
||||||
|
// Everything that is OR'ed with the 'caret'-expression cannot change the
|
||||||
|
// possible values.
|
||||||
|
normalizingExpression = visitRepeatedly(normalizingExpression, new RemoveOrEdExpressions());
|
||||||
|
|
||||||
|
// In the end we want to have expressions like "firstname=Jane and lastname=|".
|
||||||
|
// To reach that goal we use the distributive law to modify expressions like
|
||||||
|
// "(firstname=Jane or firstname=John) and lastname=|" to "(firstname=Jane and
|
||||||
|
// lastname=|) or (firstname=John and lastname=|)"
|
||||||
|
normalizingExpression = visitRepeatedly(normalizingExpression, new DistributiveNormalization());
|
||||||
|
|
||||||
|
// (fn=John and (fn=John and ln=|)
|
||||||
|
// normalized to
|
||||||
|
// (fn=John and ln=|) and (fn=Jane and ln=|)
|
||||||
|
// or normalized to
|
||||||
|
// (fn=John and fn=Jane) and ln=|
|
||||||
|
normalizingExpression = visitRepeatedly(normalizingExpression, new RotateAndExpressions());
|
||||||
|
|
||||||
|
// normalize a NAND-expression into an OR with DeMorgan, the OR-Expression might
|
||||||
|
// later be removed
|
||||||
|
// not ( a and b) => (not a) or (not b)
|
||||||
|
normalizingExpression = visitRepeatedly(normalizingExpression, new DeMorgan());
|
||||||
|
|
||||||
|
// remove double negation
|
||||||
|
// not not a => a
|
||||||
|
normalizingExpression = visitRepeatedly(normalizingExpression, new DoubleNegationExpressions());
|
||||||
|
} while (!normalizingExpression.equals(previousExpression));
|
||||||
|
|
||||||
|
// Replaces all (a and |) expressions with a special expression that represents
|
||||||
|
// it.
|
||||||
|
// This special expression will then be used during evaluation.
|
||||||
|
return visitRepeatedly(normalizingExpression, new ToAndCaretExpressions());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Expression visitRepeatedly(final Expression expression,
|
||||||
|
final ExpressionVisitor<Expression> visitor) {
|
||||||
|
Expression previousExpression;
|
||||||
|
Expression result = expression;
|
||||||
|
|
||||||
|
do {
|
||||||
|
previousExpression = result;
|
||||||
|
result = previousExpression.visit(visitor);
|
||||||
|
if (!previousExpression.equals(result)) {
|
||||||
|
System.out.println(" translate: " + visitor.getClass().getSimpleName());
|
||||||
|
System.out.println(" in: " + previousExpression);
|
||||||
|
System.out.println(" out: " + result);
|
||||||
|
}
|
||||||
|
} while (!previousExpression.equals(result));
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
package org.lucares.pdb.datastore.lang;
|
package org.lucares.pdb.datastore.lang;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
import java.util.Stack;
|
import java.util.Stack;
|
||||||
|
|
||||||
import org.antlr.v4.runtime.CharStream;
|
import org.antlr.v4.runtime.CharStream;
|
||||||
import org.antlr.v4.runtime.CharStreams;
|
import org.antlr.v4.runtime.CharStreams;
|
||||||
import org.antlr.v4.runtime.CommonTokenStream;
|
import org.antlr.v4.runtime.CommonTokenStream;
|
||||||
|
import org.antlr.v4.runtime.Token;
|
||||||
import org.antlr.v4.runtime.tree.ParseTree;
|
import org.antlr.v4.runtime.tree.ParseTree;
|
||||||
import org.antlr.v4.runtime.tree.ParseTreeListener;
|
import org.antlr.v4.runtime.tree.ParseTreeListener;
|
||||||
import org.antlr.v4.runtime.tree.ParseTreeWalker;
|
import org.antlr.v4.runtime.tree.ParseTreeWalker;
|
||||||
@@ -22,6 +24,7 @@ import org.lucares.pdb.datastore.lang.PdbLangParser.IdentifierExpressionContext;
|
|||||||
import org.lucares.pdb.datastore.lang.PdbLangParser.ListOfPropValuesContext;
|
import org.lucares.pdb.datastore.lang.PdbLangParser.ListOfPropValuesContext;
|
||||||
import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext;
|
import org.lucares.pdb.datastore.lang.PdbLangParser.NotExpressionContext;
|
||||||
import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyTerminalExpressionContext;
|
import org.lucares.pdb.datastore.lang.PdbLangParser.PropertyTerminalExpressionContext;
|
||||||
|
import org.lucares.utils.CollectionUtils;
|
||||||
|
|
||||||
public class QueryLanguage {
|
public class QueryLanguage {
|
||||||
|
|
||||||
@@ -51,11 +54,7 @@ public class QueryLanguage {
|
|||||||
throw new SyntaxException(ctx, "token too long");
|
throw new SyntaxException(ctx, "token too long");
|
||||||
}
|
}
|
||||||
|
|
||||||
final int line = ctx.getStart().getLine();
|
stack.push(new Terminal(ctx.getText()));
|
||||||
final int startIndex = ctx.getStart().getStartIndex();
|
|
||||||
final int stopIndex = ctx.getStart().getStopIndex();
|
|
||||||
|
|
||||||
stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -64,11 +63,7 @@ public class QueryLanguage {
|
|||||||
throw new SyntaxException(ctx, "token too long");
|
throw new SyntaxException(ctx, "token too long");
|
||||||
}
|
}
|
||||||
|
|
||||||
final int line = ctx.getStart().getLine();
|
stack.push(new Terminal(ctx.getText()));
|
||||||
final int startIndex = ctx.getStart().getStartIndex();
|
|
||||||
final int stopIndex = ctx.getStart().getStopIndex();
|
|
||||||
|
|
||||||
stack.push(new Terminal(ctx.getText(), line, startIndex, stopIndex));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -145,4 +140,15 @@ public class QueryLanguage {
|
|||||||
|
|
||||||
return stack.pop();
|
return stack.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static List<String> getTokens(final String input) {
|
||||||
|
final CharStream in = CharStreams.fromString(input);
|
||||||
|
|
||||||
|
final PdbLangLexer lexer = new PdbLangLexer(in);
|
||||||
|
|
||||||
|
final CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||||
|
tokens.fill();
|
||||||
|
final List<Token> tokenList = tokens.getTokens();
|
||||||
|
return CollectionUtils.map(tokenList, Token::getText);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,21 +1,36 @@
|
|||||||
package org.lucares.pdb.datastore.internal;
|
package org.lucares.pdb.datastore.internal;
|
||||||
|
|
||||||
|
import java.awt.BorderLayout;
|
||||||
|
import java.awt.event.KeyAdapter;
|
||||||
|
import java.awt.event.KeyEvent;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import javax.swing.JFrame;
|
||||||
|
import javax.swing.JTextArea;
|
||||||
|
import javax.swing.JTextField;
|
||||||
|
|
||||||
import org.lucares.pdb.api.Tags;
|
import org.lucares.pdb.api.Tags;
|
||||||
import org.lucares.pdb.blockstorage.BSFile;
|
import org.lucares.pdb.blockstorage.BSFile;
|
||||||
import org.lucares.pdb.datastore.Doc;
|
import org.lucares.pdb.datastore.Doc;
|
||||||
|
import org.lucares.pdb.datastore.Proposal;
|
||||||
import org.lucares.utils.CollectionUtils;
|
import org.lucares.utils.CollectionUtils;
|
||||||
import org.lucares.utils.file.FileUtils;
|
import org.lucares.utils.file.FileUtils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.AfterMethod;
|
import org.testng.annotations.AfterMethod;
|
||||||
import org.testng.annotations.BeforeMethod;
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -107,6 +122,162 @@ public class DataStoreTest {
|
|||||||
Assert.assertEquals(eagleTimBlockOffset % BSFile.BLOCK_SIZE, 0);
|
Assert.assertEquals(eagleTimBlockOffset % BSFile.BLOCK_SIZE, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "providerProposals")
|
||||||
|
public Iterator<Object[]> providerProposals() {
|
||||||
|
|
||||||
|
final List<Object[]> result = new ArrayList<>();
|
||||||
|
|
||||||
|
result.add(new Object[] { "type=bird and subtype=eagle and name=|", "name", Arrays.asList("Tim") });
|
||||||
|
|
||||||
|
// returns Tim, because it is the only dog's name starting with 'Ti'
|
||||||
|
result.add(new Object[] { "!name=Ti| and type=dog", "name", Arrays.asList("Tim") });
|
||||||
|
|
||||||
|
// all cats
|
||||||
|
result.add(new Object[] { "type=cat and !name=|", "name",
|
||||||
|
Arrays.asList("Jane", "John", "Paul", "Sam", "Timothy") });
|
||||||
|
|
||||||
|
// finds nothing, because there are not dogs names neither Jenny, nor Ti*
|
||||||
|
result.add(new Object[] { "!name=Ti| and type=dog and !name=Jenny", "name", Arrays.asList() });
|
||||||
|
|
||||||
|
result.add(new Object[] { "(type=bird and age=three or type=dog and age=three) and name=|", "name",
|
||||||
|
Arrays.asList("Jenny", "Tim") });
|
||||||
|
|
||||||
|
// all but Jennifer
|
||||||
|
result.add(new Object[] { "!(type=bird) and name=|", "name",
|
||||||
|
Arrays.asList("Jane", "Jenny", "John", "Paul", "Sam", "Tim", "Timothy") });
|
||||||
|
|
||||||
|
result.add(new Object[] { "type=bird and !subtype=eagle and name=|", "name", Arrays.asList("Jennifer") });
|
||||||
|
|
||||||
|
// DeMorgan
|
||||||
|
// TODO should only match "Jenny", because Jenny is the only non-bird name
|
||||||
|
// starting with 'Jen'
|
||||||
|
result.add(new Object[] { "!(type=bird and name=Jen|)", "name", Arrays.asList("Jennifer", "Jenny") });
|
||||||
|
result.add(new Object[] { "!(type=dog and name=|) and !type=cat", "name",
|
||||||
|
Arrays.asList("Jennifer", "Jenny", "Tim") });
|
||||||
|
|
||||||
|
return result.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "providerProposals")
|
||||||
|
public void testProposals(final String queryWithCaret, final String field,
|
||||||
|
final List<String> expectedProposedValues) throws Exception {
|
||||||
|
|
||||||
|
dataStore = new DataStore(dataDirectory);
|
||||||
|
|
||||||
|
final List<Tags> tags = Arrays.asList(
|
||||||
|
Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"),
|
||||||
|
Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"),
|
||||||
|
Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"),
|
||||||
|
|
||||||
|
Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"),
|
||||||
|
Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"),
|
||||||
|
|
||||||
|
Tags.create("type", "cat", "subtype", "tiger", "age", "one", "name", "Timothy"),
|
||||||
|
Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"),
|
||||||
|
Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"),
|
||||||
|
Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"),
|
||||||
|
Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John"));
|
||||||
|
|
||||||
|
tags.forEach(dataStore::createNewFile);
|
||||||
|
|
||||||
|
assertProposals(queryWithCaret, field, expectedProposedValues);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws IOException, InterruptedException {
|
||||||
|
final Path dir = Files.createTempDirectory("pdb");
|
||||||
|
try (DataStore dataStore = new DataStore(dir)) {
|
||||||
|
|
||||||
|
final List<Tags> tags = Arrays.asList(
|
||||||
|
Tags.create("type", "bird", "subtype", "eagle", "age", "three", "name", "Tim"),
|
||||||
|
Tags.create("type", "bird", "subtype", "pigeon", "age", "two", "name", "Jennifer"),
|
||||||
|
Tags.create("type", "bird", "subtype", "flamingo", "age", "one", "name", "Jennifer"),
|
||||||
|
|
||||||
|
Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Jenny"),
|
||||||
|
Tags.create("type", "dog", "subtype", "labrador", "age", "three", "name", "Tim"),
|
||||||
|
|
||||||
|
Tags.create("type", "cat", "subtype", "tiger", "age", "one", "name", "Timothy"),
|
||||||
|
Tags.create("type", "cat", "subtype", "tiger", "age", "two", "name", "Paul"),
|
||||||
|
Tags.create("type", "cat", "subtype", "lion", "age", "three", "name", "Jane"),
|
||||||
|
Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "Sam"),
|
||||||
|
Tags.create("type", "cat", "subtype", "lion", "age", "four", "name", "John"));
|
||||||
|
|
||||||
|
tags.forEach(dataStore::createNewFile);
|
||||||
|
|
||||||
|
final JFrame frame = new JFrame();
|
||||||
|
final JTextField input = new JTextField();
|
||||||
|
final JTextArea output = new JTextArea();
|
||||||
|
final JTextArea info = new JTextArea();
|
||||||
|
|
||||||
|
frame.add(input, BorderLayout.NORTH);
|
||||||
|
frame.add(output, BorderLayout.CENTER);
|
||||||
|
frame.add(info, BorderLayout.SOUTH);
|
||||||
|
|
||||||
|
input.setText("type=bird and !subtype=eagle and name=");
|
||||||
|
|
||||||
|
input.addKeyListener(new KeyAdapter() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void keyReleased(final KeyEvent e) {
|
||||||
|
|
||||||
|
final String query = input.getText();
|
||||||
|
final int caretIndex = input.getCaretPosition();
|
||||||
|
final List<Proposal> proposals = dataStore.propose(query, caretIndex);
|
||||||
|
|
||||||
|
final StringBuilder out = new StringBuilder();
|
||||||
|
|
||||||
|
for (final Proposal proposal : proposals) {
|
||||||
|
out.append(proposal.getProposedTag());
|
||||||
|
out.append(" ");
|
||||||
|
out.append(proposal.getProposedQuery());
|
||||||
|
out.append("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
final String queryWithCaretMarker = new StringBuilder(query).insert(caretIndex, "|").toString();
|
||||||
|
|
||||||
|
out.append("\n");
|
||||||
|
out.append("\n");
|
||||||
|
out.append("input: " + queryWithCaretMarker);
|
||||||
|
|
||||||
|
output.setText(out.toString());
|
||||||
|
|
||||||
|
}
|
||||||
|
});
|
||||||
|
final List<Doc> docs = dataStore.search("");
|
||||||
|
final StringBuilder out = new StringBuilder();
|
||||||
|
out.append("info\n");
|
||||||
|
for (final Doc doc : docs) {
|
||||||
|
out.append(doc.getTags());
|
||||||
|
out.append("\n");
|
||||||
|
}
|
||||||
|
info.setText(out.toString());
|
||||||
|
|
||||||
|
frame.setSize(800, 600);
|
||||||
|
frame.setVisible(true);
|
||||||
|
TimeUnit.HOURS.sleep(1000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertProposals(final String queryWithCaret, final String field,
|
||||||
|
final List<String> expectedProposedValues) {
|
||||||
|
final String query = queryWithCaret.replace("|", "");
|
||||||
|
final int caretIndex = queryWithCaret.indexOf("|");
|
||||||
|
final List<Proposal> proposals = dataStore.propose(query, caretIndex);
|
||||||
|
System.out.println(
|
||||||
|
"proposed values: " + proposals.stream().map(Proposal::getProposedTag).collect(Collectors.toList()));
|
||||||
|
|
||||||
|
proposals.forEach(p -> assertQueryFindsResults(p.getNewQuery()));
|
||||||
|
|
||||||
|
final List<String> proposedValues = CollectionUtils.map(proposals, Proposal::getProposedTag);
|
||||||
|
Collections.sort(proposedValues);
|
||||||
|
Collections.sort(expectedProposedValues);
|
||||||
|
Assert.assertEquals(proposedValues.toString(), expectedProposedValues.toString(), "proposed values:");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertQueryFindsResults(final String query) {
|
||||||
|
final List<Doc> result = dataStore.search(query);
|
||||||
|
Assert.assertFalse(result.isEmpty(), "The query '" + query + "' must return a result, but didn't.");
|
||||||
|
}
|
||||||
|
|
||||||
private void assertSearch(final String query, final Tags... tags) {
|
private void assertSearch(final String query, final Tags... tags) {
|
||||||
final List<Doc> actualDocs = dataStore.search(query);
|
final List<Doc> actualDocs = dataStore.search(query);
|
||||||
final List<Long> actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber);
|
final List<Long> actual = CollectionUtils.map(actualDocs, Doc::getRootBlockNumber);
|
||||||
|
|||||||
@@ -141,6 +141,19 @@ public class ProposerTest {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testProposalWithAndExpression() throws Exception {
|
||||||
|
assertProposals("name=*im and bird=eagle", 8, //
|
||||||
|
new Proposal("Tim", "name=Tim and bird=eagle", true, "name=Tim and bird=eagle", 8), //
|
||||||
|
new Proposal("Timothy", "name=Timothy and bird=eagle", true, "name=Timothy and bird=eagle", 12)//
|
||||||
|
);
|
||||||
|
|
||||||
|
assertProposals("name=*im and bird=eagle,pigeon", 8, //
|
||||||
|
new Proposal("Tim", "name=Tim and bird=eagle,pigeon", true, "name=Tim and bird=eagle,pigeon", 8), //
|
||||||
|
new Proposal("Timothy", "name=Timothy and bird=eagle,pigeon", true,
|
||||||
|
"name=Timothy and bird=eagle,pigeon", 12)//
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
private void assertProposals(final String query, final int caretIndex, final Proposal... expected)
|
private void assertProposals(final String query, final int caretIndex, final Proposal... expected)
|
||||||
throws InterruptedException {
|
throws InterruptedException {
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ public class QueryCompletionIndexTest {
|
|||||||
Tags.create("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C
|
Tags.create("firstname", "John", "lastname", "Miller", "country", "Atlantis")// C
|
||||||
);
|
);
|
||||||
|
|
||||||
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory.resolve("qci.bs"))) {
|
try (QueryCompletionIndex index = new QueryCompletionIndex(dataDirectory)) {
|
||||||
for (final Tags t : tags) {
|
for (final Tags t : tags) {
|
||||||
index.addTags(t);
|
index.addTags(t);
|
||||||
}
|
}
|
||||||
@@ -55,6 +55,13 @@ public class QueryCompletionIndexTest {
|
|||||||
// tags A and C match firstname=John, but both have country=Atlantis
|
// tags A and C match firstname=John, but both have country=Atlantis
|
||||||
final SortedSet<String> countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country");
|
final SortedSet<String> countryWithFirstnameJohn = index.find(new Tag("firstname", "John"), "country");
|
||||||
Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis"));
|
Assert.assertEquals(countryWithFirstnameJohn, Arrays.asList("Atlantis"));
|
||||||
|
|
||||||
|
// findAllValuesForField sorts alphabetically
|
||||||
|
final SortedSet<String> firstnames = index.findAllValuesForField("firstname");
|
||||||
|
Assert.assertEquals(firstnames, Arrays.asList("Jane", "John"), "found: " + firstnames);
|
||||||
|
|
||||||
|
final SortedSet<String> countries = index.findAllValuesForField("country");
|
||||||
|
Assert.assertEquals(countries, Arrays.asList("Atlantis", "ElDorado"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,8 +11,8 @@ public class Tag implements Comparable<Tag> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Tag(final String key, final String value) {
|
public Tag(final String key, final String value) {
|
||||||
this.key = Tags.STRING_COMPRESSOR.put(key);
|
this.key = key != null ? Tags.STRING_COMPRESSOR.put(key) : -1;
|
||||||
this.value = Tags.STRING_COMPRESSOR.put(value);
|
this.value = value != null ? Tags.STRING_COMPRESSOR.put(value) : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -72,6 +72,13 @@ public class Tags implements Comparable<Tags> {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Tags create(final String key1, final String value1, final String key2, final String value2,
|
||||||
|
final String key3, final String value3, final String key4, final String value4) {
|
||||||
|
final Tags result = TagsBuilder.create().add(key1, value1).add(key2, value2).add(key3, value3).add(key4, value4)
|
||||||
|
.build();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
public static Tags fromBytes(final byte[] bytes) {
|
public static Tags fromBytes(final byte[] bytes) {
|
||||||
final List<Tag> result = new ArrayList<>();
|
final List<Tag> result = new ArrayList<>();
|
||||||
|
|
||||||
@@ -188,7 +195,7 @@ public class Tags implements Comparable<Tags> {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Tags [tags=" + tags + "]";
|
return String.valueOf(tags);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
|
import java.util.function.Supplier;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
@@ -62,4 +63,31 @@ public class CollectionUtils {
|
|||||||
return collection.stream().filter(predicate).collect(Collectors.toList());
|
return collection.stream().filter(predicate).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static <T> int indexOf(final List<T> list, final Predicate<T> predicate) {
|
||||||
|
for (int i = 0; i < list.size(); i++) {
|
||||||
|
if (predicate.test(list.get(i))) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <V, T extends Collection<V>> T removeAll(final T collection, final T remove,
|
||||||
|
final Supplier<T> generator) {
|
||||||
|
|
||||||
|
final T result = generator.get();
|
||||||
|
result.addAll(collection);
|
||||||
|
result.removeAll(remove);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <V, T extends Collection<V>> T retainAll(final T collection, final T retain,
|
||||||
|
final Supplier<T> generator) {
|
||||||
|
|
||||||
|
final T result = generator.get();
|
||||||
|
result.addAll(collection);
|
||||||
|
result.retainAll(retain);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,12 @@ public class Preconditions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void checkSmaller(final long a, final long b, final String message, final Object... args) {
|
||||||
|
if (a >= b) {
|
||||||
|
throw new IllegalStateException(MessageFormat.format(message, args) + " Expected: " + a + " < " + b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void checkEqual(final Object actual, final Object expected) {
|
public static void checkEqual(final Object actual, final Object expected) {
|
||||||
checkEqual(actual, expected, "expected {0} is equal to {1}", actual, expected);
|
checkEqual(actual, expected, "expected {0} is equal to {1}", actual, expected);
|
||||||
}
|
}
|
||||||
@@ -74,6 +80,18 @@ public class Preconditions {
|
|||||||
checkEqual(actual, true, message, args);
|
checkEqual(actual, true, message, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that the given value is false.
|
||||||
|
*
|
||||||
|
* @param actual must be false
|
||||||
|
* @param message formatted with {@link MessageFormat}
|
||||||
|
* @param args arguments for the message
|
||||||
|
* @throws IllegalStateException if {@code actual} is not false
|
||||||
|
*/
|
||||||
|
public static void checkFalse(final boolean actual, final String message, final Object... args) {
|
||||||
|
checkEqual(actual, false, message, args);
|
||||||
|
}
|
||||||
|
|
||||||
public static void checkNull(final Object actual, final String message, final Object... args) {
|
public static void checkNull(final Object actual, final String message, final Object... args) {
|
||||||
if (actual != null) {
|
if (actual != null) {
|
||||||
throw new IllegalStateException(MessageFormat.format(message, args));
|
throw new IllegalStateException(MessageFormat.format(message, args));
|
||||||
|
|||||||
Reference in New Issue
Block a user