remove project pdb-keyword-db
It was only created to test how fast a keyword DB implementation that works with integer arrays would be.
This commit is contained in:
6
pdb-keyword-db/.gitignore
vendored
6
pdb-keyword-db/.gitignore
vendored
@@ -1,6 +0,0 @@
|
||||
/bin/
|
||||
/build/
|
||||
/.settings/
|
||||
/.classpath
|
||||
/.project
|
||||
/test-output/
|
||||
@@ -1,17 +0,0 @@
|
||||
|
||||
// Build script of the pdb-keyword-db module: the parser for the query
// language is generated from the ANTLR grammar by the 'antlr' plugin.
apply plugin: 'antlr'

dependencies {
    // provides org.lucares.collections.IntList
    compile 'org.lucares:primitiveCollections:0.1.20170203201705'

    // the same ANTLR artifact is declared for running the code and for the generator
    runtime "org.antlr:antlr4:4.6"
    antlr "org.antlr:antlr4:4.6"
}

// Extra source set that points at the directory containing the generated parser sources.
sourceSets {
    generated {
        java.srcDir "build/generated-src/antlr/main"
    }
}

// Compile the generated sources together with the hand-written ones.
compileJava {
    source += sourceSets.generated.java
}
|
||||
@@ -1,62 +0,0 @@
|
||||
// Query language of the keyword database: boolean combinations
// (and / or / ! / parentheses) of "property=value" comparisons.
grammar KeywordsLang;

@header {
package org.lucares.pdb.keyword.db;
}

// Entry rule: exactly one expression followed by the end of the input.
start : expression EOF ;

// The order of the alternatives is significant: in an ANTLR 4 left-recursive
// rule, alternatives listed earlier take precedence, so '!' binds tighter
// than 'and', which binds tighter than 'or'.
expression
    : prop=identifier eq=equal value=identifier     #propertyExpression
    | LPAREN expression RPAREN                      #parenExpression
    | NOT expression                                #notExpression
    | left=expression op_and=and right=expression   #andExpression
    | left=expression op_or=or right=expression     #orExpression
    ;

identifier
    : IDENTIFIER                                    #identifierExpression
    ;

// Parser-level wrappers around the operator tokens.
and : AND ;
or : OR ;

equal : EQUAL ;

// Keywords are case-insensitive. They are declared before IDENTIFIER so that
// they win when both rules match the same text.
AND : [aA][nN][dD] ;
OR : [oO][rR] ;
NOT : '!';
EQUAL : '=' ;
LPAREN : '(' ;
RPAREN : ')' ;
WS : [ \r\t\u000C\n]+ -> skip;

IDENTIFIER
    : JavaLetter JavaLetterOrDigit*
    ;

// First character of an identifier.
fragment
JavaLetter
    : [a-zA-Z$_] // these are the "java letters" below 0x7F
    | '*' // asterisk, used for wildcards
    | // covers all characters above 0x7F which are not a surrogate
      ~[\u0000-\u007F\uD800-\uDBFF]
      {Character.isJavaIdentifierStart(_input.LA(-1))}?
    | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
      [\uD800-\uDBFF] [\uDC00-\uDFFF]
      {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}?
    ;

// Any following character of an identifier; additionally allows digits, '.' and '/'.
fragment
JavaLetterOrDigit
    : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
    | '*' // asterisk, used for wildcards
    | [./]
    | // covers all characters above 0x7F which are not a surrogate
      ~[\u0000-\u007F\uD800-\uDBFF]
      {Character.isJavaIdentifierPart(_input.LA(-1))}?
    | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
      [\uD800-\uDBFF] [\uDC00-\uDFFF]
      {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}?
    ;
|
||||
@@ -1,447 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
abstract public class Expression {
|
||||
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
abstract static class TemporaryExpression extends Expression {
|
||||
|
||||
abstract Expression toExpression(Expression left, Expression right);
|
||||
}
|
||||
|
||||
static class OrTemporary extends TemporaryExpression {
|
||||
|
||||
@Override
|
||||
Expression toExpression(final Expression left, final Expression right) {
|
||||
return new Or(left, right);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "OrTemporary";
|
||||
}
|
||||
}
|
||||
|
||||
static class AndTemporary extends TemporaryExpression {
|
||||
@Override
|
||||
Expression toExpression(final Expression left, final Expression right) {
|
||||
return new And(left, right);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "AndTemporary";
|
||||
}
|
||||
}
|
||||
|
||||
public static Or or(final Expression left, final Expression right) {
|
||||
return new Or(left, right);
|
||||
}
|
||||
|
||||
public static Or or(final String left, final String right) {
|
||||
return new Or(term(left), term(right));
|
||||
}
|
||||
|
||||
public static And and(final Expression left, final Expression right) {
|
||||
return new And(left, right);
|
||||
}
|
||||
|
||||
public static And and(final String left, final String right) {
|
||||
return new And(term(left), term(right));
|
||||
}
|
||||
|
||||
public static Terminal term(final String value) {
|
||||
return new Terminal(value);
|
||||
}
|
||||
|
||||
public static MatchAll matchAll() {
|
||||
return MatchAll.INSTANCE;
|
||||
}
|
||||
|
||||
public static Not not(final Expression expression) {
|
||||
return new Not(expression);
|
||||
}
|
||||
|
||||
public static Not not(final String expression) {
|
||||
return new Not(term(expression));
|
||||
}
|
||||
|
||||
public static Property property(final String property, final String value) {
|
||||
return new Property(property, value);
|
||||
}
|
||||
|
||||
public static Parentheses parentheses(final Expression expression) {
|
||||
return new Parentheses(expression);
|
||||
}
|
||||
|
||||
static class Not extends Expression {
|
||||
private final Expression expression;
|
||||
|
||||
Not(final Expression expression) {
|
||||
this.expression = expression;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
Expression getExpression() {
|
||||
return expression;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "!" + expression;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((expression == null) ? 0 : expression.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final Not other = (Not) obj;
|
||||
if (expression == null) {
|
||||
if (other.expression != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!expression.equals(other.expression)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class Or extends Expression {
|
||||
private final Expression left;
|
||||
private final Expression right;
|
||||
|
||||
Or(final Expression left, final Expression right) {
|
||||
this.left = left;
|
||||
this.right = right;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
Expression getLeft() {
|
||||
return left;
|
||||
}
|
||||
|
||||
Expression getRight() {
|
||||
return right;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return " (" + left + " or " + right + ") ";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((left == null) ? 0 : left.hashCode());
|
||||
result = prime * result + ((right == null) ? 0 : right.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final Or other = (Or) obj;
|
||||
if (left == null) {
|
||||
if (other.left != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!left.equals(other.left)) {
|
||||
return false;
|
||||
}
|
||||
if (right == null) {
|
||||
if (other.right != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!right.equals(other.right)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class And extends Expression {
|
||||
private final Expression left;
|
||||
private final Expression right;
|
||||
|
||||
And(final Expression left, final Expression right) {
|
||||
this.left = left;
|
||||
this.right = right;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
Expression getLeft() {
|
||||
return left;
|
||||
}
|
||||
|
||||
Expression getRight() {
|
||||
return right;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return " (" + left + " and " + right + ") ";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((left == null) ? 0 : left.hashCode());
|
||||
result = prime * result + ((right == null) ? 0 : right.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final And other = (And) obj;
|
||||
if (left == null) {
|
||||
if (other.left != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!left.equals(other.left)) {
|
||||
return false;
|
||||
}
|
||||
if (right == null) {
|
||||
if (other.right != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!right.equals(other.right)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class MatchAll extends Expression {
|
||||
|
||||
public static final MatchAll INSTANCE = new MatchAll();
|
||||
|
||||
private MatchAll() {
|
||||
//
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return "1=1";
|
||||
}
|
||||
}
|
||||
|
||||
static class Terminal extends Expression {
|
||||
private final String value;
|
||||
|
||||
Terminal(final String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((value == null) ? 0 : value.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final Terminal other = (Terminal) obj;
|
||||
if (value == null) {
|
||||
if (other.value != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!value.equals(other.value)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
static class Property extends Expression {
|
||||
final String property;
|
||||
final String stringValue;
|
||||
|
||||
public Property(final String property, final String value) {
|
||||
this.property = property;
|
||||
this.stringValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return " " + property + " = " + stringValue + " ";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((property == null) ? 0 : property.hashCode());
|
||||
result = prime * result + ((stringValue == null) ? 0 : stringValue.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
final Property other = (Property) obj;
|
||||
if (property == null) {
|
||||
if (other.property != null)
|
||||
return false;
|
||||
} else if (!property.equals(other.property))
|
||||
return false;
|
||||
if (stringValue == null) {
|
||||
if (other.stringValue != null)
|
||||
return false;
|
||||
} else if (!stringValue.equals(other.stringValue))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class Parentheses extends Expression {
|
||||
private final Expression expression;
|
||||
|
||||
Parentheses(final Expression expression) {
|
||||
this.expression = expression;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(final ExpressionVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public Expression getExpression() {
|
||||
return expression;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return " [ " + expression + " ] ";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((expression == null) ? 0 : expression.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final Parentheses other = (Parentheses) obj;
|
||||
if (expression == null) {
|
||||
if (other.expression != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!expression.equals(other.expression)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,171 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.lucares.collections.IntList;
|
||||
import org.lucares.pdb.keyword.db.Expression.And;
|
||||
import org.lucares.pdb.keyword.db.Expression.Not;
|
||||
import org.lucares.pdb.keyword.db.Expression.Or;
|
||||
import org.lucares.pdb.keyword.db.Expression.Property;
|
||||
|
||||
public class ExpressionToFilesVisitor extends ExpressionVisitor<int[]> {
|
||||
|
||||
private static final int[] EMPTY = new int[0];
|
||||
private final Map<String, Map<String, IntList>> tagToFiles;
|
||||
private final List<KeywordTags> fileToTags;
|
||||
|
||||
public ExpressionToFilesVisitor(final Map<String, Map<String, IntList>> tagToFiles,
|
||||
final List<KeywordTags> fileToTags) {
|
||||
this.tagToFiles = tagToFiles;
|
||||
this.fileToTags = fileToTags;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] visit(final And expression) {
|
||||
|
||||
final Expression left = expression.getLeft();
|
||||
final Expression right = expression.getRight();
|
||||
|
||||
final int[] leftFiles = left.visit(this);
|
||||
final int[] rightFiles = right.visit(this);
|
||||
|
||||
final int[] result = new int[Math.min(leftFiles.length, rightFiles.length)];
|
||||
|
||||
int l = 0;
|
||||
int r = 0;
|
||||
int i = 0;
|
||||
|
||||
while (l < leftFiles.length && r < rightFiles.length) {
|
||||
|
||||
final int lv = leftFiles[l];
|
||||
final int rv = rightFiles[r];
|
||||
|
||||
if (lv < rv) {
|
||||
l++;
|
||||
} else if (lv > rv) {
|
||||
r++;
|
||||
} else {
|
||||
result[i] = lv;
|
||||
i++;
|
||||
l++;
|
||||
r++;
|
||||
}
|
||||
}
|
||||
|
||||
return Arrays.copyOfRange(result, 0, i);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] visit(final Or expression) {
|
||||
final Expression left = expression.getLeft();
|
||||
final Expression right = expression.getRight();
|
||||
|
||||
final int[] leftFiles = left.visit(this);
|
||||
final int[] rightFiles = right.visit(this);
|
||||
|
||||
final int[] result = new int[leftFiles.length + rightFiles.length];
|
||||
|
||||
int l = 0;
|
||||
int r = 0;
|
||||
int i = 0;
|
||||
|
||||
while (l < leftFiles.length && r < rightFiles.length) {
|
||||
|
||||
final int lv = leftFiles[l];
|
||||
final int rv = rightFiles[r];
|
||||
|
||||
if (lv < rv) {
|
||||
result[i] = lv;
|
||||
i++;
|
||||
l++;
|
||||
} else if (lv > rv) {
|
||||
result[i] = lv;
|
||||
i++;
|
||||
r++;
|
||||
} else {
|
||||
result[i] = lv;
|
||||
i++;
|
||||
l++;
|
||||
r++;
|
||||
}
|
||||
}
|
||||
|
||||
if (l < leftFiles.length) {
|
||||
final int length = leftFiles.length - l;
|
||||
System.arraycopy(leftFiles, l, result, i, length);
|
||||
i += length;
|
||||
} else if (r < rightFiles.length) {
|
||||
final int length = rightFiles.length - r;
|
||||
System.arraycopy(rightFiles, r, result, i, length);
|
||||
i += length;
|
||||
}
|
||||
|
||||
return Arrays.copyOfRange(result, 0, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] visit(final Not expression) {
|
||||
|
||||
final Expression negatedExpression = expression.getExpression();
|
||||
|
||||
final int[] files = negatedExpression.visit(this);
|
||||
final int[] allDocIds = getAllDocumentIds();
|
||||
|
||||
final int[] tmp = new int[allDocIds.length];
|
||||
|
||||
for (int i = 0; i < files.length; i++) {
|
||||
tmp[files[i]] = -1;
|
||||
}
|
||||
|
||||
Arrays.sort(tmp);
|
||||
int indexOfFirstValue = 0;
|
||||
for (indexOfFirstValue = 0; indexOfFirstValue < tmp.length; indexOfFirstValue++) {
|
||||
if (tmp[indexOfFirstValue] >= 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
final int[] result = Arrays.copyOfRange(tmp, indexOfFirstValue, tmp.length);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] visit(final Expression.MatchAll expression) {
|
||||
|
||||
return getAllDocumentIds();
|
||||
}
|
||||
|
||||
private int[] getAllDocumentIds() {
|
||||
final int[] result = new int[fileToTags.size()];
|
||||
for (int i = 0; i < result.length; i++) {
|
||||
result[i] = i;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] visit(final Property expression) {
|
||||
|
||||
final int[] result;
|
||||
final String property = expression.property;
|
||||
final String stringValue = expression.stringValue;
|
||||
final Map<String, IntList> values = tagToFiles.get(property);
|
||||
if (values != null) {
|
||||
final IntList files = values.get(stringValue);
|
||||
if (files != null) {
|
||||
result = files.toArray();
|
||||
} else {
|
||||
result = EMPTY;
|
||||
}
|
||||
} else {
|
||||
result = EMPTY;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
public abstract class ExpressionVisitor<T> {
|
||||
public T visit(final Expression.And expression) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public T visit(final Expression.Or expression) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public T visit(final Expression.Not expression) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public T visit(final Expression.Property expression) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public T visit(final Expression.Terminal expression) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public T visit(final Expression.MatchAll expression) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public T visit(final Expression.Parentheses parentheses) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
@@ -1,97 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
|
||||
public class KeywordTags {
|
||||
|
||||
private final Map<String, String> tags;
|
||||
|
||||
private int cachedHash = 0;
|
||||
|
||||
private final String file;
|
||||
|
||||
public KeywordTags(final String file, final Map<String, String> tags) {
|
||||
this.file = file;
|
||||
this.tags = tags;
|
||||
}
|
||||
|
||||
public String getValue(final String key) {
|
||||
final String value = tags.get(key);
|
||||
return value;
|
||||
}
|
||||
|
||||
public Set<String> getKeys() {
|
||||
return new TreeSet<>(tags.keySet());
|
||||
}
|
||||
|
||||
public String getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.valueOf(tags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
if (cachedHash != 0) {
|
||||
return cachedHash;
|
||||
} else {
|
||||
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((tags == null) ? 0 : tags.hashCode());
|
||||
cachedHash = result;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
final KeywordTags other = (KeywordTags) obj;
|
||||
if (cachedHash != other.cachedHash)
|
||||
return false;
|
||||
if (tags == null) {
|
||||
if (other.tags != null)
|
||||
return false;
|
||||
} else if (!tags.equals(other.tags))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
public String abbreviatedRepresentation() {
|
||||
final StringBuilder result = new StringBuilder();
|
||||
final int maxLength = 200;
|
||||
|
||||
final SortedSet<String> keys = new TreeSet<>(tags.keySet());
|
||||
|
||||
final int cutAt = maxLength / (keys.size() * 2 + 2);
|
||||
|
||||
for (final String key : keys) {
|
||||
|
||||
final String value = tags.get(key);
|
||||
|
||||
result.append(substr(key, cutAt));
|
||||
result.append("-");
|
||||
result.append(substr(value, cutAt));
|
||||
result.append("_");
|
||||
}
|
||||
|
||||
return substr(result.toString(), maxLength);
|
||||
}
|
||||
|
||||
private static String substr(final String s, final int maxLength) {
|
||||
return s.substring(0, Math.min(maxLength, s.length()));
|
||||
}
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.lucares.collections.IntList;
|
||||
|
||||
public class Keywords {
|
||||
|
||||
List<KeywordTags> fileToTags = new ArrayList<>();
|
||||
|
||||
final Map<String, Map<String, IntList>> tagToFiles = new HashMap<>();
|
||||
|
||||
public void addFile(final String file, final Map<String, String> tags) {
|
||||
|
||||
final int docId = fileToTags.size();
|
||||
fileToTags.add(new KeywordTags(file, tags));
|
||||
|
||||
for (final Entry<String, String> e : tags.entrySet()) {
|
||||
|
||||
final String field = e.getKey();
|
||||
final String value = e.getValue();
|
||||
|
||||
tagToFiles.putIfAbsent(field, new HashMap<>());
|
||||
final Map<String, IntList> fieldToFiles = tagToFiles.get(field);
|
||||
fieldToFiles.putIfAbsent(value, new IntList(1));
|
||||
|
||||
final IntList t = fieldToFiles.get(value);
|
||||
|
||||
t.add(docId);
|
||||
}
|
||||
}
|
||||
|
||||
public int[] search(final String query) {
|
||||
final long start = System.nanoTime();
|
||||
final Expression expression = KeywordsLanguageParser.parse(query);
|
||||
final long duration = System.nanoTime() - start;
|
||||
final String parsing = "parsing: " + duration / 1_000_000.0 + "ms";
|
||||
// System.out.println(expression.visit(new PrintExpressionVisitor()));
|
||||
|
||||
final ExpressionToFilesVisitor visitor = new ExpressionToFilesVisitor(tagToFiles, fileToTags);
|
||||
|
||||
final long start2 = System.nanoTime();
|
||||
final int[] result = expression.visit(visitor);
|
||||
final long duration2 = System.nanoTime() - start2;
|
||||
System.out.println(parsing + "; searching: " + duration2 / 1_000_000.0 + "ms; found=" + result.length);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Keywords [files=" + fileToTags.size() + ", fields=" + tagToFiles.size() + "]";
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,122 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
|
||||
import org.antlr.v4.gui.TreeViewer;
|
||||
import org.antlr.v4.runtime.ANTLRInputStream;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.DiagnosticErrorListener;
|
||||
import org.antlr.v4.runtime.tree.ParseTree;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeListener;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeWalker;
|
||||
import org.lucares.pdb.keyword.db.Expression.And;
|
||||
import org.lucares.pdb.keyword.db.Expression.Not;
|
||||
import org.lucares.pdb.keyword.db.Expression.Or;
|
||||
import org.lucares.pdb.keyword.db.Expression.Terminal;
|
||||
import org.lucares.pdb.keyword.db.KeywordsLangParser.AndExpressionContext;
|
||||
import org.lucares.pdb.keyword.db.KeywordsLangParser.IdentifierExpressionContext;
|
||||
import org.lucares.pdb.keyword.db.KeywordsLangParser.NotExpressionContext;
|
||||
import org.lucares.pdb.keyword.db.KeywordsLangParser.OrExpressionContext;
|
||||
import org.lucares.pdb.keyword.db.KeywordsLangParser.PropertyExpressionContext;
|
||||
|
||||
public class KeywordsLangLanguage {
|
||||
|
||||
public Expression parse(final String input) {
|
||||
// define the input
|
||||
final ANTLRInputStream in = new ANTLRInputStream(input);
|
||||
|
||||
// create lexer and parser
|
||||
final KeywordsLangLexer lexer = new KeywordsLangLexer(in);
|
||||
lexer.addErrorListener(new DiagnosticErrorListener());
|
||||
|
||||
final CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||
final KeywordsLangParser parser = new KeywordsLangParser(tokens);
|
||||
|
||||
final Stack<Expression> stack = new Stack<>();
|
||||
|
||||
// define a listener that is called for every terminals and
|
||||
// non-terminals
|
||||
final ParseTreeListener listener = new KeywordsLangBaseListener() {
|
||||
|
||||
@Override
|
||||
public void exitIdentifierExpression(final IdentifierExpressionContext ctx) {
|
||||
// System.out.println("push identifier " + ctx.getText());
|
||||
|
||||
if (ctx.getText().length() > 255) {
|
||||
throw new SyntaxException(ctx, "token too long");
|
||||
}
|
||||
|
||||
stack.push(new Terminal(ctx.getText()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void exitPropertyExpression(final PropertyExpressionContext ctx) {
|
||||
// System.out.println("property expression");
|
||||
|
||||
final Expression value = stack.pop();
|
||||
final Terminal property = (Terminal) stack.pop();
|
||||
|
||||
if (value instanceof Terminal) {
|
||||
stack.push(new Expression.Property(property.getValue(), ((Terminal) value).getValue()));
|
||||
|
||||
} else {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void exitNotExpression(final NotExpressionContext ctx) {
|
||||
|
||||
final Expression expression = stack.pop();
|
||||
|
||||
final Expression notExpression = new Not(expression);
|
||||
stack.push(notExpression);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void exitAndExpression(final AndExpressionContext ctx) {
|
||||
|
||||
final Expression right = stack.pop();
|
||||
final Expression left = stack.pop();
|
||||
|
||||
stack.push(new And(left, right));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void exitOrExpression(final OrExpressionContext ctx) {
|
||||
final Expression right = stack.pop();
|
||||
final Expression left = stack.pop();
|
||||
|
||||
stack.push(new Or(left, right));
|
||||
}
|
||||
};
|
||||
|
||||
// Specify our entry point
|
||||
final ParseTree parseTree = parser.start();
|
||||
|
||||
// Walk it and attach our listener
|
||||
final ParseTreeWalker walker = new ParseTreeWalker();
|
||||
walker.walk(listener, parseTree);
|
||||
|
||||
if (stack.size() != 1) {
|
||||
throw new RuntimeException("stack should have exactly one element " + stack);
|
||||
}
|
||||
|
||||
return stack.pop();
|
||||
}
|
||||
|
||||
public static void main(final String[] args) {
|
||||
final org.antlr.v4.runtime.CharStream stream = new ANTLRInputStream("prop=value");
|
||||
final KeywordsLangLexer lexer = new KeywordsLangLexer(stream);
|
||||
final CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||
final KeywordsLangParser parser = new KeywordsLangParser(tokens);
|
||||
final ParseTree tree = parser.start();
|
||||
final List<String> ruleNames = Arrays.asList(KeywordsLangParser.ruleNames);
|
||||
final TreeViewer view = new TreeViewer(ruleNames, tree);
|
||||
view.open();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
public class KeywordsLanguageParser {
|
||||
public static Expression parse(final String query) {
|
||||
|
||||
final Expression result;
|
||||
if (query == null || query.length() == 0) {
|
||||
result = Expression.matchAll();
|
||||
} else {
|
||||
final KeywordsLangLanguage lang = new KeywordsLangLanguage();
|
||||
result = lang.parse(query);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
import org.lucares.pdb.keyword.db.Expression.And;
|
||||
import org.lucares.pdb.keyword.db.Expression.Not;
|
||||
import org.lucares.pdb.keyword.db.Expression.Or;
|
||||
import org.lucares.pdb.keyword.db.Expression.Property;
|
||||
|
||||
public class PrintExpressionVisitor extends ExpressionVisitor<String> {
|
||||
|
||||
public PrintExpressionVisitor() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(final And expression) {
|
||||
|
||||
final Expression left = expression.getLeft();
|
||||
final Expression right = expression.getRight();
|
||||
|
||||
return "(" + left.visit(this) + " and " + right.visit(this) + ")";
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(final Or expression) {
|
||||
final Expression left = expression.getLeft();
|
||||
final Expression right = expression.getRight();
|
||||
|
||||
return "(" + left.visit(this) + " or " + right.visit(this) + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(final Not expression) {
|
||||
|
||||
final Expression negatedExpression = expression.getExpression();
|
||||
|
||||
return "!" + negatedExpression.visit(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(final Expression.MatchAll expression) {
|
||||
|
||||
return "*";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(final Property expression) {
|
||||
|
||||
final String property = expression.property;
|
||||
final String stringValue = expression.stringValue;
|
||||
|
||||
return property + "=" + stringValue;
|
||||
}
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
import org.antlr.v4.runtime.ParserRuleContext;
|
||||
|
||||
public class SyntaxException extends RuntimeException {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private int lineStart;
|
||||
private int startIndex;
|
||||
private int lineStop;
|
||||
private int stopIndex;
|
||||
|
||||
public SyntaxException(final ParserRuleContext context, final String message) {
|
||||
super(message + ": " + generateMessage(context));
|
||||
|
||||
lineStart = context.getStart().getLine();
|
||||
startIndex = context.getStart().getStartIndex();
|
||||
lineStop = context.getStop().getLine();
|
||||
stopIndex = context.getStop().getStopIndex();
|
||||
}
|
||||
|
||||
private static String generateMessage(final ParserRuleContext context) {
|
||||
|
||||
final int lineStart = context.getStart().getLine();
|
||||
final int startIndex = context.getStart().getStartIndex();
|
||||
final int lineStop = context.getStop().getLine();
|
||||
final int stopIndex = context.getStop().getStopIndex();
|
||||
|
||||
return String.format("line=%d, start=%d, to line=%d stop=%d", lineStart, startIndex, lineStop, stopIndex);
|
||||
}
|
||||
|
||||
public int getLineStart() {
|
||||
return lineStart;
|
||||
}
|
||||
|
||||
public void setLineStart(final int lineStart) {
|
||||
this.lineStart = lineStart;
|
||||
}
|
||||
|
||||
public int getStartIndex() {
|
||||
return startIndex;
|
||||
}
|
||||
|
||||
public void setStartIndex(final int startIndex) {
|
||||
this.startIndex = startIndex;
|
||||
}
|
||||
|
||||
public int getLineStop() {
|
||||
return lineStop;
|
||||
}
|
||||
|
||||
public void setLineStop(final int lineStop) {
|
||||
this.lineStop = lineStop;
|
||||
}
|
||||
|
||||
public int getStopIndex() {
|
||||
return stopIndex;
|
||||
}
|
||||
|
||||
public void setStopIndex(final int stopIndex) {
|
||||
this.stopIndex = stopIndex;
|
||||
}
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package org.lucares.pdb.keyword.db;
|
||||
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
@Test
|
||||
public class KeywordsTest {
|
||||
public void testKeywords() throws Exception {
|
||||
|
||||
final Keywords keywords = new Keywords();
|
||||
|
||||
final String file = "/tmp/abc";
|
||||
final Map<String, String> tags = new HashMap<>();
|
||||
|
||||
tags.put("method", "m1");
|
||||
tags.put("host", "h1");
|
||||
tags.put("pod", "p1");
|
||||
|
||||
keywords.addFile(file, tags);
|
||||
|
||||
assertSearch(keywords, "method=m1", 0);
|
||||
assertSearch(keywords, "method=m1 AND host=h1", 0);
|
||||
assertSearch(keywords, "method=m1 OR pod=X", 0);
|
||||
query(keywords, "pod=pod3 and method=method124 or pod=pod3 and method=method125");
|
||||
query(keywords, "!(pod=pod3)");
|
||||
}
|
||||
|
||||
public void testKeywordsPerformance() throws Exception {
|
||||
|
||||
final Keywords keywords = new Keywords();
|
||||
|
||||
fill(keywords);
|
||||
|
||||
query(keywords, "host=host1");
|
||||
query(keywords, "host=host1 and method=method123");
|
||||
query(keywords, "host=host1 or host=host2");
|
||||
query(keywords, "pod=pod3 and (method=method124 or method=method125)");
|
||||
query(keywords, "pod=pod3 and method=method124 or pod=pod3 and method=method125");
|
||||
query(keywords, "method=method124 and pod=pod3 or method=method125 and pod=pod3");
|
||||
|
||||
System.out.println(keywords.toString());
|
||||
}
|
||||
|
||||
private void query(final Keywords keywords, final String query) {
|
||||
final Expression expression = KeywordsLanguageParser.parse(query);
|
||||
System.out.println(expression.visit(new PrintExpressionVisitor()));
|
||||
for (int i = 0; i < 5; i++) {
|
||||
|
||||
keywords.search(query);
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSearch(final Keywords keywords, final String query, final int... files) {
|
||||
final int[] actual = keywords.search(query);
|
||||
|
||||
// Assert.assertEquals(new HashSet<>(Arrays.asList(actual)), new
|
||||
// HashSet<>(Arrays.asList(files)));
|
||||
}
|
||||
|
||||
private void fill(final Keywords keywords) {
|
||||
|
||||
final List<String> pods = IntStream.rangeClosed(1, 10).mapToObj(i -> "pod" + i).collect(Collectors.toList());
|
||||
final List<String> hosts = IntStream.rangeClosed(1, 10).mapToObj(i -> "host" + i).collect(Collectors.toList());
|
||||
final List<String> versions = IntStream.rangeClosed(1, 10).mapToObj(i -> "5." + i).collect(Collectors.toList());
|
||||
final List<String> methods = IntStream.rangeClosed(1, 200).mapToObj(i -> "method" + i)
|
||||
.collect(Collectors.toList());
|
||||
final List<String> types = Arrays.asList("app", "engine", "web", "batch");
|
||||
|
||||
final int i = 0;
|
||||
for (final String pod : pods) {
|
||||
for (final String host : hosts) {
|
||||
for (final String version : versions) {
|
||||
for (final String method : methods) {
|
||||
for (final String type : types) {
|
||||
final Map<String, String> tags = new HashMap<>();
|
||||
|
||||
final String file = Paths.get("/some/prefix", UUID.randomUUID().toString()).toString();
|
||||
tags.put("pod", pod);
|
||||
tags.put("host", host);
|
||||
tags.put("method", method);
|
||||
tags.put("version", version);
|
||||
tags.put("type", type);
|
||||
|
||||
keywords.addFile(file, tags);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user