diff --git a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java index 0eef11b..d585da7 100644 --- a/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java +++ b/data-store/src/main/java/org/lucares/pdb/datastore/lang/ExpressionToDocIdVisitor.java @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.lucares.collections.LongList; import org.lucares.pdb.api.RuntimeIOException; @@ -17,6 +18,7 @@ import org.lucares.pdb.datastore.lang.Expression.Or; import org.lucares.pdb.datastore.lang.Expression.Parentheses; import org.lucares.pdb.diskstorage.DiskStorage; import org.lucares.pdb.map.PersistentMap; +import org.lucares.utils.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +43,8 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { final long start = System.nanoTime(); final LongList result = LongList.intersection(leftFiles, rightFiles); - LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size()); + LOGGER.trace("and: {} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, + result.size()); assert result.isSorted(); return result; @@ -56,7 +59,8 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { final LongList rightFiles = right.visit(this); final long start = System.nanoTime(); final LongList result = LongList.union(leftFiles, rightFiles); - LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size()); + LOGGER.trace("or: {} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, + result.size()); assert result.isSorted(); return result; @@ -72,7 +76,8 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { final LongList result = getAllDocIds().clone(); result.removeAll(docIdsToBeNegated); - LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size()); + LOGGER.trace("not: {} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, + result.size()); return result; } @@ -88,7 +93,8 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { public LongList visit(final Expression.MatchAll expression) { final long start = System.nanoTime(); final LongList result = getAllDocIds(); - LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size()); + LOGGER.trace("matchAll: {} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, + result.size()); return result; } @@ -108,7 +114,8 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { result = LongList.union(result, mergedDocIds); } - LOGGER.trace("{} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, result.size()); + LOGGER.trace("in: {} took {} ms results={}", expression, (System.nanoTime() - start) / 1_000_000.0, + result.size()); return result; } @@ -125,12 +132,25 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { private List filterByWildcard(final String propertyName, final Pattern valuePattern) { final List result = new ArrayList<>(); try { - + final long start = System.nanoTime(); keyToValueToDocId.visitValues(new Tag(propertyName, ""), (tags, blockOffsetToDocIds) -> { try { if (valuePattern.matcher(tags.getValueAsString()).matches()) { try (final BSFile bsFile = BSFile.existingFile(blockOffsetToDocIds, diskStorage)) { - bsFile.streamOfLongLists().forEach(result::add); + + // We know that all LongLists coming from a BSFile are sorted, non-overlapping + // and increasing, that means we can just concatenate them and get a sorted + // list. + final List longLists = bsFile.streamOfLongLists().collect(Collectors.toList()); + final LongList concatenatedLists = concatenateLists(longLists); + + Preconditions.checkTrue(concatenatedLists.isSorted(), + "The LongLists containing document ids must be sorted, " + + "non-overlapping and increasing, so that the concatenation " + + "is sorted. This is guaranteed by the fact that document ids " + + "are generated in monotonically increasing order."); + + result.add(concatenatedLists); } } } catch (final IOException e) { @@ -138,6 +158,9 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { } }); + LOGGER.trace("filterByWildcard: for key {} took {}ms", propertyName, + (System.nanoTime() - start) / 1_000_000.0); + return result; } catch (final IOException e) { throw new RuntimeIOException(e); @@ -155,4 +178,17 @@ public class ExpressionToDocIdVisitor extends ExpressionVisitor { return result; } + + private static LongList concatenateLists(final Collection lists) { + + final int totalSize = lists.stream().mapToInt(LongList::size).sum(); + final LongList result = new LongList(totalSize); + + for (final LongList list : lists) { + result.addAll(list); + } + + return result; + + } } diff --git a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java index 719dadb..265bf27 100644 --- a/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java +++ b/pdb-utils/src/main/java/org/lucares/utils/Preconditions.java @@ -30,6 +30,8 @@ public class Preconditions { * @param b * @param message formatted with {@link MessageFormat} * @param args + * @throws IllegalStateException if {@code a} is not greater or equal to + * {@code b} */ public static void checkGreaterOrEqual(final long a, final long b, final String message, final Object... args) { if (a < b) { @@ -49,6 +51,8 @@ public class Preconditions { * @param expected the expected value * @param message formatted with {@link MessageFormat} * @param args arguments for the message + * @throws IllegalStateException if {@code actual} is not equal to + * {@code expected} */ public static void checkEqual(final Object actual, final Object expected, final String message, final Object... args) { @@ -58,6 +62,18 @@ public class Preconditions { } } + /** + * Check that the given value is true. + * + * @param actual must be true + * @param message formatted with {@link MessageFormat} + * @param args arguments for the message + * @throws IllegalStateException if {@code actual} is not true + */ + public static void checkTrue(final boolean actual, final String message, final Object... args) { + checkEqual(actual, true, message, args); + } + public static void checkNull(final Object actual, final String message, final Object... args) { if (actual != null) { throw new IllegalStateException(MessageFormat.format(message, args));