Skip to content

Commit

Permalink
atlas: add couldMatch to QueryIndex (#1139)
Browse files Browse the repository at this point in the history
Add couldMatch check which can be used as a quick pre-filter
on a partial tag set. This can be useful for cases where there
is an expensive transform needed to get the final set of tags.
  • Loading branch information
brharrington authored Jul 9, 2024
1 parent 4fbe367 commit 3bee626
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Consumer;
import java.util.function.Predicate;

/**
* Simple tree for finding all values associated with a prefix that matches the search
Expand Down Expand Up @@ -278,6 +279,58 @@ private void forEachImpl(Node node, String key, int offset, Consumer<Query.KeyQu
}
}

/**
* Invokes the predicate function for each value associated with a prefix of the search key
* until a match is found.
*
* @param key
* Key to compare against the prefixes.
* @param predicate
* Function to call to see if there is a match.
*/
boolean exists(String key, Predicate<Query.KeyQuery> predicate) {
// In queries cannot have an empty value, so cannot be in the root set
if (exists(otherQueries, predicate)) {
return true;
}
Node node = root;
return node != null && existsImpl(node, key, 0, predicate);
}

private boolean existsImpl(Node node, String key, int offset, Predicate<Query.KeyQuery> predicate) {
final int prefixLength = node.prefix.length();
final int keyLength = key.length() - offset;
final int commonLength = commonPrefixLength(node.prefix, key, offset);

if (commonLength == prefixLength) {
// Prefix matches, consume other queries
if (exists(node.otherQueries, predicate)) {
return true;
}

if (commonLength < keyLength) {
// There is more to the key, check if there are also matches for child nodes
int childOffset = offset + commonLength;
int pos = find(node.children, key, childOffset);
return pos >= 0 && existsImpl(node.children[pos], key, childOffset, predicate);
} else {
// It is an exact match, consume in queries
return true;
}
}

return false;
}

private boolean exists(Set<Query.KeyQuery> qs, Predicate<Query.KeyQuery> predicate) {
for (Query.KeyQuery kq : qs) {
if (predicate.test(kq)) {
return true;
}
}
return false;
}

/**
* Returns true if the tree is empty.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,68 @@ public void forEachMatch(Function<String, String> tags, Consumer<T> consumer) {
}
}

/**
* Check the set of tags, which could be a partial set, and return true if it is possible
* that it would match some set of expressions. This method can be used as a cheap pre-filter
* check. In some cases this can be useful to avoid expensive transforms to get the final
* set of tags for matching.
*
* @param tags
* Partial set of tags to check against the index. Function is used to look up the
* value for a given tag key. The function should return {@code null} if there is no
* value for the key.
* @return
* True if it is possible there would be a match based on the partial set of tags.
*/
@SuppressWarnings("PMD.NPathComplexity")
public boolean couldMatch(Function<String, String> tags) {
// Matches for this level
if (!matches.isEmpty()) {
return true;
}

boolean keyPresent = false;
if (key != null) {
String v = tags.apply(key);
if (v != null) {
keyPresent = true;

// Check exact matches
QueryIndex<T> eqIdx = equalChecks.get(v);
if (eqIdx != null && eqIdx.couldMatch(tags)) {
return true;
}

// Scan for matches with other conditions
if (!otherChecks.isEmpty()) {
boolean otherMatches = otherChecksTree.exists(v, kq -> {
if (kq instanceof Query.In || couldMatch(kq, v)) {
QueryIndex<T> idx = otherChecks.get(kq);
return idx != null && idx.couldMatch(tags);
}
return false;
});
if (otherMatches) {
return true;
}
}

// Check matches for has key
if (hasKeyIdx != null && hasKeyIdx.couldMatch(tags)) {
return true;
}
}
}

// Check matches with other keys
if (otherKeysIdx != null && otherKeysIdx.couldMatch(tags)) {
return true;
}

// Check matches with missing keys
return !keyPresent;
}

private boolean matches(Query.KeyQuery kq, String value) {
if (kq instanceof Query.Regex) {
Query.Regex re = (Query.Regex) kq;
Expand All @@ -535,6 +597,16 @@ private boolean matches(Query.KeyQuery kq, String value) {
}
}

private boolean couldMatch(Query.KeyQuery kq, String value) {
if (kq instanceof Query.Regex) {
// For this possible matches prefix check is sufficient, avoid full regex to
// keep the pre-filter checks cheap.
return true;
} else {
return kq.matches(value);
}
}

/**
* Find hot spots in the index where there is a large set of linear matches, e.g. a bunch
* of regex queries for a given key.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,33 +44,43 @@ private void assertSize(PrefixTree tree, int expected) {
Assertions.assertEquals(expected == 0, tree.isEmpty());
}

private void assertEquals(List<Query.KeyQuery> expected, PrefixTree tree, String key) {
List<Query.KeyQuery> actual = sort(tree.get(key));
if (!actual.isEmpty()) {
Assertions.assertTrue(tree.exists(key, k -> true));
} else {
Assertions.assertFalse(tree.exists(key, k -> true));
}
Assertions.assertEquals(expected, actual);
}

@Test
public void nullPrefix() {
PrefixTree tree = new PrefixTree();
tree.put(null, re("1"));
assertSize(tree, 1);

Assertions.assertEquals(list("1"), tree.get("foo"));
Assertions.assertEquals(list("1"), tree.get("bar"));
Assertions.assertEquals(list("1"), tree.get(""));
assertEquals(list("1"), tree, "foo");
assertEquals(list("1"), tree, "bar");
assertEquals(list("1"), tree, "");

Assertions.assertFalse(tree.remove(null, re("2")));
Assertions.assertTrue(tree.remove(null, re("1")));
assertSize(tree, 0);
Assertions.assertEquals(Collections.emptyList(), tree.get("foo"));
assertEquals(Collections.emptyList(), tree, "foo");
}

@Test
public void emptyPrefix() {
PrefixTree tree = new PrefixTree();
tree.put("", re("1"));
Assertions.assertEquals(list("1"), tree.get("foo"));
Assertions.assertEquals(list("1"), tree.get("bar"));
Assertions.assertEquals(list("1"), tree.get(""));
assertEquals(list("1"), tree, "foo");
assertEquals(list("1"), tree, "bar");
assertEquals(list("1"), tree, "");

Assertions.assertFalse(tree.remove("", re("2")));
Assertions.assertTrue(tree.remove("", re("1")));
Assertions.assertEquals(Collections.emptyList(), tree.get("foo"));
assertEquals(Collections.emptyList(), tree, "foo");
}

@Test
Expand All @@ -79,16 +89,16 @@ public void simplePrefix() {
tree.put("abc", re("1"));
assertSize(tree, 1);

Assertions.assertEquals(list("1"), tree.get("abcdef"));
Assertions.assertEquals(list("1"), tree.get("abcghi"));
Assertions.assertEquals(list("1"), tree.get("abc"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ab"));
assertEquals(list("1"), tree, "abcdef");
assertEquals(list("1"), tree, "abcghi");
assertEquals(list("1"), tree, "abc");
assertEquals(Collections.emptyList(), tree, "abd");
assertEquals(Collections.emptyList(), tree, "ab");

Assertions.assertTrue(tree.remove("abc", re("1")));
Assertions.assertFalse(tree.remove("abc", re("1")));
assertSize(tree, 0);
Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef"));
assertEquals(Collections.emptyList(), tree, "abcdef");
}

@Test
Expand All @@ -100,15 +110,15 @@ public void multipleMatches() {
tree.put("abc", re("4"));
assertSize(tree, 4);

Assertions.assertEquals(list("1", "2", "3", "4"), sort(tree.get("abcdef")));
Assertions.assertEquals(list("2", "3"), sort(tree.get("abdef")));
Assertions.assertEquals(list("3"), tree.get("adef"));
Assertions.assertEquals(Collections.emptyList(), tree.get("bcdef"));
assertEquals(list("1", "2", "3", "4"), tree, "abcdef");
assertEquals(list("2", "3"), tree, "abdef");
assertEquals(list("3"), tree, "adef");
assertEquals(Collections.emptyList(), tree, "bcdef");

Assertions.assertFalse(tree.remove("ab", re("1")));
Assertions.assertTrue(tree.remove("abc", re("1")));
assertSize(tree, 3);
Assertions.assertEquals(list("2", "3", "4"), sort(tree.get("abcdef")));
assertEquals(list("2", "3", "4"), tree, "abcdef");
}

@Test
Expand All @@ -117,20 +127,20 @@ public void unicodeCharInPrefix() {
tree.put("aβc", re("1"));
assertSize(tree, 1);

Assertions.assertEquals(list("1"), tree.get("aβcdef"));
Assertions.assertEquals(list("1"), tree.get("aβcghi"));
Assertions.assertEquals(list("1"), tree.get("aβc"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abcghi"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abc"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ab"));
Assertions.assertEquals(Collections.emptyList(), tree.get("aβ"));
Assertions.assertEquals(Collections.emptyList(), tree.get("b"));
assertEquals(list("1"), tree, "aβcdef");
assertEquals(list("1"), tree, "aβcghi");
assertEquals(list("1"), tree, "aβc");
assertEquals(Collections.emptyList(), tree, "abcdef");
assertEquals(Collections.emptyList(), tree, "abcghi");
assertEquals(Collections.emptyList(), tree, "abc");
assertEquals(Collections.emptyList(), tree, "abd");
assertEquals(Collections.emptyList(), tree, "ab");
assertEquals(Collections.emptyList(), tree, "aβ");
assertEquals(Collections.emptyList(), tree, "b");

Assertions.assertTrue(tree.remove("aβc", re("1")));
assertSize(tree, 0);
Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef"));
assertEquals(Collections.emptyList(), tree, "abcdef");
}

@Test
Expand All @@ -139,18 +149,18 @@ public void emptyRootPrefix() {
tree.put("ab", re("ab"));
tree.put("cd", re("cd"));

Assertions.assertEquals(list("ab"), tree.get("ab"));
Assertions.assertEquals(list("cd"), tree.get("cd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ef"));
assertEquals(list("ab"), tree, "ab");
assertEquals(list("cd"), tree, "cd");
assertEquals(Collections.emptyList(), tree, "ef");

Assertions.assertTrue(tree.remove("ab", re("ab")));
PrefixTree tree2 = new PrefixTree();
tree2.put("cd", re("cd"));
Assertions.assertEquals(tree2, tree);

Assertions.assertEquals(Collections.emptyList(), tree.get("ab"));
Assertions.assertEquals(list("cd"), tree.get("cd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ef"));
assertEquals(Collections.emptyList(), tree, "ab");
assertEquals(list("cd"), tree, "cd");
assertEquals(Collections.emptyList(), tree, "ef");
}

@Test
Expand All @@ -159,7 +169,7 @@ public void updateExistingNode() {
tree.put("abcdef", re("1"));
tree.put("abcdef", re("2"));

Assertions.assertEquals(list("1", "2"), tree.get("abcdef"));
assertEquals(list("1", "2"), tree, "abcdef");
}

@Test
Expand All @@ -169,8 +179,8 @@ public void addChildNode() {
tree.put("abcdefghi", re("2"));
tree.put("abcdef", re("3"));

Assertions.assertEquals(list("1", "3"), tree.get("abcdef"));
Assertions.assertEquals(list("1", "3", "2"), tree.get("abcdefghi"));
assertEquals(list("1", "3"), tree, "abcdef");
assertEquals(list("1", "2", "3"), tree, "abcdefghi");
}

@Test
Expand All @@ -179,8 +189,8 @@ public void splitInteriorNode() {
tree.put("abcdef", re("abcdef"));
tree.put("abcghi", re("abcghi"));

Assertions.assertEquals(list("abcdef"), tree.get("abcdef"));
Assertions.assertEquals(list("abcghi"), tree.get("abcghi"));
assertEquals(list("abcdef"), tree, "abcdef");
assertEquals(list("abcghi"), tree, "abcghi");
}

@Test
Expand All @@ -189,7 +199,7 @@ public void manyDifferentRootPrefixes() {
tree.put("abc", re("abc"));
tree.put("def", re("def"));
tree.put("ghi", re("ghi"));
Assertions.assertEquals(list("abc"), tree.get("abcdef"));
assertEquals(list("abc"), tree, "abcdef");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ private void assertEquals(List<Query> expected, QueryIndex<Query> idx, Id id) {
}
for (int i = 0; i < 4; ++i) {
Assertions.assertEquals(expected, sort(idx.findMatches(Query.toMap(id)::get)));
// Only check could match case here since couldMatch should be a superset of actual
// matches
if (!expected.isEmpty()) {
Assertions.assertTrue(idx.couldMatch(Query.toMap(id)::get));
}
}
}

Expand Down Expand Up @@ -551,4 +556,24 @@ public void findHotSpots() {
Assertions.assertEquals(5, queries.size());
});
}

@Test
public void couldMatchPartial() {
Registry registry = new NoopRegistry();
QueryIndex<Integer> idx = QueryIndex.newInstance(registry);
Query q = Parser.parseQuery("name,foo,:eq,id,bar,:eq,:and,app,baz,:re,:and");
idx.add(q, 42);

Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo"))::get));
Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo", "id", "bar"))::get));
Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo", "app", "baz-main"))::get));

// Since `id` is after `app` and `app` is missing in tags, it will short-circuit and
// assume it could match
Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo", "id", "baz"))::get));

// Filtered out
Assertions.assertFalse(idx.couldMatch(Query.toMap(id("foo2", "id", "bar"))::get));
Assertions.assertFalse(idx.couldMatch(Query.toMap(id("foo", "app", "bar-main"))::get));
}
}

0 comments on commit 3bee626

Please sign in to comment.