From 551068bad2bfffe4a92b31e22d8e572df53fb0d0 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 14:19:23 -0700 Subject: [PATCH] Make the HitQueue size more appropriate for exact search (#1549) (#1580) Signed-off-by: panguixin (cherry picked from commit c861966708219b5a0c27fa60e6eb1c150dfc0efa) Co-authored-by: panguixin --- CHANGELOG.md | 1 + .../java/org/opensearch/knn/index/query/KNNWeight.java | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 04c972711..03de217a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.13...2.x) ### Features ### Enhancements +* Make the HitQueue size more appropriate for exact search [#1549](https://github.com/opensearch-project/k-NN/pull/1549) ### Bug Fixes ### Infrastructure * Add micro-benchmark module in k-NN plugin for benchmark streaming vectors to JNI layer functionality. [#1583](https://github.com/opensearch-project/k-NN/pull/1583) diff --git a/src/main/java/org/opensearch/knn/index/query/KNNWeight.java b/src/main/java/org/opensearch/knn/index/query/KNNWeight.java index 1c4d0a646..06bf96d63 100644 --- a/src/main/java/org/opensearch/knn/index/query/KNNWeight.java +++ b/src/main/java/org/opensearch/knn/index/query/KNNWeight.java @@ -117,7 +117,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { * This improves the recall. 
*/ if (filterWeight != null && canDoExactSearch(cardinality)) { - docIdsToScoreMap.putAll(doExactSearch(context, filterBitSet)); + docIdsToScoreMap.putAll(doExactSearch(context, filterBitSet, cardinality)); } else { Map<Integer, Float> annResults = doANNSearch(context, filterBitSet, cardinality); if (annResults == null) { @@ -131,7 +131,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { annResults.size(), cardinality ); - annResults = doExactSearch(context, filterBitSet); + annResults = doExactSearch(context, filterBitSet, cardinality); } docIdsToScoreMap.putAll(annResults); } @@ -309,10 +309,10 @@ private Map<Integer, Float> doANNSearch(final LeafReaderContext context, final B .collect(Collectors.toMap(KNNQueryResult::getId, result -> knnEngine.score(result.getScore(), spaceType))); } - private Map<Integer, Float> doExactSearch(final LeafReaderContext leafReaderContext, final BitSet filterIdsBitSet) { + private Map<Integer, Float> doExactSearch(final LeafReaderContext leafReaderContext, final BitSet filterIdsBitSet, int cardinality) { try { // Creating min heap and init with MAX DocID and Score as -INF. - final HitQueue queue = new HitQueue(this.knnQuery.getK(), true); + final HitQueue queue = new HitQueue(Math.min(this.knnQuery.getK(), cardinality), true); ScoreDoc topDoc = queue.top(); final Map<Integer, Float> docToScore = new HashMap<>(); FilteredIdsKNNIterator iterator = getFilteredKNNIterator(leafReaderContext, filterIdsBitSet);