From 20e7d0147d83eefb2d522a5ceb3b61a0c7689a1b Mon Sep 17 00:00:00 2001
From: VIKASH TIWARI
Date: Mon, 30 Sep 2024 15:28:54 -0700
Subject: [PATCH] Adding Support to Enable/Disable Shard level Rescoring and
Update Oversampling Factor
Signed-off-by: VIKASH TIWARI
---
.../org/opensearch/knn/index/KNNSettings.java | 37 +++++++++-
.../knn/index/mapper/CompressionLevel.java | 31 ++++----
.../nativelib/NativeEngineKnnVectorQuery.java | 9 ++-
.../index/query/rescore/RescoreContext.java | 9 +++
.../knn/index/KNNSettingsTests.java | 35 +++++++++
.../index/mapper/CompressionLevelTests.java | 73 ++++++++++++-------
.../NativeEngineKNNVectorQueryTests.java | 71 +++++++++++++++++-
7 files changed, 217 insertions(+), 48 deletions(-)
diff --git a/src/main/java/org/opensearch/knn/index/KNNSettings.java b/src/main/java/org/opensearch/knn/index/KNNSettings.java
index 5fcc51bb5..26cb9880f 100644
--- a/src/main/java/org/opensearch/knn/index/KNNSettings.java
+++ b/src/main/java/org/opensearch/knn/index/KNNSettings.java
@@ -88,6 +88,7 @@ public class KNNSettings {
public static final String QUANTIZATION_STATE_CACHE_SIZE_LIMIT = "knn.quantization.cache.size.limit";
public static final String QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES = "knn.quantization.cache.expiry.minutes";
public static final String KNN_FAISS_AVX512_DISABLED = "knn.faiss.avx512.disabled";
+ public static final String KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED = "index.knn.disk.vector.shard_level_rescoring_disabled";
/**
* Default setting values
@@ -112,11 +113,32 @@ public class KNNSettings {
public static final Integer KNN_MAX_QUANTIZATION_STATE_CACHE_SIZE_LIMIT_PERCENTAGE = 10; // Quantization state cache limit cannot exceed
// 10% of the JVM heap
public static final Integer KNN_DEFAULT_QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES = 60;
+ public static final boolean KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_VALUE = true;
/**
* Settings Definition
*/
+ /**
+ * Boolean setting that controls whether shard-level re-scoring for KNN disk-based vectors is turned off.
+ * A value of {@code true} disables shard-level re-scoring; {@code false} leaves it enabled.
+ * The setting uses:
+ * <ul>
+ * <li>{@link #KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED}: the name (key) of the setting.</li>
+ * <li>{@link #KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_VALUE}: the default value ({@code true}).</li>
+ * <li>{@code IndexScope}: the setting works at the index level.</li>
+ * <li>{@code Dynamic}: the setting can be changed without restarting the cluster.</li>
+ * </ul>
+ *
+ * @see Setting#boolSetting(String, boolean, Setting.Property...)
+ */
+ public static final Setting<Boolean> KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING = Setting.boolSetting(
+ KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED,
+ KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_VALUE,
+ IndexScope,
+ Dynamic
+ );
+
// This setting controls how much memory should be used to transfer vectors from Java to JNI Layer. The default
// 1% of the JVM heap
public static final Setting KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING = Setting.memorySizeSetting(
@@ -454,6 +476,10 @@ private Setting<?> getSetting(String key) {
return QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES_SETTING;
}
+ if (KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED.equals(key)) {
+ return KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING;
+ }
+
throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
}
@@ -475,7 +501,8 @@ public List<Setting<?>> getSettings() {
KNN_VECTOR_STREAMING_MEMORY_LIMIT_PCT_SETTING,
KNN_FAISS_AVX512_DISABLED_SETTING,
QUANTIZATION_STATE_CACHE_SIZE_LIMIT_SETTING,
- QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES_SETTING
+ QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES_SETTING,
+ KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING
);
return Stream.concat(settings.stream(), Stream.concat(getFeatureFlags().stream(), dynamicCacheSettings.values().stream()))
.collect(Collectors.toList());
@@ -528,6 +555,14 @@ public static Integer getFilteredExactSearchThreshold(final String indexName) {
.getAsInt(ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_DEFAULT_VALUE);
}
+ /**
+ * Reads the {@code index.knn.disk.vector.shard_level_rescoring_disabled} setting of the given index
+ * from the index metadata in the current cluster state.
+ *
+ * @param indexName name of the index whose settings are consulted
+ * @return the value configured on the index, or {@link #KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_VALUE}
+ * when the index does not set it explicitly
+ */
+ public static boolean isShardLevelRescoringDisabledForDiskBasedVector(String indexName) {
+ return KNNSettings.state().clusterService.state()
+ .getMetadata()
+ .index(indexName)
+ .getSettings()
+ // Fall back to the same default constant the Setting definition uses, so the two cannot drift apart.
+ .getAsBoolean(KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED, KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_VALUE);
+ }
+
public void initialize(Client client, ClusterService clusterService) {
this.client = client;
this.clusterService = clusterService;
diff --git a/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java b/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java
index 3e1b47db7..c9a169efc 100644
--- a/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java
+++ b/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java
@@ -97,32 +97,35 @@ public static boolean isConfigured(CompressionLevel compressionLevel) {
/**
* Returns the appropriate {@link RescoreContext} based on the given {@code mode} and {@code dimension}.
*
- * If the {@code mode} is present in the valid {@code modesForRescore} set, the method checks the value of
- * {@code dimension}:
+ * <p>If the {@code mode} is present in the valid {@code modesForRescore} set, the method adjusts the oversample factor based on the
+ * {@code dimension} value:
*
- * - If {@code dimension} is less than or equal to 1000, it returns a {@link RescoreContext} with an
- * oversample factor of 5.0f.
- * - If {@code dimension} is greater than 1000, it returns the default {@link RescoreContext} associated with
- * the {@link CompressionLevel}. If no default is set, it falls back to {@link RescoreContext#getDefault()}.
+ * - If {@code dimension} is greater than or equal to 1000, no oversampling is applied (oversample factor = 1.0).
+ * - If {@code dimension} is greater than or equal to 768 but less than 1000, a 2x oversample factor is applied (oversample factor = 2.0).
+ * - If {@code dimension} is less than 768, a 3x oversample factor is applied (oversample factor = 3.0).
*
- * If the {@code mode} is not valid, the method returns {@code null}.
+ * If the {@code mode} is not present in the {@code modesForRescore} set, the method returns {@code null}.
*
* @param mode The {@link Mode} for which to retrieve the {@link RescoreContext}.
* @param dimension The dimensional value that determines the {@link RescoreContext} behavior.
- * @return A {@link RescoreContext} with an oversample factor of 5.0f if {@code dimension} is less than
- * or equal to 1000, the default {@link RescoreContext} if greater, or {@code null} if the mode
- * is invalid.
+ * @return A {@link RescoreContext} with the appropriate oversample factor based on the dimension, or {@code null} if the mode
+ * is not valid.
*/
public RescoreContext getDefaultRescoreContext(Mode mode, int dimension) {
if (modesForRescore.contains(mode)) {
// Adjust RescoreContext based on dimension
- if (dimension <= RescoreContext.DIMENSION_THRESHOLD) {
- // For dimensions <= 1000, return a RescoreContext with 5.0f oversample factor
- return RescoreContext.builder().oversampleFactor(RescoreContext.OVERSAMPLE_FACTOR_BELOW_DIMENSION_THRESHOLD).build();
+ if (dimension >= RescoreContext.DIMENSION_THRESHOLD_1000) {
+ // dimension >= DIMENSION_THRESHOLD_1000: use OVERSAMPLE_FACTOR_1000 (documented in the Javadoc above as 1.0, i.e. no oversampling)
+ return RescoreContext.builder().oversampleFactor(RescoreContext.OVERSAMPLE_FACTOR_1000).build();
+ } else if (dimension >= RescoreContext.DIMENSION_THRESHOLD_768) {
+ // DIMENSION_THRESHOLD_768 <= dimension < DIMENSION_THRESHOLD_1000: use OVERSAMPLE_FACTOR_768 (documented above as 2.0)
+ return RescoreContext.builder().oversampleFactor(RescoreContext.OVERSAMPLE_FACTOR_768).build();
} else {
- return defaultRescoreContext;
+ // dimension < DIMENSION_THRESHOLD_768: use OVERSAMPLE_FACTOR_BELOW_768 (documented above as 3.0)
+ return RescoreContext.builder().oversampleFactor(RescoreContext.OVERSAMPLE_FACTOR_BELOW_768).build();
}
}
return null;
}
+
}
diff --git a/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java b/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java
index 945da850a..0993033b8 100644
--- a/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java
+++ b/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java
@@ -20,6 +20,7 @@
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.opensearch.common.StopWatch;
+import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.query.ExactSearcher;
import org.opensearch.knn.index.query.KNNQuery;
import org.opensearch.knn.index.query.KNNWeight;
@@ -54,7 +55,6 @@ public Weight createWeight(IndexSearcher indexSearcher, ScoreMode scoreMode, flo
final IndexReader reader = indexSearcher.getIndexReader();
final KNNWeight knnWeight = (KNNWeight) knnQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 1);
List leafReaderContexts = reader.leaves();
-
List