elastic · mayya-sharipova · Oct 24, 2025 · Oct 24, 2025 · Oct 26, 2025 · Nov 6, 2025
diff --git a/docs/changelog/137126.yaml b/docs/changelog/137126.yaml
@@ -0,0 +1,5 @@
+pr: 137126
+summary: Use IVF_PQ for GPU index build for large datasets
+area: Vector Search
+type: enhancement
+issues: []
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
@@ -22,6 +22,17 @@ public class GPUSupport {
     // Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than that via the API
     private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L;
 
+    private static volatile Long cachedTotalGpuMemory;
+
+    /**
+     * Checks whether a GPU's compute capability is at least the minimum required for GPU-accelerated indexing.
+     * Only the compute capability (major.minor) is compared here; device memory is not examined by this method.
+     */
+    private static boolean meetsComputeCapabilityRequirements(int major, int minor) {
+        return major >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
+            && (major > GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR || minor >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR);
+    }
+
     /** Tells whether the platform supports cuvs. */
     public static boolean isSupported(boolean logError) {
         try {
@@ -35,9 +46,7 @@ public static boolean isSupported(boolean logError) {
             }
 
             for (var gpu : availableGPUs) {
-                if (gpu.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
-                    || (gpu.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
-                        && gpu.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR)) {
+                if (meetsComputeCapabilityRequirements(gpu.computeCapabilityMajor(), gpu.computeCapabilityMinor()) == false) {
                     if (logError) {
                         LOG.warn(
                             "GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])",
@@ -116,4 +125,48 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) {
         }
         return null;
     }
+
+    /**
+     * Returns the total device memory in bytes of the first available compatible GPU.
+     * The outcome (including the -1 sentinel) is cached on the first call; later calls return the cached value.
+     *
+     * @return total device memory in bytes, or -1 if no suitable GPU was found or querying the GPUs threw
+     */
+    public static long getTotalGpuMemory() {
+        if (cachedTotalGpuMemory != null) {
+            return cachedTotalGpuMemory;
+        }
+
+        synchronized (GPUSupport.class) {
+            if (cachedTotalGpuMemory != null) { // re-check under the lock: another thread may have filled the cache
+                return cachedTotalGpuMemory;
+            }
+
+            try {
+                var gpuInfoProvider = CuVSProvider.provider().gpuInfoProvider();
+                var availableGPUs = gpuInfoProvider.availableGPUs();
+
+                for (var gpu : availableGPUs) {
+                    boolean hasRequiredCapability = meetsComputeCapabilityRequirements(
+                        gpu.computeCapabilityMajor(),
+                        gpu.computeCapabilityMinor()
+                    );
+                    boolean hasRequiredMemory = gpu.totalDeviceMemoryInBytes() >= MIN_DEVICE_MEMORY_IN_BYTES;
+
+                    if (hasRequiredCapability && hasRequiredMemory) {
+                        cachedTotalGpuMemory = gpu.totalDeviceMemoryInBytes();
+                        return cachedTotalGpuMemory;
+                    }
+                }
+
+                // No GPU passed both the compute capability and the minimum memory check; the -1 is cached as well
+                cachedTotalGpuMemory = -1L;
+                return -1L;
+            } catch (Throwable e) { // any failure is logged at debug level and cached as -1 too
+                LOG.debug("Unable to query GPU total memory", e);
+                cachedTotalGpuMemory = -1L;
+                return -1L;
+            }
+        }
+    }
 }
diff --git a/...ck/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java b/...ck/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java
@@ -0,0 +1,149 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CagraIndexParams;
+import com.nvidia.cuvs.CuVSIvfPqIndexParams;
+import com.nvidia.cuvs.CuVSIvfPqParams;
+import com.nvidia.cuvs.CuVSIvfPqSearchParams;
+
+/**
+ * Factory for creating {@link CuVSIvfPqParams} with automatic parameter calculation
+ * based on dataset dimensions, distance metric and the efConstruction setting.
+ *
+ * <p>This class provides a Java equivalent to the C++ function:
+ * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)}
+ *
+ * <p>The parameters are calculated automatically based on the dataset shape (number of
+ * rows and features), following the same heuristics as the C++ implementation.
+ *
+ * TODO: Remove this class when cuvs 25.12 is available and use functions from there directly.
+ */
+public class CuVSIvfPqParamsFactory {
+
+    /**
+     * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on the
+     * dataset dimensions, distance metric, and efConstruction parameter.
+     *
+     * <p>This method replicates the parameter calculation logic from the C++ function:
+     * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)}
+     *
+     * @param numVectors the number of vectors in the dataset
+     * @param dims the dimensionality of the vectors
+     * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine)
+     * @param efConstruction the efConstruction parameter in an HNSW graph
+     * @return a {@link CuVSIvfPqParams} instance with calculated parameters
+     * @throws IllegalArgumentException if {@code numVectors} or {@code dims} is not positive
+     */
+    public static CuVSIvfPqParams create(int numVectors, int dims, CagraIndexParams.CuvsDistanceType distanceType, int efConstruction) {
+        long nRows = numVectors;
+        long nFeatures = dims;
+
+        if (nRows <= 0 || nFeatures <= 0) {
+            throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures);
+        }
+        return createFromDimensions(nRows, nFeatures, distanceType, efConstruction);
+    }
+
+    /**
+     * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on dataset
+     * dimensions and construction parameter.
+     *
+     * <p>This method performs the actual parameter calculation; {@link #create(int, int,
+     * CagraIndexParams.CuvsDistanceType, int)} widens its arguments to {@code long} and
+     * delegates here.
+     *
+     * @param nRows the number of rows (vectors) in the dataset
+     * @param nFeatures the number of features (dimensions) per vector
+     * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine)
+     * @param efConstruction the efConstruction value; it adds efConstruction / 16 to the number of probes
+     * @return a {@link CuVSIvfPqParams} instance with calculated parameters
+     * @throws IllegalArgumentException if {@code nRows} or {@code nFeatures} is not positive
+     */
+    public static CuVSIvfPqParams createFromDimensions(
+        long nRows,
+        long nFeatures,
+        CagraIndexParams.CuvsDistanceType distanceType,
+        int efConstruction
+    ) {
+        if (nRows <= 0 || nFeatures <= 0) {
+            throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures);
+        }
+
+        // Calculate PQ dimensions and bits based on feature count
+        int pqDim;
+        int pqBits;
+
+        if (nFeatures <= 32) {
+            pqDim = 16;
+            pqBits = 8;
+        } else {
+            pqBits = 4;
+            if (nFeatures <= 64) {
+                pqDim = 32;
+            } else if (nFeatures <= 128) {
+                pqDim = 64;
+            } else if (nFeatures <= 192) {
+                pqDim = 96;
+            } else {
+                // Round half the feature count (integer division) up to the nearest multiple of 128
+                pqDim = (int) roundUpSafe(nFeatures / 2, 128);
+            }
+        }
+        // Calculate number of lists: one cluster per 2000 rows (integer division), but never fewer than 1
+        int nLists = Math.max(1, (int) (nRows / 2000));
+
+        // Kmeans training set fraction: 1 / sqrt(nRows * 1e-5), clamped to [min(1, 32 * nLists / nRows), 1]
+        final double kMinPointsPerCluster = 32.0;
+        final double minKmeansTrainsetPoints = kMinPointsPerCluster * nLists;
+        final double maxKmeansTrainsetFraction = 1.0;
+        final double minKmeansTrainsetFraction = Math.min(maxKmeansTrainsetFraction, minKmeansTrainsetPoints / nRows);
+        double kmeansTrainsetFraction = Math.clamp(1.0 / Math.sqrt(nRows * 1e-5), minKmeansTrainsetFraction, maxKmeansTrainsetFraction);
+
+        // Number of probes: round(2 + sqrt(nLists) / 20 + efConstruction / 16)
+        int nProbes = Math.round((float) (2.0 + Math.sqrt(nLists) / 20.0 + efConstruction / 16.0));
+
+        // Build index parameters
+        CuVSIvfPqIndexParams indexParams = new CuVSIvfPqIndexParams.Builder().withMetric(distanceType)
+            .withPqDim(pqDim)
+            .withPqBits(pqBits)
+            .withNLists(nLists)
+            .withKmeansNIters(10)
+            .withKmeansTrainsetFraction(kmeansTrainsetFraction)
+            .withCodebookKind(CagraIndexParams.CodebookGen.PER_SUBSPACE)
+            .build();
+
+        // Build search parameters
+        CuVSIvfPqSearchParams searchParams = new CuVSIvfPqSearchParams.Builder().withNProbes(nProbes)
+            .withLutDtype(CagraIndexParams.CudaDataType.CUDA_R_16F)
+            .withInternalDistanceDtype(CagraIndexParams.CudaDataType.CUDA_R_16F)
+            .build();
+
+        // Build and return the complete IVF_PQ parameters
+        return new CuVSIvfPqParams.Builder().withCuVSIvfPqIndexParams(indexParams)
+            .withCuVSIvfPqSearchParams(searchParams)
+            .withRefinementRate(1.0f)
+            .build();
+    }
+
+    /**
+     * Helper method to round up to the nearest multiple of a given divisor.
+     *
+     * <p>Equivalent to C++ {@code raft::round_up_safe<uint32_t>(value, divisor)}
+     *
+     * @param value the value to round up
+     * @param divisor the divisor to round to; must be positive, otherwise an IllegalArgumentException is thrown
+     * @return the smallest multiple of {@code divisor} that is not less than {@code value} (for non-negative {@code value})
+     */
+    private static long roundUpSafe(long value, long divisor) {
+        if (divisor <= 0) {
+            throw new IllegalArgumentException("divisor must be positive");
+        }
+        return ((value + divisor - 1) / divisor) * divisor;
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java
@@ -59,6 +59,23 @@ ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataT
     /** Shuts down the manager, releasing all open resources. */
     void shutdown();
 
+    /**
+     * Estimates the required GPU memory for building an index using the NN_DESCENT algorithm.
+     *
+     * @param numVectors the number of vectors
+     * @param dims the dimensionality of vectors
+     * @param dataType the data type of the vectors
+     * @return the estimated memory in bytes: 2 * numVectors * dims * (bytes per element of dataType)
+     */
+    static long estimateNNDescentMemory(int numVectors, int dims, CuVSMatrix.DataType dataType) {
+        int elementTypeBytes = switch (dataType) {
+            case FLOAT -> Float.BYTES;
+            case INT, UINT -> Integer.BYTES;
+            case BYTE -> Byte.BYTES;
+        };
+        return (long) (2.0 * numVectors * dims * elementTypeBytes); // 2.0 first: the product is evaluated in double, not int
+    }
+
     /** Returns the system-wide pooling manager. */
     static CuVSResourceManager pooling() {
         return PoolingCuVSResourceManager.Holder.INSTANCE;
@@ -193,25 +210,24 @@ public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataTyp
         }
 
         private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataType dataType, CagraIndexParams cagraIndexParams) {
-            int elementTypeBytes = switch (dataType) {
-                case FLOAT -> Float.BYTES;
-                case INT, UINT -> Integer.BYTES;
-                case BYTE -> Byte.BYTES;
-            };
-
             if (cagraIndexParams.getCagraGraphBuildAlgo() == CagraIndexParams.CagraGraphBuildAlgo.IVF_PQ
                 && cagraIndexParams.getCuVSIvfPqParams() != null
                 && cagraIndexParams.getCuVSIvfPqParams().getIndexParams() != null
                 && cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqDim() != 0) {
                 // See https://docs.rapids.ai/api/cuvs/nightly/neighbors/ivfpq/#index-device-memory
+                int elementTypeBytes = switch (dataType) { // size in bytes of one vector element; only this branch needs it
+                    case FLOAT -> Float.BYTES;
+                    case INT, UINT -> Integer.BYTES;
+                    case BYTE -> Byte.BYTES;
+                };
                 var pqDim = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqDim();
                 var pqBits = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqBits();
                 var numClusters = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getnLists();
                 var approximatedIvfBytes = numVectors * (pqDim * (pqBits / 8.0) + elementTypeBytes) + (long) numClusters * Integer.BYTES;
                 return (long) (GPU_COMPUTATION_MEMORY_FACTOR * approximatedIvfBytes);
             }
 
-            return (long) (GPU_COMPUTATION_MEMORY_FACTOR * numVectors * dims * elementTypeBytes);
+            return CuVSResourceManager.estimateNNDescentMemory(numVectors, dims, dataType);
         }
 
         // visible for testing