diff --git a/docs/changelog/137126.yaml b/docs/changelog/137126.yaml
new file mode 100644
index 0000000000000..b3831939dcf07
--- /dev/null
+++ b/docs/changelog/137126.yaml
@@ -0,0 +1,5 @@
+pr: 137126
+summary: Use IVF_PQ for GPU index build for large datasets
+area: Vector Search
+type: enhancement
+issues: []
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
index c21bda894790a..c7e41870636d9 100644
--- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
@@ -22,6 +22,17 @@ public class GPUSupport {
     // Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than that via the API
     private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L;
 
+    private static volatile Long cachedTotalGpuMemory;
+
+    /**
+     * Checks whether a GPU meets the minimum requirements for GPU-accelerated indexing.
+     * Checks compute capability and total memory requirements.
+     */
+    private static boolean meetsComputeCapabilityRequirements(int major, int minor) {
+        return major >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
+            && (major > GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR || minor >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR);
+    }
+
     /** Tells whether the platform supports cuvs. */
     public static boolean isSupported(boolean logError) {
         try {
@@ -35,9 +46,7 @@ public static boolean isSupported(boolean logError) {
             }
 
             for (var gpu : availableGPUs) {
-                if (gpu.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
-                    || (gpu.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
-                        && gpu.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR)) {
+                if (meetsComputeCapabilityRequirements(gpu.computeCapabilityMajor(), gpu.computeCapabilityMinor()) == false) {
                     if (logError) {
                         LOG.warn(
                             "GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])",
@@ -116,4 +125,48 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) {
         }
         return null;
     }
+
+    /**
+     * Returns the total device memory in bytes of the first available compatible GPU.
+     * This is a constant property of the GPU device and is cached after the first call.
+     *
+     * @return total device memory in bytes, or -1 if GPU is not available or supported
+     */
+    public static long getTotalGpuMemory() {
+        if (cachedTotalGpuMemory != null) {
+            return cachedTotalGpuMemory;
+        }
+
+        synchronized (GPUSupport.class) {
+            if (cachedTotalGpuMemory != null) {
+                return cachedTotalGpuMemory;
+            }
+
+            try {
+                var gpuInfoProvider = CuVSProvider.provider().gpuInfoProvider();
+                var availableGPUs = gpuInfoProvider.availableGPUs();
+
+                for (var gpu : availableGPUs) {
+                    boolean hasRequiredCapability = meetsComputeCapabilityRequirements(
+                        gpu.computeCapabilityMajor(),
+                        gpu.computeCapabilityMinor()
+                    );
+                    boolean hasRequiredMemory = gpu.totalDeviceMemoryInBytes() >= MIN_DEVICE_MEMORY_IN_BYTES;
+
+                    if (hasRequiredCapability && hasRequiredMemory) {
+                        cachedTotalGpuMemory = gpu.totalDeviceMemoryInBytes();
+                        return cachedTotalGpuMemory;
+                    }
+                }
+
+                // No suitable GPU found
+                cachedTotalGpuMemory = -1L;
+                return -1L;
+            } catch (Throwable e) {
+                LOG.debug("Unable to query GPU total memory", e);
+                cachedTotalGpuMemory = -1L;
+                return -1L;
+            }
+        }
+    }
 }
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java
new file mode 100644
index 0000000000000..c47cf0beff2b5
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java
@@ -0,0 +1,149 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CagraIndexParams;
+import com.nvidia.cuvs.CuVSIvfPqIndexParams;
+import com.nvidia.cuvs.CuVSIvfPqParams;
+import com.nvidia.cuvs.CuVSIvfPqSearchParams;
+
+/**
+ * Factory for creating {@link CuVSIvfPqParams} with automatic parameter calculation
+ * based on dataset dimensions and distance metric.
+ *
+ * <p>This class provides a Java equivalent to the C++ function:
+ * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)}
+ *
+ * <p>The parameters are calculated automatically based on the dataset shape (number of
+ * rows and features), following the same heuristics as the C++ implementation.
+ *
+ * TODO: Remove this class when cuvs 25.12 is available and use functions from there directly.
+ */
+public class CuVSIvfPqParamsFactory {
+
+    /**
+     * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on the
+     * dataset dimensions, distance metric, and efConstruction parameter.
+     *
+     * <p>This method replicates the parameter calculation logic from the C++ function:
+     * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)}
+     *
+     * @param numVectors the number of vectors in the dataset
+     * @param dims the dimensionality of the vectors
+     * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine)
+     * @param efConstruction the efConstruction parameter in an HNSW graph
+     * @return a {@link CuVSIvfPqParams} instance with calculated parameters
+     * @throws IllegalArgumentException if dimensions are invalid
+     */
+    public static CuVSIvfPqParams create(int numVectors, int dims, CagraIndexParams.CuvsDistanceType distanceType, int efConstruction) {
+        long nRows = numVectors;
+        long nFeatures = dims;
+
+        if (nRows <= 0 || nFeatures <= 0) {
+            throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures);
+        }
+        return createFromDimensions(nRows, nFeatures, distanceType, efConstruction);
+    }
+
+    /**
+     * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on dataset
+     * dimensions and construction parameter.
+     *
+     * <p>This is a convenience method when you have the dataset dimensions but not the dataset
+     * object itself. The calculation logic is identical to {@link #create(int, int,
+     * CagraIndexParams.CuvsDistanceType, int)}.
+     *
+     * @param nRows the number of rows (vectors) in the dataset
+     * @param nFeatures the number of features (dimensions) per vector
+     * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine)
+     * @param efConstruction the construction parameter for parameter calculation
+     * @return a {@link CuVSIvfPqParams} instance with calculated parameters
+     * @throws IllegalArgumentException if dimensions are invalid
+     */
+    public static CuVSIvfPqParams createFromDimensions(
+        long nRows,
+        long nFeatures,
+        CagraIndexParams.CuvsDistanceType distanceType,
+        int efConstruction
+    ) {
+        if (nRows <= 0 || nFeatures <= 0) {
+            throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures);
+        }
+
+        // Calculate PQ dimensions and bits based on feature count
+        int pqDim;
+        int pqBits;
+
+        if (nFeatures <= 32) {
+            pqDim = 16;
+            pqBits = 8;
+        } else {
+            pqBits = 4;
+            if (nFeatures <= 64) {
+                pqDim = 32;
+            } else if (nFeatures <= 128) {
+                pqDim = 64;
+            } else if (nFeatures <= 192) {
+                pqDim = 96;
+            } else {
+                // Round up to nearest multiple of 128
+                pqDim = (int) roundUpSafe(nFeatures / 2, 128);
+            }
+        }
+        // Calculate number of lists: approximately 1 cluster per 2000 rows
+        int nLists = Math.max(1, (int) (nRows / 2000));
+
+        // Calculate kmeans training set fraction adaptively
+        final double kMinPointsPerCluster = 32.0;
+        final double minKmeansTrainsetPoints = kMinPointsPerCluster * nLists;
+        final double maxKmeansTrainsetFraction = 1.0;
+        final double minKmeansTrainsetFraction = Math.min(maxKmeansTrainsetFraction, minKmeansTrainsetPoints / nRows);
+        double kmeansTrainsetFraction = Math.clamp(1.0 / Math.sqrt(nRows * 1e-5), minKmeansTrainsetFraction, maxKmeansTrainsetFraction);
+
+        // Calculate number of probes based on number of lists and efConstruction
+        int nProbes = Math.round((float) (2.0 + Math.sqrt(nLists) / 20.0 + efConstruction / 16.0));
+
+        // Build index parameters
+        CuVSIvfPqIndexParams indexParams = new CuVSIvfPqIndexParams.Builder().withMetric(distanceType)
+            .withPqDim(pqDim)
+            .withPqBits(pqBits)
+            .withNLists(nLists)
+            .withKmeansNIters(10)
+            .withKmeansTrainsetFraction(kmeansTrainsetFraction)
+            .withCodebookKind(CagraIndexParams.CodebookGen.PER_SUBSPACE)
+            .build();
+
+        // Build search parameters
+        CuVSIvfPqSearchParams searchParams = new CuVSIvfPqSearchParams.Builder().withNProbes(nProbes)
+            .withLutDtype(CagraIndexParams.CudaDataType.CUDA_R_16F)
+            .withInternalDistanceDtype(CagraIndexParams.CudaDataType.CUDA_R_16F)
+            .build();
+
+        // Build and return the complete IVF_PQ parameters
+        return new CuVSIvfPqParams.Builder().withCuVSIvfPqIndexParams(indexParams)
+            .withCuVSIvfPqSearchParams(searchParams)
+            .withRefinementRate(1.0f)
+            .build();
+    }
+
+    /**
+     * Helper method to round up to the nearest multiple of a given divisor.
+     *
+     * <p>Equivalent to C++ {@code raft::round_up_safe<uint32_t>(value, divisor)}
+     *
+     * @param value the value to round up
+     * @param divisor the divisor to round to
+     * @return the rounded up value
+     */
+    private static long roundUpSafe(long value, long divisor) {
+        if (divisor <= 0) {
+            throw new IllegalArgumentException("divisor must be positive");
+        }
+        return ((value + divisor - 1) / divisor) * divisor;
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java
index 64e2f920d2c0c..b8cc101ff32ea 100644
--- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java
@@ -59,6 +59,23 @@ ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataT
     /** Shuts down the manager, releasing all open resources. */
     void shutdown();
 
+    /**
+     * Estimates the required GPU memory for building an index using the NN_DESCENT algorithm.
+     *
+     * @param numVectors the number of vectors
+     * @param dims the dimensionality of vectors
+     * @param dataType the data type of the vectors
+     * @return the estimated memory in bytes needed for NN_DESCENT
+     */
+    static long estimateNNDescentMemory(int numVectors, int dims, CuVSMatrix.DataType dataType) {
+        int elementTypeBytes = switch (dataType) {
+            case FLOAT -> Float.BYTES;
+            case INT, UINT -> Integer.BYTES;
+            case BYTE -> Byte.BYTES;
+        };
+        return (long) (2.0 * numVectors * dims * elementTypeBytes);
+    }
+
     /** Returns the system-wide pooling manager. */
     static CuVSResourceManager pooling() {
         return PoolingCuVSResourceManager.Holder.INSTANCE;
@@ -193,17 +210,16 @@ public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataTyp
         }
 
         private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataType dataType, CagraIndexParams cagraIndexParams) {
-            int elementTypeBytes = switch (dataType) {
-                case FLOAT -> Float.BYTES;
-                case INT, UINT -> Integer.BYTES;
-                case BYTE -> Byte.BYTES;
-            };
-
             if (cagraIndexParams.getCagraGraphBuildAlgo() == CagraIndexParams.CagraGraphBuildAlgo.IVF_PQ
                 && cagraIndexParams.getCuVSIvfPqParams() != null
                 && cagraIndexParams.getCuVSIvfPqParams().getIndexParams() != null
                 && cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqDim() != 0) {
                 // See https://docs.rapids.ai/api/cuvs/nightly/neighbors/ivfpq/#index-device-memory
+                int elementTypeBytes = switch (dataType) {
+                    case FLOAT -> Float.BYTES;
+                    case INT, UINT -> Integer.BYTES;
+                    case BYTE -> Byte.BYTES;
+                };
                 var pqDim = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqDim();
                 var pqBits = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqBits();
                 var numClusters = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getnLists();
@@ -211,7 +227,7 @@ private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataTyp
                 return (long) (GPU_COMPUTATION_MEMORY_FACTOR * approximatedIvfBytes);
             }
 
-            return (long) (GPU_COMPUTATION_MEMORY_FACTOR * numVectors * dims * elementTypeBytes);
+            return CuVSResourceManager.estimateNNDescentMemory(numVectors, dims, dataType);
         }
 
         // visible for testing
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java
index 20d7612533cc4..f18bfa6f6e613 100644
--- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java
@@ -42,6 +42,7 @@
 import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils;
 import org.elasticsearch.logging.LogManager;
 import org.elasticsearch.logging.Logger;
+import org.elasticsearch.xpack.gpu.GPUSupport;
 
 import java.io.IOException;
 import java.lang.foreign.Arena;
@@ -71,6 +72,9 @@ final class ES92GpuHnswVectorsWriter extends KnnVectorsWriter {
     private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
     private static final long DIRECT_COPY_THRESHOLD_IN_BYTES = 128 * 1024 * 1024; // 128MB
 
+    // TODO: lower the numVectors threshold when to switch to IVF_PQ based on more benchmarks
+    private static final long MAX_NUM_VECTORS_FOR_NN_DESCENT = 5_000_000L;
+
     private final CuVSResourceManager cuVSResourceManager;
     private final SegmentWriteState segmentWriteState;
     private final IndexOutput meta, vectorIndex;
@@ -178,9 +182,13 @@ private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IO
             var started = System.nanoTime();
             var fieldInfo = field.fieldInfo;
 
-            CagraIndexParams cagraIndexParams = createCagraIndexParams(fieldInfo.getVectorSimilarityFunction());
-
             var numVectors = field.flatFieldVectorsWriter.getVectors().size();
+            CagraIndexParams cagraIndexParams = createCagraIndexParams(
+                fieldInfo.getVectorSimilarityFunction(),
+                numVectors,
+                fieldInfo.getVectorDimension()
+            );
+
             if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) {
                 if (logger.isDebugEnabled()) {
                     logger.debug(
@@ -334,7 +342,7 @@ private CagraIndex buildGPUIndex(
         return index;
     }
 
-    private CagraIndexParams createCagraIndexParams(VectorSimilarityFunction similarityFunction) {
+    private CagraIndexParams createCagraIndexParams(VectorSimilarityFunction similarityFunction, int numVectors, int dims) {
         CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) {
             case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded;
             case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded;
@@ -350,14 +358,52 @@ private CagraIndexParams createCagraIndexParams(VectorSimilarityFunction similar
             }
         };
 
-        // TODO: expose cagra index params for algorithm, NNDescentNumIterations
-        return new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use?
-            .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT)
-            .withGraphDegree(M)
-            .withIntermediateGraphDegree(beamWidth)
-            .withNNDescentNumIterations(5)
-            .withMetric(distanceType)
-            .build();
+        int numCPUThreads = 1; // TODO: how many CPU threads we can use?
+        CagraIndexParams params;
+
+        boolean useIvfPQ = false;
+        // Check if we should use IVF_PQ based on vector count and distance type
+        // IVF_PQ doesn't support Cosine distance in CUVS 25.10
+        // TODO: Remove this check on distance when updating to CUVS 25.12+
+        if ((distanceType != CagraIndexParams.CuvsDistanceType.CosineExpanded) && (numVectors >= MAX_NUM_VECTORS_FOR_NN_DESCENT)) {
+            useIvfPQ = true;
+        }
+
+        // Check if we should use IVF_PQ due to insufficient GPU memory for NN_DESCENT
+        if ((useIvfPQ == false) && distanceType != CagraIndexParams.CuvsDistanceType.CosineExpanded) {
+            long totalDeviceMemory = GPUSupport.getTotalGpuMemory();
+            if (totalDeviceMemory > 0) {
+                long requiredMemoryForNnDescent = CuVSResourceManager.estimateNNDescentMemory(numVectors, dims, dataType);
+                if (requiredMemoryForNnDescent > totalDeviceMemory) {
+                    useIvfPQ = true;
+                    if (logger.isDebugEnabled()) {
+                        logger.debug(
+                            "Using IVF_PQ algorithm due to insufficient GPU memory for NN_DESCENT; required [{}B] > total [{}B]",
+                            requiredMemoryForNnDescent,
+                            totalDeviceMemory
+                        );
+                    }
+                }
+            }
+        }
+
+        if (useIvfPQ) {
+            var ivfPqParams = CuVSIvfPqParamsFactory.create(numVectors, dims, distanceType, beamWidth);
+            params = new CagraIndexParams.Builder().withNumWriterThreads(numCPUThreads)
+                .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.IVF_PQ)
+                .withCuVSIvfPqParams(ivfPqParams)
+                .withMetric(distanceType)
+                .build();
+        } else {
+            params = new CagraIndexParams.Builder().withNumWriterThreads(numCPUThreads)
+                .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT)
+                .withGraphDegree(M)
+                .withIntermediateGraphDegree(beamWidth)
+                .withNNDescentNumIterations(5)
+                .withMetric(distanceType)
+                .build();
+        }
+        return params;
     }
 
     private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException {
@@ -515,7 +561,11 @@ private void mergeByteVectorField(
             ? null
             : VectorsFormatReflectionUtils.getByteScoringSupplierVectorOrNull(randomScorerSupplier);
 
-        CagraIndexParams cagraIndexParams = createCagraIndexParams(fieldInfo.getVectorSimilarityFunction());
+        CagraIndexParams cagraIndexParams = createCagraIndexParams(
+            fieldInfo.getVectorSimilarityFunction(),
+            numVectors,
+            fieldInfo.getVectorDimension()
+        );
 
         if (vectorValues != null) {
             IndexInput slice = vectorValues.getSlice();
@@ -612,7 +662,11 @@ private void mergeFloatVectorField(
         var vectorValues = randomScorerSupplier == null
             ? null
             : VectorsFormatReflectionUtils.getFloatScoringSupplierVectorOrNull(randomScorerSupplier);
-        CagraIndexParams cagraIndexParams = createCagraIndexParams(fieldInfo.getVectorSimilarityFunction());
+        CagraIndexParams cagraIndexParams = createCagraIndexParams(
+            fieldInfo.getVectorSimilarityFunction(),
+            numVectors,
+            fieldInfo.getVectorDimension()
+        );
 
         if (vectorValues != null) {
             IndexInput slice = vectorValues.getSlice();