diff --git a/docs/changelog/137126.yaml b/docs/changelog/137126.yaml new file mode 100644 index 0000000000000..b3831939dcf07 --- /dev/null +++ b/docs/changelog/137126.yaml @@ -0,0 +1,5 @@ +pr: 137126 +summary: Use IVF_PQ for GPU index build for large datasets +area: Vector Search +type: enhancement +issues: [] diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java index c21bda894790a..c7e41870636d9 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -22,6 +22,17 @@ public class GPUSupport { // Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than that via the API private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L; + private static volatile Long cachedTotalGpuMemory; + + /** + * Checks whether a GPU meets the minimum requirements for GPU-accelerated indexing. + * Checks compute capability and total memory requirements. + */ + private static boolean meetsComputeCapabilityRequirements(int major, int minor) { + return major >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR + && (major > GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR || minor >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR); + } + /** Tells whether the platform supports cuvs. */ public static boolean isSupported(boolean logError) { try { @@ -35,9 +46,7 @@ public static boolean isSupported(boolean logError) { } for (var gpu : availableGPUs) { - if (gpu.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR - || (gpu.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR - && gpu.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR)) { + if (meetsComputeCapabilityRequirements(gpu.computeCapabilityMajor(), gpu.computeCapabilityMinor()) == false) { if (logError) { LOG.warn( "GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])", @@ -116,4 +125,48 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) { } return null; } + + /** + * Returns the total device memory in bytes of the first available compatible GPU. + * This is a constant property of the GPU device and is cached after the first call. + * + * @return total device memory in bytes, or -1 if GPU is not available or supported + */ + public static long getTotalGpuMemory() { + if (cachedTotalGpuMemory != null) { + return cachedTotalGpuMemory; + } + + synchronized (GPUSupport.class) { + if (cachedTotalGpuMemory != null) { + return cachedTotalGpuMemory; + } + + try { + var gpuInfoProvider = CuVSProvider.provider().gpuInfoProvider(); + var availableGPUs = gpuInfoProvider.availableGPUs(); + + for (var gpu : availableGPUs) { + boolean hasRequiredCapability = meetsComputeCapabilityRequirements( + gpu.computeCapabilityMajor(), + gpu.computeCapabilityMinor() + ); + boolean hasRequiredMemory = gpu.totalDeviceMemoryInBytes() >= MIN_DEVICE_MEMORY_IN_BYTES; + + if (hasRequiredCapability && hasRequiredMemory) { + cachedTotalGpuMemory = gpu.totalDeviceMemoryInBytes(); + return cachedTotalGpuMemory; + } + } + + // No suitable GPU found + cachedTotalGpuMemory = -1L; + return -1L; + } catch (Throwable e) { + LOG.debug("Unable to query GPU total memory", e); + cachedTotalGpuMemory = -1L; + return -1L; + } + } + } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java new file mode 100644 index 0000000000000..c47cf0beff2b5 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java @@ -0,0 +1,149 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CagraIndexParams; +import com.nvidia.cuvs.CuVSIvfPqIndexParams; +import com.nvidia.cuvs.CuVSIvfPqParams; +import com.nvidia.cuvs.CuVSIvfPqSearchParams; + +/** + * Factory for creating {@link CuVSIvfPqParams} with automatic parameter calculation + * based on dataset dimensions and distance metric. + * + *

This class provides a Java equivalent to the C++ function: + * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)} + * + *

The parameters are calculated automatically based on the dataset shape (number of + * rows and features), following the same heuristics as the C++ implementation. + * + * TODO: Remove this class when cuvs 25.12 is available and use functions from there directly. + */ +public class CuVSIvfPqParamsFactory { + + /** + * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on the + * dataset dimensions, distance metric, and efConstruction parameter. + * + *

This method replicates the parameter calculation logic from the C++ function: + * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)} + * + * @param numVectors the number of vectors in the dataset + * @param dims the dimensionality of the vectors + * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine) + * @param efConstruction the efConstruction parameter in an HNSW graph + * @return a {@link CuVSIvfPqParams} instance with calculated parameters + * @throws IllegalArgumentException if dimensions are invalid + */ + public static CuVSIvfPqParams create(int numVectors, int dims, CagraIndexParams.CuvsDistanceType distanceType, int efConstruction) { + long nRows = numVectors; + long nFeatures = dims; + + if (nRows <= 0 || nFeatures <= 0) { + throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures); + } + return createFromDimensions(nRows, nFeatures, distanceType, efConstruction); + } + + /** + * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on dataset + * dimensions and construction parameter. + * + *

This is a convenience method when you have the dataset dimensions but not the dataset + * object itself. The calculation logic is identical to {@link #create(int, int, + * CagraIndexParams.CuvsDistanceType, int)}. + * + * @param nRows the number of rows (vectors) in the dataset + * @param nFeatures the number of features (dimensions) per vector + * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine) + * @param efConstruction the construction parameter for parameter calculation + * @return a {@link CuVSIvfPqParams} instance with calculated parameters + * @throws IllegalArgumentException if dimensions are invalid + */ + public static CuVSIvfPqParams createFromDimensions( + long nRows, + long nFeatures, + CagraIndexParams.CuvsDistanceType distanceType, + int efConstruction + ) { + if (nRows <= 0 || nFeatures <= 0) { + throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures); + } + + // Calculate PQ dimensions and bits based on feature count + int pqDim; + int pqBits; + + if (nFeatures <= 32) { + pqDim = 16; + pqBits = 8; + } else { + pqBits = 4; + if (nFeatures <= 64) { + pqDim = 32; + } else if (nFeatures <= 128) { + pqDim = 64; + } else if (nFeatures <= 192) { + pqDim = 96; + } else { + // Round up to nearest multiple of 128 + pqDim = (int) roundUpSafe(nFeatures / 2, 128); + } + } + // Calculate number of lists: approximately 1 cluster per 2000 rows + int nLists = Math.max(1, (int) (nRows / 2000)); + + // Calculate kmeans training set fraction adaptively + final double kMinPointsPerCluster = 32.0; + final double minKmeansTrainsetPoints = kMinPointsPerCluster * nLists; + final double maxKmeansTrainsetFraction = 1.0; + final double minKmeansTrainsetFraction = Math.min(maxKmeansTrainsetFraction, minKmeansTrainsetPoints / nRows); + double kmeansTrainsetFraction = Math.clamp(1.0 / Math.sqrt(nRows * 1e-5), minKmeansTrainsetFraction, maxKmeansTrainsetFraction); + + // Calculate number of probes based on number of lists and efConstruction + int nProbes = Math.round((float) (2.0 + Math.sqrt(nLists) / 20.0 + efConstruction / 16.0)); + + // Build index parameters + CuVSIvfPqIndexParams indexParams = new CuVSIvfPqIndexParams.Builder().withMetric(distanceType) + .withPqDim(pqDim) + .withPqBits(pqBits) + .withNLists(nLists) + .withKmeansNIters(10) + .withKmeansTrainsetFraction(kmeansTrainsetFraction) + .withCodebookKind(CagraIndexParams.CodebookGen.PER_SUBSPACE) + .build(); + + // Build search parameters + CuVSIvfPqSearchParams searchParams = new CuVSIvfPqSearchParams.Builder().withNProbes(nProbes) + .withLutDtype(CagraIndexParams.CudaDataType.CUDA_R_16F) + .withInternalDistanceDtype(CagraIndexParams.CudaDataType.CUDA_R_16F) + .build(); + + // Build and return the complete IVF_PQ parameters + return new CuVSIvfPqParams.Builder().withCuVSIvfPqIndexParams(indexParams) + .withCuVSIvfPqSearchParams(searchParams) + .withRefinementRate(1.0f) + .build(); + } + + /** + * Helper method to round up to the nearest multiple of a given divisor. + * + *

Equivalent to C++ {@code raft::round_up_safe(value, divisor)} + * + * @param value the value to round up + * @param divisor the divisor to round to + * @return the rounded up value + */ + private static long roundUpSafe(long value, long divisor) { + if (divisor <= 0) { + throw new IllegalArgumentException("divisor must be positive"); + } + return ((value + divisor - 1) / divisor) * divisor; + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java index 64e2f920d2c0c..b8cc101ff32ea 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java @@ -59,6 +59,23 @@ ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataT /** Shuts down the manager, releasing all open resources. */ void shutdown(); + /** + * Estimates the required GPU memory for building an index using the NN_DESCENT algorithm. + * + * @param numVectors the number of vectors + * @param dims the dimensionality of vectors + * @param dataType the data type of the vectors + * @return the estimated memory in bytes needed for NN_DESCENT + */ + static long estimateNNDescentMemory(int numVectors, int dims, CuVSMatrix.DataType dataType) { + int elementTypeBytes = switch (dataType) { + case FLOAT -> Float.BYTES; + case INT, UINT -> Integer.BYTES; + case BYTE -> Byte.BYTES; + }; + return (long) (2.0 * numVectors * dims * elementTypeBytes); + } + /** Returns the system-wide pooling manager. */ static CuVSResourceManager pooling() { return PoolingCuVSResourceManager.Holder.INSTANCE; @@ -193,17 +210,16 @@ public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataTyp } private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataType dataType, CagraIndexParams cagraIndexParams) { - int elementTypeBytes = switch (dataType) { - case FLOAT -> Float.BYTES; - case INT, UINT -> Integer.BYTES; - case BYTE -> Byte.BYTES; - }; - if (cagraIndexParams.getCagraGraphBuildAlgo() == CagraIndexParams.CagraGraphBuildAlgo.IVF_PQ && cagraIndexParams.getCuVSIvfPqParams() != null && cagraIndexParams.getCuVSIvfPqParams().getIndexParams() != null && cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqDim() != 0) { // See https://docs.rapids.ai/api/cuvs/nightly/neighbors/ivfpq/#index-device-memory + int elementTypeBytes = switch (dataType) { + case FLOAT -> Float.BYTES; + case INT, UINT -> Integer.BYTES; + case BYTE -> Byte.BYTES; + }; var pqDim = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqDim(); var pqBits = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqBits(); var numClusters = cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getnLists(); @@ -211,7 +227,7 @@ private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataTyp return (long) (GPU_COMPUTATION_MEMORY_FACTOR * approximatedIvfBytes); } - return (long) (GPU_COMPUTATION_MEMORY_FACTOR * numVectors * dims * elementTypeBytes); + return CuVSResourceManager.estimateNNDescentMemory(numVectors, dims, dataType); } // visible for testing diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java index 20d7612533cc4..f18bfa6f6e613 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -42,6 +42,7 @@ import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; +import org.elasticsearch.xpack.gpu.GPUSupport; import java.io.IOException; import java.lang.foreign.Arena; @@ -71,6 +72,9 @@ final class ES92GpuHnswVectorsWriter extends KnnVectorsWriter { private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; private static final long DIRECT_COPY_THRESHOLD_IN_BYTES = 128 * 1024 * 1024; // 128MB + // TODO: lower the numVectors threshold when to switch to IVF_PQ based on more benchmarks + private static final long MAX_NUM_VECTORS_FOR_NN_DESCENT = 5_000_000L; + private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; private final IndexOutput meta, vectorIndex; @@ -178,9 +182,13 @@ private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IO var started = System.nanoTime(); var fieldInfo = field.fieldInfo; - CagraIndexParams cagraIndexParams = createCagraIndexParams(fieldInfo.getVectorSimilarityFunction()); - var numVectors = field.flatFieldVectorsWriter.getVectors().size(); + CagraIndexParams cagraIndexParams = createCagraIndexParams( + fieldInfo.getVectorSimilarityFunction(), + numVectors, + fieldInfo.getVectorDimension() + ); + if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { if (logger.isDebugEnabled()) { logger.debug( @@ -334,7 +342,7 @@ private CagraIndex buildGPUIndex( return index; } - private CagraIndexParams createCagraIndexParams(VectorSimilarityFunction similarityFunction) { + private CagraIndexParams createCagraIndexParams(VectorSimilarityFunction similarityFunction, int numVectors, int dims) { CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded; case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; @@ -350,14 +358,52 @@ private CagraIndexParams createCagraIndexParams(VectorSimilarityFunction similar } }; - // TODO: expose cagra index params for algorithm, NNDescentNumIterations - return new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use? - .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) - .withGraphDegree(M) - .withIntermediateGraphDegree(beamWidth) - .withNNDescentNumIterations(5) - .withMetric(distanceType) - .build(); + int numCPUThreads = 1; // TODO: how many CPU threads we can use? + CagraIndexParams params; + + boolean useIvfPQ = false; + // Check if we should use IVF_PQ based on vector count and distance type + // IVF_PQ doesn't support Cosine distance in CUVS 25.10 + // TODO: Remove this check on distance when updating to CUVS 25.12+ + if ((distanceType != CagraIndexParams.CuvsDistanceType.CosineExpanded) && (numVectors >= MAX_NUM_VECTORS_FOR_NN_DESCENT)) { + useIvfPQ = true; + } + + // Check if we should use IVF_PQ due to insufficient GPU memory for NN_DESCENT + if ((useIvfPQ == false) && distanceType != CagraIndexParams.CuvsDistanceType.CosineExpanded) { + long totalDeviceMemory = GPUSupport.getTotalGpuMemory(); + if (totalDeviceMemory > 0) { + long requiredMemoryForNnDescent = CuVSResourceManager.estimateNNDescentMemory(numVectors, dims, dataType); + if (requiredMemoryForNnDescent > totalDeviceMemory) { + useIvfPQ = true; + if (logger.isDebugEnabled()) { + logger.debug( + "Using IVF_PQ algorithm due to insufficient GPU memory for NN_DESCENT; required [{}B] > total [{}B]", + requiredMemoryForNnDescent, + totalDeviceMemory + ); + } + } + } + } + + if (useIvfPQ) { + var ivfPqParams = CuVSIvfPqParamsFactory.create(numVectors, dims, distanceType, beamWidth); + params = new CagraIndexParams.Builder().withNumWriterThreads(numCPUThreads) + .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.IVF_PQ) + .withCuVSIvfPqParams(ivfPqParams) + .withMetric(distanceType) + .build(); + } else { + params = new CagraIndexParams.Builder().withNumWriterThreads(numCPUThreads) + .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) + .withGraphDegree(M) + .withIntermediateGraphDegree(beamWidth) + .withNNDescentNumIterations(5) + .withMetric(distanceType) + .build(); + } + return params; } private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException { @@ -515,7 +561,11 @@ private void mergeByteVectorField( ? null : VectorsFormatReflectionUtils.getByteScoringSupplierVectorOrNull(randomScorerSupplier); - CagraIndexParams cagraIndexParams = createCagraIndexParams(fieldInfo.getVectorSimilarityFunction()); + CagraIndexParams cagraIndexParams = createCagraIndexParams( + fieldInfo.getVectorSimilarityFunction(), + numVectors, + fieldInfo.getVectorDimension() + ); if (vectorValues != null) { IndexInput slice = vectorValues.getSlice(); @@ -612,7 +662,11 @@ private void mergeFloatVectorField( var vectorValues = randomScorerSupplier == null ? null : VectorsFormatReflectionUtils.getFloatScoringSupplierVectorOrNull(randomScorerSupplier); - CagraIndexParams cagraIndexParams = createCagraIndexParams(fieldInfo.getVectorSimilarityFunction()); + CagraIndexParams cagraIndexParams = createCagraIndexParams( + fieldInfo.getVectorSimilarityFunction(), + numVectors, + fieldInfo.getVectorDimension() + ); if (vectorValues != null) { IndexInput slice = vectorValues.getSlice();