diff --git a/docs/changelog/137126.yaml b/docs/changelog/137126.yaml new file mode 100644 index 0000000000000..b3831939dcf07 --- /dev/null +++ b/docs/changelog/137126.yaml @@ -0,0 +1,5 @@ +pr: 137126 +summary: Use IVF_PQ for GPU index build for large datasets +area: Vector Search +type: enhancement +issues: [] diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java index c21bda894790a..c7e41870636d9 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -22,6 +22,17 @@ public class GPUSupport { // Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than that via the API private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L; + private static volatile Long cachedTotalGpuMemory; + + /** + * Checks whether a GPU meets the minimum requirements for GPU-accelerated indexing. + * Checks compute capability and total memory requirements. + */ + private static boolean meetsComputeCapabilityRequirements(int major, int minor) { + return major >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR + && (major > GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR || minor >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR); + } + /** Tells whether the platform supports cuvs. */ public static boolean isSupported(boolean logError) { try { @@ -35,9 +46,7 @@ public static boolean isSupported(boolean logError) { } for (var gpu : availableGPUs) { - if (gpu.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR - || (gpu.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR - && gpu.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR)) { + if (meetsComputeCapabilityRequirements(gpu.computeCapabilityMajor(), gpu.computeCapabilityMinor()) == false) { if (logError) { LOG.warn( "GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])", @@ -116,4 +125,48 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) { } return null; } + + /** + * Returns the total device memory in bytes of the first available compatible GPU. + * This is a constant property of the GPU device and is cached after the first call. + * + * @return total device memory in bytes, or -1 if GPU is not available or supported + */ + public static long getTotalGpuMemory() { + if (cachedTotalGpuMemory != null) { + return cachedTotalGpuMemory; + } + + synchronized (GPUSupport.class) { + if (cachedTotalGpuMemory != null) { + return cachedTotalGpuMemory; + } + + try { + var gpuInfoProvider = CuVSProvider.provider().gpuInfoProvider(); + var availableGPUs = gpuInfoProvider.availableGPUs(); + + for (var gpu : availableGPUs) { + boolean hasRequiredCapability = meetsComputeCapabilityRequirements( + gpu.computeCapabilityMajor(), + gpu.computeCapabilityMinor() + ); + boolean hasRequiredMemory = gpu.totalDeviceMemoryInBytes() >= MIN_DEVICE_MEMORY_IN_BYTES; + + if (hasRequiredCapability && hasRequiredMemory) { + cachedTotalGpuMemory = gpu.totalDeviceMemoryInBytes(); + return cachedTotalGpuMemory; + } + } + + // No suitable GPU found + cachedTotalGpuMemory = -1L; + return -1L; + } catch (Throwable e) { + LOG.debug("Unable to query GPU total memory", e); + cachedTotalGpuMemory = -1L; + return -1L; + } + } + } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java new file mode 100644 index 0000000000000..c47cf0beff2b5 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSIvfPqParamsFactory.java @@ -0,0 +1,149 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CagraIndexParams; +import com.nvidia.cuvs.CuVSIvfPqIndexParams; +import com.nvidia.cuvs.CuVSIvfPqParams; +import com.nvidia.cuvs.CuVSIvfPqSearchParams; + +/** + * Factory for creating {@link CuVSIvfPqParams} with automatic parameter calculation + * based on dataset dimensions and distance metric. + * + *
This class provides a Java equivalent to the C++ function: + * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)} + * + *
The parameters are calculated automatically based on the dataset shape (number of + * rows and features), following the same heuristics as the C++ implementation. + * + * TODO: Remove this class when cuvs 25.12 is available and use functions from there directly. + */ +public class CuVSIvfPqParamsFactory { + + /** + * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on the + * dataset dimensions, distance metric, and efConstruction parameter. + * + *
This method replicates the parameter calculation logic from the C++ function: + * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)} + * + * @param numVectors the number of vectors in the dataset + * @param dims the dimensionality of the vectors + * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine) + * @param efConstruction the efConstruction parameter in an HNSW graph + * @return a {@link CuVSIvfPqParams} instance with calculated parameters + * @throws IllegalArgumentException if dimensions are invalid + */ + public static CuVSIvfPqParams create(int numVectors, int dims, CagraIndexParams.CuvsDistanceType distanceType, int efConstruction) { + long nRows = numVectors; + long nFeatures = dims; + + if (nRows <= 0 || nFeatures <= 0) { + throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures); + } + return createFromDimensions(nRows, nFeatures, distanceType, efConstruction); + } + + /** + * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on dataset + * dimensions and construction parameter. + * + *
This is a convenience method when you have the dataset dimensions but not the dataset + * object itself. The calculation logic is identical to {@link #create(int, int, + * CagraIndexParams.CuvsDistanceType, int)}. + * + * @param nRows the number of rows (vectors) in the dataset + * @param nFeatures the number of features (dimensions) per vector + * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine) + * @param efConstruction the construction parameter for parameter calculation + * @return a {@link CuVSIvfPqParams} instance with calculated parameters + * @throws IllegalArgumentException if dimensions are invalid + */ + public static CuVSIvfPqParams createFromDimensions( + long nRows, + long nFeatures, + CagraIndexParams.CuvsDistanceType distanceType, + int efConstruction + ) { + if (nRows <= 0 || nFeatures <= 0) { + throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures); + } + + // Calculate PQ dimensions and bits based on feature count + int pqDim; + int pqBits; + + if (nFeatures <= 32) { + pqDim = 16; + pqBits = 8; + } else { + pqBits = 4; + if (nFeatures <= 64) { + pqDim = 32; + } else if (nFeatures <= 128) { + pqDim = 64; + } else if (nFeatures <= 192) { + pqDim = 96; + } else { + // Round up to nearest multiple of 128 + pqDim = (int) roundUpSafe(nFeatures / 2, 128); + } + } + // Calculate number of lists: approximately 1 cluster per 2000 rows + int nLists = Math.max(1, (int) (nRows / 2000)); + + // Calculate kmeans training set fraction adaptively + final double kMinPointsPerCluster = 32.0; + final double minKmeansTrainsetPoints = kMinPointsPerCluster * nLists; + final double maxKmeansTrainsetFraction = 1.0; + final double minKmeansTrainsetFraction = Math.min(maxKmeansTrainsetFraction, minKmeansTrainsetPoints / nRows); + double kmeansTrainsetFraction = Math.clamp(1.0 / Math.sqrt(nRows * 1e-5), minKmeansTrainsetFraction, maxKmeansTrainsetFraction); + + // Calculate number of probes based on number of lists and efConstruction + int nProbes = Math.round((float) (2.0 + Math.sqrt(nLists) / 20.0 + efConstruction / 16.0)); + + // Build index parameters + CuVSIvfPqIndexParams indexParams = new CuVSIvfPqIndexParams.Builder().withMetric(distanceType) + .withPqDim(pqDim) + .withPqBits(pqBits) + .withNLists(nLists) + .withKmeansNIters(10) + .withKmeansTrainsetFraction(kmeansTrainsetFraction) + .withCodebookKind(CagraIndexParams.CodebookGen.PER_SUBSPACE) + .build(); + + // Build search parameters + CuVSIvfPqSearchParams searchParams = new CuVSIvfPqSearchParams.Builder().withNProbes(nProbes) + .withLutDtype(CagraIndexParams.CudaDataType.CUDA_R_16F) + .withInternalDistanceDtype(CagraIndexParams.CudaDataType.CUDA_R_16F) + .build(); + + // Build and return the complete IVF_PQ parameters + return new CuVSIvfPqParams.Builder().withCuVSIvfPqIndexParams(indexParams) + .withCuVSIvfPqSearchParams(searchParams) + .withRefinementRate(1.0f) + .build(); + } + + /** + * Helper method to round up to the nearest multiple of a given divisor. + * + *
Equivalent to C++ {@code raft::round_up_safe