Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137126.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137126
summary: Use IVF_PQ for GPU index build for large datasets
area: Vector Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ public class GPUSupport {
// Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than 8GB via the API
private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L;

private static volatile Long cachedTotalGpuMemory;

/**
* Checks whether a GPU's compute capability meets the minimum required for GPU-accelerated indexing.
* Only the compute capability (major.minor) is compared; total device memory is not checked here.
*/
private static boolean meetsComputeCapabilityRequirements(int major, int minor) {
    // A strictly newer major version is sufficient regardless of the minor version.
    if (major > GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR) {
        return true;
    }
    // A strictly older major version can never satisfy the requirement.
    if (major < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR) {
        return false;
    }
    // Same major version: the minor version decides.
    return minor >= GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR;
}

/** Tells whether the platform supports cuvs. */
public static boolean isSupported(boolean logError) {
try {
Expand All @@ -35,9 +46,7 @@ public static boolean isSupported(boolean logError) {
}

for (var gpu : availableGPUs) {
if (gpu.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
|| (gpu.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
&& gpu.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR)) {
if (meetsComputeCapabilityRequirements(gpu.computeCapabilityMajor(), gpu.computeCapabilityMinor()) == false) {
if (logError) {
LOG.warn(
"GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])",
Expand Down Expand Up @@ -116,4 +125,48 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) {
}
return null;
}

/**
* Returns the total device memory in bytes of the first available compatible GPU.
* This is a constant property of the GPU device and is cached after the first call.
*
* @return total device memory in bytes, or -1 if GPU is not available or supported
*/
public static long getTotalGpuMemory() {
    // Fast path: a single read of the volatile cache, no locking once the value is known.
    Long cached = cachedTotalGpuMemory;
    if (cached != null) {
        return cached;
    }

    synchronized (GPUSupport.class) {
        // Re-check under the lock: another thread may have populated the cache in the meantime.
        cached = cachedTotalGpuMemory;
        if (cached != null) {
            return cached;
        }

        // -1 means "no suitable GPU found" or "the query failed"; either outcome is cached.
        long totalMemory = -1L;
        try {
            for (var gpu : CuVSProvider.provider().gpuInfoProvider().availableGPUs()) {
                boolean capable = meetsComputeCapabilityRequirements(gpu.computeCapabilityMajor(), gpu.computeCapabilityMinor());
                long deviceMemory = gpu.totalDeviceMemoryInBytes();
                if (capable && deviceMemory >= MIN_DEVICE_MEMORY_IN_BYTES) {
                    // The first GPU satisfying both requirements wins.
                    totalMemory = deviceMemory;
                    break;
                }
            }
        } catch (Throwable e) {
            LOG.debug("Unable to query GPU total memory", e);
            totalMemory = -1L;
        }

        cachedTotalGpuMemory = totalMemory;
        return totalMemory;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.gpu.codec;

import com.nvidia.cuvs.CagraIndexParams;
import com.nvidia.cuvs.CuVSIvfPqIndexParams;
import com.nvidia.cuvs.CuVSIvfPqParams;
import com.nvidia.cuvs.CuVSIvfPqSearchParams;

/**
* Factory for creating {@link CuVSIvfPqParams} with automatic parameter calculation
* based on dataset dimensions and distance metric.
*
* <p>This class provides a Java equivalent to the C++ function:
* {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)}
*
* <p>The parameters are calculated automatically based on the dataset shape (number of
* rows and features), following the same heuristics as the C++ implementation.
*
* TODO: Remove this class when cuvs 25.12 is available and use functions from there directly.
*/
public class CuVSIvfPqParamsFactory {

    /**
     * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on the
     * dataset dimensions, distance metric, and efConstruction parameter.
     *
     * <p>This method replicates the parameter calculation logic from the C++ function:
     * {@code cuvs::neighbors::graph_build_params::ivf_pq_params(dataset_extents, metric)}
     *
     * <p>Argument validation and the actual calculation are performed by
     * {@link #createFromDimensions(long, long, CagraIndexParams.CuvsDistanceType, int)}.
     *
     * @param numVectors the number of vectors in the dataset
     * @param dims the dimensionality of the vectors
     * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine)
     * @param efConstruction the efConstruction parameter in an HNSW graph
     * @return a {@link CuVSIvfPqParams} instance with calculated parameters
     * @throws IllegalArgumentException if dimensions are invalid
     */
    public static CuVSIvfPqParams create(int numVectors, int dims, CagraIndexParams.CuvsDistanceType distanceType, int efConstruction) {
        // The delegate rejects non-positive sizes with the same exception and message, so no separate check is needed here.
        return createFromDimensions(numVectors, dims, distanceType, efConstruction);
    }

    /**
     * Creates {@link CuVSIvfPqParams} with automatically calculated parameters based on dataset
     * dimensions and construction parameter.
     *
     * <p>Calculated values:
     * <ul>
     *   <li>PQ dimension and bits: chosen from the feature count (8 bits up to 32 features, 4 bits above)</li>
     *   <li>number of lists: one per 2000 rows, at least 1, saturated at {@link Integer#MAX_VALUE}</li>
     *   <li>k-means trainset fraction: {@code 1 / sqrt(nRows * 1e-5)}, clamped so that the trainset holds
     *       at least 32 points per list and at most the whole dataset</li>
     *   <li>number of probes: {@code round(2 + sqrt(nLists) / 20 + efConstruction / 16)}</li>
     * </ul>
     *
     * @param nRows the number of rows (vectors) in the dataset
     * @param nFeatures the number of features (dimensions) per vector
     * @param distanceType the distance metric to use (e.g., L2Expanded, Cosine)
     * @param efConstruction the construction parameter for parameter calculation
     * @return a {@link CuVSIvfPqParams} instance with calculated parameters
     * @throws IllegalArgumentException if dimensions are not positive, or if the feature count is so large
     *         that the derived PQ dimension does not fit in an {@code int}
     */
    public static CuVSIvfPqParams createFromDimensions(
        long nRows,
        long nFeatures,
        CagraIndexParams.CuvsDistanceType distanceType,
        int efConstruction
    ) {
        if (nRows <= 0 || nFeatures <= 0) {
            throw new IllegalArgumentException("Dataset dimensions must be positive: rows=" + nRows + ", features=" + nFeatures);
        }

        // Calculate PQ dimensions and bits based on feature count
        final int pqDim;
        final int pqBits;

        if (nFeatures <= 32) {
            pqDim = 16;
            pqBits = 8;
        } else {
            pqBits = 4;
            if (nFeatures <= 64) {
                pqDim = 32;
            } else if (nFeatures <= 128) {
                pqDim = 64;
            } else if (nFeatures <= 192) {
                pqDim = 96;
            } else {
                // Half the feature count, rounded up to the nearest multiple of 128
                long roundedPqDim = roundUpSafe(nFeatures / 2, 128);
                // A plain (int) cast would silently wrap here; reject values that cannot be represented instead.
                if (roundedPqDim > Integer.MAX_VALUE) {
                    throw new IllegalArgumentException("Feature count too large to derive a PQ dimension: features=" + nFeatures);
                }
                pqDim = (int) roundedPqDim;
            }
        }

        // Calculate number of lists: approximately 1 cluster per 2000 rows.
        // Math.clamp(long, int, int) enforces the lower bound of 1 and saturates instead of wrapping on int overflow.
        int nLists = Math.clamp(nRows / 2000, 1, Integer.MAX_VALUE);

        // Calculate kmeans training set fraction adaptively
        final double kMinPointsPerCluster = 32.0;
        final double minKmeansTrainsetPoints = kMinPointsPerCluster * nLists;
        final double maxKmeansTrainsetFraction = 1.0;
        final double minKmeansTrainsetFraction = Math.min(maxKmeansTrainsetFraction, minKmeansTrainsetPoints / nRows);
        double kmeansTrainsetFraction = Math.clamp(1.0 / Math.sqrt(nRows * 1e-5), minKmeansTrainsetFraction, maxKmeansTrainsetFraction);

        // Calculate number of probes based on number of lists and efConstruction
        int nProbes = Math.round((float) (2.0 + Math.sqrt(nLists) / 20.0 + efConstruction / 16.0));

        // Build index parameters
        CuVSIvfPqIndexParams indexParams = new CuVSIvfPqIndexParams.Builder().withMetric(distanceType)
            .withPqDim(pqDim)
            .withPqBits(pqBits)
            .withNLists(nLists)
            .withKmeansNIters(10)
            .withKmeansTrainsetFraction(kmeansTrainsetFraction)
            .withCodebookKind(CagraIndexParams.CodebookGen.PER_SUBSPACE)
            .build();

        // Build search parameters
        CuVSIvfPqSearchParams searchParams = new CuVSIvfPqSearchParams.Builder().withNProbes(nProbes)
            .withLutDtype(CagraIndexParams.CudaDataType.CUDA_R_16F)
            .withInternalDistanceDtype(CagraIndexParams.CudaDataType.CUDA_R_16F)
            .build();

        // Build and return the complete IVF_PQ parameters
        return new CuVSIvfPqParams.Builder().withCuVSIvfPqIndexParams(indexParams)
            .withCuVSIvfPqSearchParams(searchParams)
            .withRefinementRate(1.0f)
            .build();
    }

    /**
     * Helper method to round up to the nearest multiple of a given divisor.
     *
     * <p>Equivalent to C++ {@code raft::round_up_safe<uint32_t>(value, divisor)}
     *
     * @param value the value to round up
     * @param divisor the divisor to round to
     * @return the rounded up value
     * @throws IllegalArgumentException if {@code divisor} is not positive
     */
    private static long roundUpSafe(long value, long divisor) {
        if (divisor <= 0) {
            throw new IllegalArgumentException("divisor must be positive");
        }
        return ((value + divisor - 1) / divisor) * divisor;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,23 @@ ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataT
/** Shuts down the manager, releasing all open resources. */
void shutdown();

/**
* Estimates the required GPU memory for building an index using the NN_DESCENT algorithm.
*
* @param numVectors the number of vectors
* @param dims the dimensionality of vectors
* @param dataType the data type of the vectors
* @return the estimated memory in bytes needed for NN_DESCENT
*/
static long estimateNNDescentMemory(int numVectors, int dims, CuVSMatrix.DataType dataType) {
    // Size in bytes of a single vector component for the given data type.
    final int bytesPerElement = switch (dataType) {
        case BYTE -> Byte.BYTES;
        case INT, UINT -> Integer.BYTES;
        case FLOAT -> Float.BYTES;
    };
    // The leading 2.0 makes the whole product double arithmetic, so it is not computed with int multiplication.
    final double estimatedBytes = 2.0 * numVectors * dims * bytesPerElement;
    return (long) estimatedBytes;
}

/** Returns the system-wide pooling manager. */
static CuVSResourceManager pooling() {
return PoolingCuVSResourceManager.Holder.INSTANCE;
Expand Down Expand Up @@ -193,25 +210,24 @@ public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataTyp
}

/**
 * Estimates the GPU memory required to build an index with the given parameters.
 *
 * <p>When the build algorithm is IVF_PQ and its index parameters are present with a non-zero PQ dimension,
 * the estimate is an approximation of the IVF_PQ index size scaled by {@code GPU_COMPUTATION_MEMORY_FACTOR}.
 * In every other case the estimate is delegated to
 * {@link CuVSResourceManager#estimateNNDescentMemory(int, int, CuVSMatrix.DataType)}.
 *
 * @param numVectors the number of vectors
 * @param dims the dimensionality of vectors
 * @param dataType the data type of the vectors
 * @param cagraIndexParams the CAGRA index parameters selecting the graph build algorithm
 * @return the estimated memory in bytes
 */
private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataType dataType, CagraIndexParams cagraIndexParams) {
    if (cagraIndexParams.getCagraGraphBuildAlgo() == CagraIndexParams.CagraGraphBuildAlgo.IVF_PQ
        && cagraIndexParams.getCuVSIvfPqParams() != null
        && cagraIndexParams.getCuVSIvfPqParams().getIndexParams() != null
        && cagraIndexParams.getCuVSIvfPqParams().getIndexParams().getPqDim() != 0) {
        // See https://docs.rapids.ai/api/cuvs/nightly/neighbors/ivfpq/#index-device-memory
        int elementTypeBytes = switch (dataType) {
            case FLOAT -> Float.BYTES;
            case INT, UINT -> Integer.BYTES;
            case BYTE -> Byte.BYTES;
        };
        var ivfPqIndexParams = cagraIndexParams.getCuVSIvfPqParams().getIndexParams();
        var pqDim = ivfPqIndexParams.getPqDim();
        var pqBits = ivfPqIndexParams.getPqBits();
        var numClusters = ivfPqIndexParams.getnLists();
        // pqBits / 8.0 keeps the per-vector term in double arithmetic; the per-cluster term is widened to long.
        var approximatedIvfBytes = numVectors * (pqDim * (pqBits / 8.0) + elementTypeBytes) + (long) numClusters * Integer.BYTES;
        return (long) (GPU_COMPUTATION_MEMORY_FACTOR * approximatedIvfBytes);
    }

    return CuVSResourceManager.estimateNNDescentMemory(numVectors, dims, dataType);
}

// visible for testing
Expand Down
Loading