diff --git a/README.md b/README.md index 7a0861238..5303e738f 100755 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ ## Useful Resources -- [Documentation](https://docs.rapids.ai/api/cuvs/): Library documentation. +- [Documentation](https://docs.rapids.ai/api/cuvs/nightly/): Library documentation. - [Build and Install Guide](https://docs.rapids.ai/api/cuvs/nightly/build): Instructions for installing and building cuVS. - [Getting Started Guide](https://docs.rapids.ai/api/cuvs/nightly/getting_started): Guide to getting started with cuVS. - [Code Examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples): Self-contained Code Examples. diff --git a/build.sh b/build.sh index b463f0f0d..b787d3a41 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -37,10 +37,13 @@ HELP="$0 [ ...] [ ...] 
[--cmake-args=\"\"] [--cache-tool==0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index ce9a7f058..a25393050 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -37,6 +37,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 116e80ac2..bb4a96d48 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -34,6 +34,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7f7ad045d..bd1b95ae8 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -34,6 +34,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 73c42ca71..554ad41ab 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-aarch64=11.8 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 473e50bc6..dc38f3565 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ 
b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 8a877c4c0..aeb23a9ef 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -32,6 +32,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 54859a77f..3a408cd64 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -32,6 +32,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/recipes/cuvs_bench/build.sh b/conda/recipes/cuvs_bench/build.sh new file mode 100644 index 000000000..05fb7bada --- /dev/null +++ b/conda/recipes/cuvs_bench/build.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats +cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/cuvs_bench/conda_build_config.yaml b/conda/recipes/cuvs_bench/conda_build_config.yaml new file mode 100644 index 000000000..47bd730da --- /dev/null +++ b/conda/recipes/cuvs_bench/conda_build_config.yaml @@ -0,0 +1,70 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc + +c_stdlib: + - sysroot + +c_stdlib_version: + - "2.17" + +cmake_version: + - ">=3.26.4,!=3.30.0" + +nccl_version: + - ">=2.19" + +glog_version: + - ">=0.6.0" + +h5py_version: + - ">=3.8.0" + +nlohmann_json_version: + - ">=3.11.2" + +# The CTK libraries below are missing from the conda-forge::cudatoolkit package +# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages +# and the "*_run_*" version specifiers correspond to `11.x` packages. + +cuda11_libcublas_host_version: + - "=11.11.3.6" + +cuda11_libcublas_run_version: + - ">=11.5.2.43,<12.0.0" + +cuda11_libcurand_host_version: + - "=10.3.0.86" + +cuda11_libcurand_run_version: + - ">=10.2.5.43,<10.3.1" + +cuda11_libcusolver_host_version: + - "=11.4.1.48" + +cuda11_libcusolver_run_version: + - ">=11.2.0.43,<11.4.2" + +cuda11_libcusparse_host_version: + - "=11.7.5.86" + +cuda11_libcusparse_run_version: + - ">=11.6.0.43,<12.0.0" + +# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all +# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the +# "*_run_*" version specifiers correspond to `11.x` packages. 
+ +cuda11_cuda_profiler_api_host_version: + - "=11.8.86" + +cuda11_cuda_profiler_api_run_version: + - ">=11.4.240,<12" diff --git a/conda/recipes/cuvs_bench/meta.yaml b/conda/recipes/cuvs_bench/meta.yaml new file mode 100644 index 000000000..9ecbf82bb --- /dev/null +++ b/conda/recipes/cuvs_bench/meta.yaml @@ -0,0 +1,105 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Usage: +# conda build . -c rapidsai -c conda-forge -c nvidia +{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: cuvs_bench + version: {{ version }} + script: build.sh + +source: + path: ../../.. + +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=cuvs-bench-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=cuvs-bench-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% else %} + - {{ compiler('cuda') }} + - cuda-cudart-dev + - libcublas-dev + {% endif %} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - cmake {{ 
cmake_version }} + - ninja + - {{ stdlib("c") }} + + host: + - benchmark + - cuda-version ={{ cuda_version }} + {% if cuda_major == "11" %} + - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} + - libcublas {{ cuda11_libcublas_host_version }} + - libcublas-dev {{ cuda11_libcublas_host_version }} + {% else %} + - cuda-cudart-dev + - cuda-profiler-api + - libcublas-dev + {% endif %} + - glog {{ glog_version }} + - libcuvs {{ version }} + - nlohmann_json {{ nlohmann_json_version }} + - openblas + # rmm is needed to determine if package is gpu-enabled + - python + - rapids-build-backend>=0.3.0,<0.4.0.dev0 + - rmm ={{ minor_version }} + + run: + - benchmark + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + {% if cuda_major == "11" %} + - cudatoolkit + {% else %} + - cuda-cudart + - libcublas + {% endif %} + - glog {{ glog_version }} + - libcuvs {{ version }} + - h5py {{ h5py_version }} + - matplotlib + - pandas + - pyyaml + # rmm is needed to determine if package is gpu-enabled + - pylibraft ={{ minor_version }} + - python + - rmm ={{ minor_version }} +about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: cuVS GPU and CPU benchmarks diff --git a/conda/recipes/cuvs_bench_cpu/build.sh b/conda/recipes/cuvs_bench_cpu/build.sh new file mode 100644 index 000000000..163872053 --- /dev/null +++ b/conda/recipes/cuvs_bench_cpu/build.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann_cpu --incl-cache-stats +cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/cuvs_bench_cpu/conda_build_config.yaml b/conda/recipes/cuvs_bench_cpu/conda_build_config.yaml new file mode 100644 index 000000000..ed6f708e1 --- /dev/null +++ b/conda/recipes/cuvs_bench_cpu/conda_build_config.yaml @@ -0,0 +1,29 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +c_stdlib: + - sysroot + +c_stdlib_version: + - "2.17" + +cmake_version: + - ">=3.26.4,!=3.30.0" + +glog_version: + - ">=0.6.0" + +h5py_version: + - ">=3.8.0" + +nlohmann_json_version: + - ">=3.11.2" + +spdlog_version: + - ">=1.14.1,<1.15" + +fmt_version: + - ">=11.0.2,<12" diff --git a/conda/recipes/cuvs_bench_cpu/meta.yaml b/conda/recipes/cuvs_bench_cpu/meta.yaml new file mode 100644 index 000000000..0ce5db744 --- /dev/null +++ b/conda/recipes/cuvs_bench_cpu/meta.yaml @@ -0,0 +1,67 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Usage: +# conda build . -c rapidsai -c conda-forge -c nvidia +{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: cuvs_bench_cpu + version: {{ version }} + script: build.sh + +source: + path: ../../.. 
+ +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=cuvs-bench-cpu-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=cuvs-bench-cpu-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - cmake {{ cmake_version }} + - ninja + - {{ stdlib("c") }} + + host: + - benchmark + - fmt {{ fmt_version }} + - glog {{ glog_version }} + - nlohmann_json {{ nlohmann_json_version }} + - openblas + - python + - rapids-build-backend>=0.3.0,<0.4.0.dev0 + - spdlog {{ spdlog_version }} + + run: + - benchmark + - glog {{ glog_version }} + - h5py {{ h5py_version }} + - matplotlib + - pandas + - pyyaml + - python +about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: cuVS CPU benchmarks diff --git a/conda/recipes/libcuvs/conda_build_config.yaml b/conda/recipes/libcuvs/conda_build_config.yaml index e165f7ed9..b8c49943e 100644 --- a/conda/recipes/libcuvs/conda_build_config.yaml +++ b/conda/recipes/libcuvs/conda_build_config.yaml @@ -22,6 +22,9 @@ cmake_version: h5py_version: - ">=3.8.0" +nccl_version: + - ">=2.19" + # The CTK libraries below are missing from the conda-forge::cudatoolkit package # for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages # and the "*_run_*" version specifiers correspond to `11.x` packages. 
diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index e154ccf41..46552c397 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -65,6 +65,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -131,6 +132,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -159,6 +161,7 @@ outputs: - libcusolver - libcusparse {% endif %} + - libraft-headers ={{ minor_version }} about: home: https://rapids.ai/ license: Apache-2.0 @@ -197,6 +200,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version ={{ cuda_version }} - openblas # required by some CPU algos in benchmarks @@ -268,6 +272,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6f5178251..3e98a247e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -57,6 +57,7 @@ option(BUILD_C_LIBRARY "Build cuVS C API library" OFF) option(BUILD_C_TESTS "Build cuVS C API tests" OFF) option(BUILD_CUVS_BENCH "Build cuVS ann benchmarks" OFF) option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) +option(BUILD_MG_ALGOS "Build with multi-GPU support" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF @@ 
-85,6 +86,12 @@ if(NOT BUILD_C_LIBRARY) set(BUILD_C_TESTS OFF) endif() +if(NOT BUILD_SHARED_LIBS) + set(BUILD_TESTS OFF) + set(BUILD_C_LIBRARY OFF) + set(BUILD_CAGRA_HNSWLIB OFF) +endif() + # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. Setting this flag ensures # `Threads::Threads` is the same value across all builds so that cache hits occur @@ -175,6 +182,7 @@ rapids_cpm_init() if(NOT BUILD_CPU_ONLY) include(cmake/thirdparty/get_raft.cmake) + include(cmake/thirdparty/get_cutlass.cmake) endif() if(BUILD_C_LIBRARY) @@ -186,8 +194,6 @@ if(BUILD_TESTS OR BUILD_C_TESTS) rapids_cpm_gtest(BUILD_STATIC) endif() -include(cmake/thirdparty/get_cutlass.cmake) - if(BUILD_CUVS_BENCH) include(${rapids-cmake-dir}/cpm/gbench.cmake) rapids_cpm_gbench(BUILD_STATIC) @@ -199,526 +205,568 @@ endif() # ################################################################################################## # * cuvs --------------------------------------------------------------------- -add_library( - cuvs-cagra-search STATIC - src/neighbors/cagra_search_float.cu - src/neighbors/cagra_search_half.cu - src/neighbors/cagra_search_int8.cu - src/neighbors/cagra_search_uint8.cu - src/neighbors/detail/cagra/compute_distance.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim128_t8.cu - 
src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu - 
src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu - 
src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32.cu -) - -file(GLOB_RECURSE compute_distance_sources "src/neighbors/detail/cagra/compute_distance_*.cu") -set_source_files_properties(${compute_distance_sources} PROPERTIES COMPILE_FLAGS -maxrregcount=64) - -set_target_properties( - cuvs-cagra-search - PROPERTIES BUILD_RPATH "\$ORIGIN" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - POSITION_INDEPENDENT_CODE ON -) -target_link_libraries(cuvs-cagra-search PRIVATE raft::raft) -target_include_directories( - cuvs-cagra-search PRIVATE "$" -) -target_compile_options( - cuvs-cagra-search PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" -) - -add_library( - cuvs_objs OBJECT - src/cluster/kmeans_balanced_fit_float.cu - src/cluster/kmeans_fit_mg_float.cu - src/cluster/kmeans_fit_mg_double.cu - src/cluster/kmeans_fit_double.cu - src/cluster/kmeans_fit_float.cu - src/cluster/kmeans_auto_find_k_float.cu - src/cluster/kmeans_fit_predict_double.cu - src/cluster/kmeans_fit_predict_float.cu - src/cluster/kmeans_predict_double.cu - src/cluster/kmeans_predict_float.cu - src/cluster/kmeans_balanced_fit_float.cu - src/cluster/kmeans_balanced_fit_predict_float.cu - src/cluster/kmeans_balanced_predict_float.cu - src/cluster/kmeans_balanced_fit_int8.cu - src/cluster/kmeans_balanced_fit_predict_int8.cu - 
src/cluster/kmeans_balanced_predict_int8.cu - src/cluster/kmeans_transform_double.cu - src/cluster/kmeans_transform_float.cu - src/cluster/single_linkage_float.cu - src/core/bitset.cu - src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu - 
src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_rbf.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu - src/distance/detail/fused_distance_nn.cu - src/distance/distance.cu - src/distance/pairwise_distance.cu - src/neighbors/brute_force.cu - src/neighbors/cagra_build_float.cu - src/neighbors/cagra_build_half.cu - src/neighbors/cagra_build_int8.cu - 
src/neighbors/cagra_build_uint8.cu - src/neighbors/cagra_extend_float.cu - src/neighbors/cagra_extend_int8.cu - src/neighbors/cagra_extend_uint8.cu - src/neighbors/cagra_optimize.cu - src/neighbors/cagra_serialize_float.cu - src/neighbors/cagra_serialize_half.cu - src/neighbors/cagra_serialize_int8.cu - src/neighbors/cagra_serialize_uint8.cu - src/neighbors/detail/cagra/cagra_build.cpp - src/neighbors/detail/cagra/topk_for_cagra/topk.cu - $<$:src/neighbors/hnsw.cpp> - src/neighbors/ivf_flat_index.cpp - src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_helpers.cu - src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu - src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu - src/neighbors/ivf_pq_index.cpp - src/neighbors/ivf_pq/ivf_pq_build_common.cu - src/neighbors/ivf_pq/ivf_pq_serialize.cu - src/neighbors/ivf_pq/ivf_pq_deserialize.cu - src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu - src/neighbors/ivf_pq/detail/ivf_pq_build_extend_half_int64_t.cu - src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu - src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu - 
src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu - src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu - src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu - src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu - src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu - src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu - src/neighbors/nn_descent.cu - src/neighbors/nn_descent_float.cu - src/neighbors/nn_descent_half.cu - src/neighbors/nn_descent_int8.cu - src/neighbors/nn_descent_uint8.cu - src/neighbors/reachability.cu - src/neighbors/refine/detail/refine_device_float_float.cu - src/neighbors/refine/detail/refine_device_half_float.cu - src/neighbors/refine/detail/refine_device_int8_t_float.cu - src/neighbors/refine/detail/refine_device_uint8_t_float.cu - src/neighbors/refine/detail/refine_host_float_float.cpp - src/neighbors/refine/detail/refine_host_half_float.cpp - src/neighbors/refine/detail/refine_host_int8_t_float.cpp - src/neighbors/refine/detail/refine_host_uint8_t_float.cpp - src/neighbors/sample_filter.cu - src/neighbors/vamana_build_float.cu - src/neighbors/vamana_build_uint8.cu - src/neighbors/vamana_build_int8.cu - src/neighbors/vamana_serialize_float.cu - src/neighbors/vamana_serialize_uint8.cu - src/neighbors/vamana_serialize_int8.cu - src/selection/select_k_float_int64_t.cu - src/selection/select_k_float_int32_t.cu - src/selection/select_k_float_uint32_t.cu - 
src/selection/select_k_half_uint32_t.cu - src/stats/silhouette_score.cu - src/stats/trustworthiness_score.cu -) +if(BUILD_SHARED_LIBS) + add_library( + cuvs-cagra-search STATIC + src/neighbors/cagra_search_float.cu + src/neighbors/cagra_search_half.cu + src/neighbors/cagra_search_int8.cu + src/neighbors/cagra_search_uint8.cu + src/neighbors/detail/cagra/compute_distance.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim256_t16.cu + 
src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim128_t8.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim256_t16.cu + src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim512_t32.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint32.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint32.cu + src/neighbors/detail/cagra/search_multi_cta_int8_uint32.cu + src/neighbors/detail/cagra/search_multi_cta_uint8_uint32.cu + src/neighbors/detail/cagra/search_single_cta_float_uint32.cu + src/neighbors/detail/cagra/search_single_cta_half_uint32.cu + src/neighbors/detail/cagra/search_single_cta_int8_uint32.cu + src/neighbors/detail/cagra/search_single_cta_uint8_uint32.cu + ) -set_target_properties( - cuvs_objs - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON -) -target_compile_options( - cuvs_objs PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" -) -target_link_libraries( - cuvs_objs PUBLIC raft::raft rmm::rmm ${CUVS_CTK_MATH_DEPENDENCIES} - $ -) + 
file(GLOB_RECURSE compute_distance_sources "src/neighbors/detail/cagra/compute_distance_*.cu") + set_source_files_properties(${compute_distance_sources} PROPERTIES COMPILE_FLAGS -maxrregcount=64) -add_library(cuvs SHARED $) -add_library(cuvs_static STATIC $) + set_target_properties( + cuvs-cagra-search + PROPERTIES BUILD_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + POSITION_INDEPENDENT_CODE ON + ) + target_link_libraries(cuvs-cagra-search PRIVATE raft::raft) + target_include_directories( + cuvs-cagra-search PRIVATE "$" + ) + target_compile_options( + cuvs-cagra-search PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + ) -target_compile_options( - cuvs INTERFACE $<$:--expt-extended-lambda - --expt-relaxed-constexpr> -) + if(BUILD_MG_ALGOS) + set(CUVS_MG_ALGOS + src/neighbors/mg/mg_flat_float_int64_t.cu + src/neighbors/mg/mg_flat_int8_t_int64_t.cu + src/neighbors/mg/mg_flat_uint8_t_int64_t.cu + src/neighbors/mg/mg_pq_float_int64_t.cu + src/neighbors/mg/mg_pq_half_int64_t.cu + src/neighbors/mg/mg_pq_int8_t_int64_t.cu + src/neighbors/mg/mg_pq_uint8_t_int64_t.cu + src/neighbors/mg/mg_cagra_float_uint32_t.cu + src/neighbors/mg/mg_cagra_half_uint32_t.cu + src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu + src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu + src/neighbors/mg/omp_checks.cpp + src/neighbors/mg/nccl_comm.cpp + ) + endif() -add_library(cuvs::cuvs ALIAS cuvs) -add_library(cuvs::cuvs_static ALIAS cuvs_static) - -set_target_properties( - cuvs_static - PROPERTIES BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - EXPORT_NAME cuvs_static -) + add_library( + cuvs_objs OBJECT + src/cluster/kmeans_balanced_fit_float.cu + src/cluster/kmeans_fit_mg_float.cu + src/cluster/kmeans_fit_mg_double.cu + 
src/cluster/kmeans_fit_double.cu + src/cluster/kmeans_fit_float.cu + src/cluster/kmeans_auto_find_k_float.cu + src/cluster/kmeans_fit_predict_double.cu + src/cluster/kmeans_fit_predict_float.cu + src/cluster/kmeans_predict_double.cu + src/cluster/kmeans_predict_float.cu + src/cluster/kmeans_balanced_fit_float.cu + src/cluster/kmeans_balanced_fit_predict_float.cu + src/cluster/kmeans_balanced_predict_float.cu + src/cluster/kmeans_balanced_fit_int8.cu + src/cluster/kmeans_balanced_fit_predict_int8.cu + src/cluster/kmeans_balanced_predict_int8.cu + src/cluster/kmeans_transform_double.cu + src/cluster/kmeans_transform_float.cu + src/cluster/single_linkage_float.cu + src/core/bitset.cu + src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu + 
src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu + 
src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_rbf.cu + src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu + src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu + src/distance/detail/fused_distance_nn.cu + src/distance/distance.cu + src/distance/pairwise_distance.cu + src/neighbors/brute_force.cu + src/neighbors/cagra_build_float.cu + src/neighbors/cagra_build_half.cu + src/neighbors/cagra_build_int8.cu + src/neighbors/cagra_build_uint8.cu + src/neighbors/cagra_extend_float.cu + src/neighbors/cagra_extend_int8.cu + src/neighbors/cagra_extend_uint8.cu + src/neighbors/cagra_optimize.cu + src/neighbors/cagra_serialize_float.cu + src/neighbors/cagra_serialize_half.cu + src/neighbors/cagra_serialize_int8.cu + src/neighbors/cagra_serialize_uint8.cu + src/neighbors/iface/iface_cagra_float_uint32_t.cu + src/neighbors/iface/iface_cagra_half_uint32_t.cu + src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu + src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu + src/neighbors/iface/iface_flat_float_int64_t.cu + src/neighbors/iface/iface_flat_int8_t_int64_t.cu + src/neighbors/iface/iface_flat_uint8_t_int64_t.cu + src/neighbors/iface/iface_pq_float_int64_t.cu + src/neighbors/iface/iface_pq_half_int64_t.cu + src/neighbors/iface/iface_pq_int8_t_int64_t.cu + src/neighbors/iface/iface_pq_uint8_t_int64_t.cu + src/neighbors/detail/cagra/cagra_build.cpp + src/neighbors/detail/cagra/topk_for_cagra/topk.cu + $<$:src/neighbors/hnsw.cpp> + src/neighbors/ivf_flat_index.cpp + src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_helpers.cu + src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu + 
src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu + src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu + src/neighbors/ivf_pq_index.cpp + src/neighbors/ivf_pq/ivf_pq_build_common.cu + src/neighbors/ivf_pq/ivf_pq_serialize.cu + src/neighbors/ivf_pq/ivf_pq_deserialize.cu + src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu + src/neighbors/ivf_pq/detail/ivf_pq_build_extend_half_int64_t.cu + src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu + src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu + src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu + src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu + src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu + src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu + 
src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu + src/neighbors/nn_descent.cu + src/neighbors/nn_descent_float.cu + src/neighbors/nn_descent_half.cu + src/neighbors/nn_descent_int8.cu + src/neighbors/nn_descent_uint8.cu + src/neighbors/reachability.cu + src/neighbors/refine/detail/refine_device_float_float.cu + src/neighbors/refine/detail/refine_device_half_float.cu + src/neighbors/refine/detail/refine_device_int8_t_float.cu + src/neighbors/refine/detail/refine_device_uint8_t_float.cu + src/neighbors/refine/detail/refine_host_float_float.cpp + src/neighbors/refine/detail/refine_host_half_float.cpp + src/neighbors/refine/detail/refine_host_int8_t_float.cpp + src/neighbors/refine/detail/refine_host_uint8_t_float.cpp + src/neighbors/sample_filter.cu + src/neighbors/vamana_build_float.cu + src/neighbors/vamana_build_uint8.cu + src/neighbors/vamana_build_int8.cu + src/neighbors/vamana_serialize_float.cu + src/neighbors/vamana_serialize_uint8.cu + src/neighbors/vamana_serialize_int8.cu + src/selection/select_k_float_int64_t.cu + src/selection/select_k_float_int32_t.cu + src/selection/select_k_float_uint32_t.cu + src/selection/select_k_half_uint32_t.cu + src/stats/silhouette_score.cu + src/stats/trustworthiness_score.cu + ${CUVS_MG_ALGOS} + ) -target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>") + set_target_properties( + cuvs_objs + PROPERTIES CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) + target_compile_options( + cuvs_objs PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" + ) + target_link_libraries( + cuvs_objs PUBLIC raft::raft rmm::rmm ${CUVS_CTK_MATH_DEPENDENCIES} + $ + ) -target_include_directories( - cuvs_objs - PUBLIC "$" - "$" - INTERFACE "$" -) + add_library(cuvs SHARED $) + add_library(cuvs_static STATIC $) -target_include_directories( - cuvs_static - PUBLIC "$" - INTERFACE "$" -) + target_compile_options( + cuvs INTERFACE 
$<$:--expt-extended-lambda + --expt-relaxed-constexpr> + ) -# ensure CUDA symbols aren't relocated to the middle of the debug build binaries -target_link_options(cuvs_static PRIVATE $) + add_library(cuvs::cuvs ALIAS cuvs) + add_library(cuvs::cuvs_static ALIAS cuvs_static) -target_include_directories( - cuvs_static PUBLIC "$" - "$" -) + set_target_properties( + cuvs_static + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + EXPORT_NAME cuvs_static + ) -target_include_directories( - cuvs PUBLIC "$" - "$" -) + target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>") -rapids_find_package( - OpenMP REQUIRED - BUILD_EXPORT_SET cuvs-exports - INSTALL_EXPORT_SET cuvs-exports -) + target_include_directories( + cuvs_objs + PUBLIC "$" + "$" + INTERFACE "$" + ) -if(NOT BUILD_CPU_ONLY) + target_include_directories( + cuvs_static + PUBLIC "$" + INTERFACE "$" + ) - set(CUVS_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix}) - set(CUVS_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix}) - set(CUVS_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix}) - set(CUVS_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix}) + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries + target_link_options(cuvs_static PRIVATE $) - set(CUVS_CTK_MATH_DEPENDENCIES ${CUVS_CUBLAS_DEPENDENCY} ${CUVS_CUSOLVER_DEPENDENCY} - ${CUVS_CUSPARSE_DEPENDENCY} ${CUVS_CURAND_DEPENDENCY} + target_include_directories( + cuvs_static PUBLIC "$" + "$" ) - # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
- target_link_libraries( - cuvs - PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE nvidia::cutlass::cutlass $ cuvs-cagra-search + target_include_directories( + cuvs PUBLIC "$" + "$" ) - target_link_libraries( - cuvs_static - PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE nvidia::cutlass::cutlass $ cuvs-cagra-search + rapids_find_package( + OpenMP REQUIRED + BUILD_EXPORT_SET cuvs-exports + INSTALL_EXPORT_SET cuvs-exports ) -endif() -if(BUILD_CAGRA_HNSWLIB) - target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib) - target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) -endif() + if(NOT BUILD_CPU_ONLY) -# Endian detection -include(TestBigEndian) -test_big_endian(BIG_ENDIAN) -if(BIG_ENDIAN) - target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=0) -else() - target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=1) -endif() + set(CUVS_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix}) + set(CUVS_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix}) + set(CUVS_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix}) + set(CUVS_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix}) + + set(CUVS_CTK_MATH_DEPENDENCIES ${CUVS_CUBLAS_DEPENDENCY} ${CUVS_CUSOLVER_DEPENDENCY} + ${CUVS_CUSPARSE_DEPENDENCY} ${CUVS_CURAND_DEPENDENCY} + ) + + if(BUILD_MG_ALGOS) + set(CUVS_COMMS_DEPENDENCY nccl) + endif() + + # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
+ target_link_libraries( + cuvs + PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} + PRIVATE nvidia::cutlass::cutlass $ + cuvs-cagra-search ${CUVS_COMMS_DEPENDENCY} + ) + + target_link_libraries( + cuvs_static + PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} + PRIVATE nvidia::cutlass::cutlass $ + ) + endif() + + if(BUILD_MG_ALGOS) + target_compile_definitions(cuvs PUBLIC CUVS_BUILD_MG_ALGOS) + target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_MG_ALGOS) + endif() + + if(BUILD_CAGRA_HNSWLIB) + target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib) + target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) + endif() + + # Endian detection + include(TestBigEndian) + test_big_endian(BIG_ENDIAN) + if(BIG_ENDIAN) + target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=0) + else() + target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=1) + endif() -file( - WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" - [=[ + file( + WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" + [=[ SECTIONS { .nvFatBinSegment : { *(.nvFatBinSegment) } .nv_fatbin : { *(.nv_fatbin) } } ]=] -) - -# ################################################################################################## -# * NVTX support in cuvs ----------------------------------------------------- - -if(CUVS_NVTX) - # This enables NVTX within the project with no option to disable it downstream. - target_link_libraries(cuvs PUBLIC CUDA::nvtx3) - target_compile_definitions(cuvs PUBLIC NVTX_ENABLED) -else() - # Allow enable NVTX downstream if not set here. This creates a new option at build/install time, - # which is set by default to OFF, but can be enabled in the dependent project. 
- get_property( - nvtx_option_help_string - CACHE CUVS_NVTX - PROPERTY HELPSTRING ) - string( - CONCAT - nvtx_export_string - "option(CUVS_NVTX \"" - ${nvtx_option_help_string} - "\" OFF)" - [=[ + + # ################################################################################################ + # * NVTX support in cuvs ----------------------------------------------------- + + if(CUVS_NVTX) + # This enables NVTX within the project with no option to disable it downstream. + target_link_libraries(cuvs PUBLIC CUDA::nvtx3) + target_compile_definitions(cuvs PUBLIC NVTX_ENABLED) + else() + # Allow enable NVTX downstream if not set here. This creates a new option at build/install time, + # which is set by default to OFF, but can be enabled in the dependent project. + get_property( + nvtx_option_help_string + CACHE CUVS_NVTX + PROPERTY HELPSTRING + ) + string( + CONCAT + nvtx_export_string + "option(CUVS_NVTX \"" + ${nvtx_option_help_string} + "\" OFF)" + [=[ target_link_libraries(cuvs::cuvs INTERFACE $<$:CUDA::nvtx3>) target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENABLED>) ]=] - ) -endif() - -set_target_properties( - cuvs - PROPERTIES BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - POSITION_INDEPENDENT_CODE ON -) - -target_compile_options( - cuvs PRIVATE "$<$:${CUVS_CXX_FLAGS}>" - "$<$:${CUVS_CUDA_FLAGS}>" -) -# ensure CUDA symbols aren't relocated to the middle of the debug build binaries -target_link_options(cuvs PRIVATE $) - -# ################################################################################################## -# * cuvs_c ------------------------------------------------------------------------------- -if(BUILD_C_LIBRARY) - add_library( - cuvs_c SHARED - src/core/c_api.cpp - src/neighbors/brute_force_c.cpp - src/neighbors/ivf_flat_c.cpp - src/neighbors/ivf_pq_c.cpp - src/neighbors/cagra_c.cpp - 
$<$:src/neighbors/hnsw_c.cpp> - src/neighbors/refine/refine_c.cpp - src/distance/pairwise_distance_c.cpp - ) - - if(BUILD_CAGRA_HNSWLIB) - target_link_libraries(cuvs_c PRIVATE hnswlib::hnswlib) - target_compile_definitions(cuvs_c PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) + ) endif() - add_library(cuvs::c_api ALIAS cuvs_c) - set_target_properties( - cuvs_c + cuvs PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON INTERFACE_POSITION_INDEPENDENT_CODE ON - EXPORT_NAME c_api + POSITION_INDEPENDENT_CODE ON ) - target_compile_options(cuvs_c PRIVATE "$<$:${CUVS_CXX_FLAGS}>") - - target_include_directories( - cuvs_c - PUBLIC "$" - INTERFACE "$" + target_compile_options( + cuvs PRIVATE "$<$:${CUVS_CXX_FLAGS}>" + "$<$:${CUVS_CUDA_FLAGS}>" ) + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries + target_link_options(cuvs PRIVATE $) + + # ################################################################################################ + # * cuvs_c ------------------------------------------------------------------------------- + if(BUILD_C_LIBRARY) + add_library( + cuvs_c SHARED + src/core/c_api.cpp + src/neighbors/brute_force_c.cpp + src/neighbors/ivf_flat_c.cpp + src/neighbors/ivf_pq_c.cpp + src/neighbors/cagra_c.cpp + $<$:src/neighbors/hnsw_c.cpp> + src/neighbors/refine/refine_c.cpp + src/distance/pairwise_distance_c.cpp + ) - target_link_libraries( - cuvs_c - PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE raft::raft - ) + if(BUILD_CAGRA_HNSWLIB) + target_link_libraries(cuvs_c PRIVATE hnswlib::hnswlib) + target_compile_definitions(cuvs_c PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) + endif() + + add_library(cuvs::c_api ALIAS cuvs_c) + + set_target_properties( + cuvs_c + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + 
INTERFACE_POSITION_INDEPENDENT_CODE ON + EXPORT_NAME c_api + ) - # ensure CUDA symbols aren't relocated to the middle of the debug build binaries - target_link_options(cuvs_c PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") -endif() + target_compile_options(cuvs_c PRIVATE "$<$:${CUVS_CXX_FLAGS}>") -# ################################################################################################## -# * install targets----------------------------------------------------------- -rapids_cmake_install_lib_dir(lib_dir) -include(GNUInstallDirs) -include(CPack) - -install( - TARGETS cuvs cuvs_static cuvs-cagra-search - DESTINATION ${lib_dir} - COMPONENT cuvs - EXPORT cuvs-exports -) + target_include_directories( + cuvs_c + PUBLIC "$" + INTERFACE "$" + ) -install( - DIRECTORY include/cuvs - COMPONENT cuvs - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) + target_link_libraries( + cuvs_c + PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES} + PRIVATE raft::raft + ) + + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries + target_link_options(cuvs_c PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") + endif() + + # ################################################################################################ + # * install targets----------------------------------------------------------- + rapids_cmake_install_lib_dir(lib_dir) + include(GNUInstallDirs) + include(CPack) -if(BUILD_C_LIBRARY) install( - TARGETS cuvs_c + TARGETS cuvs cuvs_static DESTINATION ${lib_dir} - COMPONENT c_api - EXPORT cuvs-c-exports + COMPONENT cuvs + EXPORT cuvs-exports ) -endif() -install( - FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuvs/version_config.hpp - COMPONENT cuvs - DESTINATION include/cuvs -) + install( + DIRECTORY include/cuvs + COMPONENT cuvs + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ) -if(TARGET cuvs_c) - list(APPEND cuvs_components c_api) - list(APPEND cuvs_export_sets cuvs-c-exports) - set(CUVS_C_TARGET cuvs_c) -endif() + if(BUILD_C_LIBRARY) + install( + TARGETS 
cuvs_c + DESTINATION ${lib_dir} + COMPONENT c_api + EXPORT cuvs-c-exports + ) + endif() -# Use `rapids_export` for 22.04 as it will have COMPONENT support -rapids_export( - INSTALL cuvs - EXPORT_SET cuvs-exports - COMPONENTS ${cuvs_components} - COMPONENTS_EXPORT_SET ${cuvs_export_sets} - GLOBAL_TARGETS cuvs ${CUVS_C_TARGET} - NAMESPACE cuvs:: -) + install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuvs/version_config.hpp + COMPONENT cuvs + DESTINATION include/cuvs + ) -# ################################################################################################## -# * build export ------------------------------------------------------------- -rapids_export( - BUILD cuvs - EXPORT_SET cuvs-exports - COMPONENTS ${cuvs_components} - COMPONENTS_EXPORT_SET ${cuvs_export_sets} - GLOBAL_TARGETS cuvs ${CUVS_C_TARGET} - NAMESPACE cuvs:: -) + if(TARGET cuvs_c) + list(APPEND cuvs_components c_api) + list(APPEND cuvs_export_sets cuvs-c-exports) + set(CUVS_C_TARGET cuvs_c) + endif() + + # Use `rapids_export` for 22.04 as it will have COMPONENT support + rapids_export( + INSTALL cuvs + EXPORT_SET cuvs-exports + COMPONENTS ${cuvs_components} + COMPONENTS_EXPORT_SET ${cuvs_export_sets} + GLOBAL_TARGETS cuvs ${CUVS_C_TARGET} + NAMESPACE cuvs:: + ) + + # ################################################################################################ + # * build export ------------------------------------------------------------- + rapids_export( + BUILD cuvs + EXPORT_SET cuvs-exports + COMPONENTS ${cuvs_components} + COMPONENTS_EXPORT_SET ${cuvs_export_sets} + GLOBAL_TARGETS cuvs ${CUVS_C_TARGET} + NAMESPACE cuvs:: + ) +endif() # ################################################################################################## # * build test executable ---------------------------------------------------- diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 8cbf8c8b3..c36e70ace 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ 
b/cpp/bench/ann/CMakeLists.txt @@ -32,6 +32,7 @@ option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benc option(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB "Include cuVS CAGRA with HNSW search in benchmark" ON) option(CUVS_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" OFF) +option(CUVS_ANN_BENCH_USE_CUVS_MG "Include cuVS ann mg algorithm in benchmark" ${BUILD_MG_ALGOS}) option(CUVS_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF ) @@ -55,6 +56,7 @@ if(BUILD_CPU_ONLY) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OFF) set(CUVS_ANN_BENCH_USE_GGNN OFF) set(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE OFF) + set(CUVS_ANN_BENCH_USE_CUVS_MG OFF) else() set(CUVS_FAISS_ENABLE_GPU ON) endif() @@ -66,6 +68,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OR CUVS_ANN_BENCH_USE_CUVS_CAGRA OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE + OR CUVS_ANN_BENCH_USE_CUVS_MG ) set(CUVS_ANN_BENCH_USE_CUVS ON) endif() @@ -245,6 +248,21 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) ) endif() +if(CUVS_ANN_BENCH_USE_CUVS_MG) + ConfigureAnnBench( + NAME + CUVS_MG + PATH + src/cuvs/cuvs_benchmark.cu + $<$:src/cuvs/cuvs_mg_ivf_flat.cu> + $<$:src/cuvs/cuvs_mg_ivf_pq.cu> + $<$:src/cuvs/cuvs_mg_cagra.cu> + LINKS + cuvs + nccl + ) +endif() + message("CUVS_FAISS_TARGETS: ${CUVS_FAISS_TARGETS}") message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") if(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT) diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h index 22f0cab6f..57d5b1910 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h @@ -45,7 +45,18 @@ extern template class cuvs::bench::cuvs_cagra; extern template class cuvs::bench::cuvs_cagra; #endif -#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT +#ifdef 
CUVS_ANN_BENCH_USE_CUVS_MG +#include "cuvs_ivf_flat_wrapper.h" +#include "cuvs_mg_ivf_flat_wrapper.h" + +#include "cuvs_ivf_pq_wrapper.h" +#include "cuvs_mg_ivf_pq_wrapper.h" + +#include "cuvs_cagra_wrapper.h" +#include "cuvs_mg_cagra_wrapper.h" +#endif + +#if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, typename cuvs::bench::cuvs_ivf_flat::build_param& param) @@ -64,7 +75,7 @@ void parse_search_param(const nlohmann::json& conf, #endif #if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \ - defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) + defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, typename cuvs::bench::cuvs_ivf_pq::build_param& param) @@ -130,7 +141,8 @@ void parse_search_param(const nlohmann::json& conf, } #endif -#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) +#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || \ + defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, cuvs::neighbors::nn_descent::index_params& param) { diff --git a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu index a956ab139..893097236 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu @@ -29,6 +29,43 @@ namespace cuvs::bench { +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG +void add_distribution_mode(cuvs::neighbors::mg::distribution_mode* dist_mode, + const nlohmann::json& conf) +{ + if (conf.contains("distribution_mode")) { + std::string distribution_mode = conf.at("distribution_mode"); + if (distribution_mode == "replicated") { + *dist_mode = cuvs::neighbors::mg::distribution_mode::REPLICATED; + } else if (distribution_mode == "sharded") { + *dist_mode = 
cuvs::neighbors::mg::distribution_mode::SHARDED; + } else { + throw std::runtime_error("invalid value for distribution_mode"); + } + } else { + // default + *dist_mode = cuvs::neighbors::mg::distribution_mode::SHARDED; + } +}; + +void add_merge_mode(cuvs::neighbors::mg::sharded_merge_mode* merge_mode, const nlohmann::json& conf) +{ + if (conf.contains("merge_mode")) { + std::string sharded_merge_mode = conf.at("merge_mode"); + if (sharded_merge_mode == "tree_merge") { + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE; + } else if (sharded_merge_mode == "merge_on_root_rank") { + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::MERGE_ON_ROOT_RANK; + } else { + throw std::runtime_error("invalid value for merge_mode"); + } + } else { + // default + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE; + } +}; +#endif + template auto create_algo(const std::string& algo_name, const std::string& distance, @@ -71,6 +108,32 @@ auto create_algo(const std::string& algo_name, parse_build_param(conf, param); a = std::make_unique>(metric, dim, param); } +#endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") { + typename cuvs::bench::cuvs_mg_ivf_flat::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + } + + if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") { + typename cuvs::bench::cuvs_mg_ivf_pq::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + + if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") { + typename cuvs::bench::cuvs_mg_cagra::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + #endif if (!a) { 
throw std::runtime_error("invalid algo: '" + algo_name + "'"); } @@ -113,6 +176,32 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con return param; } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") { + auto param = + std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } + } + + if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } + + if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } +#endif // else throw std::runtime_error("invalid algo: '" + algo_name + "'"); diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index ff854f890..b2ba35eee 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -72,6 +72,23 @@ class cuvs_cagra : public algo, public algo_gpu { std::optional ivf_pq_refine_rate = std::nullopt; std::optional ivf_pq_build_params = std::nullopt; std::optional ivf_pq_search_params = std::nullopt; + + void prepare_build_params(const raft::extent_2d& dataset_extents) + { + if (algo == CagraBuildAlgo::kIvfPq) { + auto pq_params = cuvs::neighbors::cagra::graph_build_params::ivf_pq_params( + dataset_extents, cagra_params.metric); + if (ivf_pq_build_params) { pq_params.build_params = *ivf_pq_build_params; } + if (ivf_pq_search_params) { pq_params.search_params = *ivf_pq_search_params; } + if (ivf_pq_refine_rate) { pq_params.refinement_rate = *ivf_pq_refine_rate; } + 
cagra_params.graph_build_params = pq_params; + } else if (algo == CagraBuildAlgo::kNnDescent) { + auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params( + cagra_params.intermediate_graph_degree); + if (nn_descent_params) { nn_params = *nn_descent_params; } + cagra_params.graph_build_params = nn_params; + } + } }; cuvs_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1) @@ -168,28 +185,9 @@ template void cuvs_cagra::build(const T* dataset, size_t nrow) { auto dataset_extents = raft::make_extents(nrow, dimension_); + index_params_.prepare_build_params(dataset_extents); auto& params = index_params_.cagra_params; - - if (index_params_.algo == CagraBuildAlgo::kIvfPq) { - auto pq_params = - cuvs::neighbors::cagra::graph_build_params::ivf_pq_params(dataset_extents, params.metric); - if (index_params_.ivf_pq_build_params) { - pq_params.build_params = *index_params_.ivf_pq_build_params; - } - if (index_params_.ivf_pq_search_params) { - pq_params.search_params = *index_params_.ivf_pq_search_params; - } - if (index_params_.ivf_pq_refine_rate) { - pq_params.refinement_rate = *index_params_.ivf_pq_refine_rate; - } - params.graph_build_params = pq_params; - } else if (index_params_.algo == CagraBuildAlgo::kNnDescent) { - auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params( - params.intermediate_graph_degree); - if (index_params_.nn_descent_params) { nn_params = *index_params_.nn_descent_params; } - params.graph_build_params = nn_params; - } auto dataset_view_host = raft::make_mdspan(dataset, dataset_extents); auto dataset_view_device = diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu new file mode 100644 index 000000000..801caa85f --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_cagra_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h new file mode 100644 index 000000000..50c1ff4db --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_cagra_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +enum class AllocatorType; +enum class CagraBuildAlgo; + +template +class cuvs_mg_cagra : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + struct build_param : public cuvs::bench::cuvs_cagra::build_param { + cuvs::neighbors::mg::distribution_mode mode; + }; + + struct search_param : public cuvs::bench::cuvs_cagra::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1) + : algo(metric, dim), index_params_(param) + { + index_params_.cagra_params.metric = parse_metric_type(metric); + index_params_.ivf_pq_build_params->metric = parse_metric_type(metric); + + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + + void set_search_param(const search_param_base& param) override; + + void set_search_dataset(const T* dataset, size_t nrow) override; + + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + void search_base(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const; + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + // to enable dataset access from GPU memory + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + void save(const std::string& file) const 
override; + void load(const std::string&) override; + void save_to_hnswlib(const std::string& file) const; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + float refine_ratio_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> + index_; +}; + +template +void cuvs_mg_cagra::build(const T* dataset, size_t nrow) +{ + auto dataset_extents = raft::make_extents(nrow, dim_); + index_params_.prepare_build_params(dataset_extents); + cuvs::neighbors::mg::index_params build_params = index_params_.cagra_params; + build_params.mode = index_params_.mode; + + auto dataset_view = + raft::make_host_matrix_view(dataset, nrow, dim_); + auto idx = cuvs::neighbors::mg::build(handle_, build_params, dataset_view); + index_ = + std::make_shared, T, IdxT>>( + std::move(idx)); +} + +inline auto allocator_to_string(AllocatorType mem_type) -> std::string; + +template +void cuvs_mg_cagra::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = static_cast>(sp.p); + cagra::search_params* search_params_ptr_ = static_cast(&search_params_); + *search_params_ptr_ = sp.p; + search_params_.merge_mode = sp.merge_mode; + refine_ratio_ = sp.refine_ratio; +} + +template +void cuvs_mg_cagra::set_search_dataset(const T* dataset, size_t nrow) +{ +} + +template +void cuvs_mg_cagra::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_cagra::load(const std::string& file) +{ + index_ = + std::make_shared, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_cagra(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_cagra::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_cagra::search_base( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + static_assert(std::is_integral_v); + 
static_assert(std::is_integral_v); + + auto queries_view = + raft::make_host_matrix_view(queries, batch_size, dim_); + auto neighbors_view = + raft::make_host_matrix_view((IdxT*)neighbors, batch_size, k); + auto distances_view = + raft::make_host_matrix_view(distances, batch_size, k); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +template +void cuvs_mg_cagra::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto k0 = static_cast(refine_ratio_ * k); + const bool disable_refinement = k0 <= static_cast(k); + + if (disable_refinement) { + search_base(queries, batch_size, k, neighbors, distances); + } else { + throw std::runtime_error("refinement not supported"); + } +} +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu new file mode 100644 index 000000000..20cdc41e3 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "cuvs_mg_ivf_flat_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_ivf_flat; +// template class cuvs_mg_ivf_flat; +template class cuvs_mg_ivf_flat; +template class cuvs_mg_ivf_flat; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h new file mode 100644 index 000000000..54a0d2fac --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_ivf_flat_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +template +class cuvs_mg_ivf_flat : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + using build_param = cuvs::neighbors::mg::index_params; + + struct search_param : public cuvs::bench::cuvs_ivf_flat::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_ivf_flat(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param) + { + index_params_.metric = parse_metric_type(metric); + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + void set_search_param(const search_param_base& param) override; + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> + index_; +}; + +template +void cuvs_mg_ivf_flat::build(const T* dataset, size_t nrow) +{ + auto dataset_view = + 
raft::make_host_matrix_view(dataset, IdxT(nrow), IdxT(dim_)); + auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view); + index_ = std::make_shared< + cuvs::neighbors::mg::index, T, IdxT>>(std::move(idx)); +} + +template +void cuvs_mg_ivf_flat::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = sp.ivf_flat_params; + ivf_flat::search_params* search_params_ptr_ = + static_cast(&search_params_); + *search_params_ptr_ = sp.ivf_flat_params; + search_params_.merge_mode = sp.merge_mode; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_mg_ivf_flat::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_ivf_flat::load(const std::string& file) +{ + index_ = std::make_shared< + cuvs::neighbors::mg::index, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_flat(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_ivf_flat::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_ivf_flat::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto queries_view = raft::make_host_matrix_view( + queries, IdxT(batch_size), IdxT(dim_)); + auto neighbors_view = raft::make_host_matrix_view( + (IdxT*)neighbors, IdxT(batch_size), IdxT(k)); + auto distances_view = raft::make_host_matrix_view( + distances, IdxT(batch_size), IdxT(k)); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +} // namespace cuvs::bench \ No newline at end of file diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu new file mode 100644 index 000000000..a74bab6f5 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_ivf_pq_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h new file mode 100644 index 000000000..84aea7d4a --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_ivf_pq_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +template +class cuvs_mg_ivf_pq : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + using build_param = cuvs::neighbors::mg::index_params; + + struct search_param : public cuvs::bench::cuvs_ivf_pq::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_ivf_pq(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param) + { + index_params_.metric = parse_metric_type(metric); + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + void set_search_param(const search_param_base& param) override; + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> index_; +}; + +template +void cuvs_mg_ivf_pq::build(const T* dataset, size_t nrow) +{ + auto dataset_view = + raft::make_host_matrix_view(dataset, 
IdxT(nrow), IdxT(dim_)); + auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view); + index_ = + std::make_shared, T, IdxT>>( + std::move(idx)); +} + +template +void cuvs_mg_ivf_pq::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = static_cast>(sp.pq_param); + ivf_pq::search_params* search_params_ptr_ = static_cast(&search_params_); + *search_params_ptr_ = sp.pq_param; + search_params_.merge_mode = sp.merge_mode; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_mg_ivf_pq::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_ivf_pq::load(const std::string& file) +{ + index_ = + std::make_shared, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_pq(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_ivf_pq::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_ivf_pq::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto queries_view = raft::make_host_matrix_view( + queries, IdxT(batch_size), IdxT(dim_)); + auto neighbors_view = raft::make_host_matrix_view( + (IdxT*)neighbors, IdxT(batch_size), IdxT(k)); + auto distances_view = raft::make_host_matrix_view( + distances, IdxT(batch_size), IdxT(k)); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +} // namespace cuvs::bench \ No newline at end of file diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake new file mode 100644 index 000000000..c21cccbcc --- /dev/null +++ b/cpp/cmake/thirdparty/get_cuvs.cmake @@ -0,0 +1,64 @@ +# ============================================================================= +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. + +# Use RAPIDS_VERSION_MAJOR_MINOR from rapids_config.cmake +set(CUVS_VERSION "${RAPIDS_VERSION_MAJOR_MINOR}") +set(CUVS_FORK "rapidsai") +set(CUVS_PINNED_TAG "branch-${RAPIDS_VERSION_MAJOR_MINOR}") + +function(find_and_configure_cuvs) + set(oneValueArgs VERSION FORK PINNED_TAG ENABLE_NVTX CLONE_ON_PIN BUILD_CPU_ONLY BUILD_SHARED_LIBS) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "branch-${CUVS_VERSION}") + message(STATUS "cuVS: pinned tag found: ${PKG_PINNED_TAG}. 
Cloning cuVS locally.") + set(CPM_DOWNLOAD_cuvs ON) + endif() + + #----------------------------------------------------- + # Invoke CPM find_package() + #----------------------------------------------------- + rapids_cpm_find(cuvs ${PKG_VERSION} + GLOBAL_TARGETS cuvs::cuvs + BUILD_EXPORT_SET cuvs-bench-exports + INSTALL_EXPORT_SET cuvs-bench-exports + COMPONENTS cuvs + CPM_ARGS + GIT_REPOSITORY https://github.com/${PKG_FORK}/cuvs.git + GIT_TAG ${PKG_PINNED_TAG} + SOURCE_SUBDIR cpp + OPTIONS + "BUILD_SHARED_LIBS ${PKG_BUILD_SHARED_LIBS}" + "BUILD_CPU_ONLY ${PKG_BUILD_CPU_ONLY}" + "BUILD_TESTS OFF" + "BUILD_CAGRA_HNSWLIB OFF" + "CUVS_CLONE_ON_PIN ${PKG_CLONE_ON_PIN}" + ) +endfunction() + + +# Change pinned tag here to test a commit in CI +# To use a different cuVS locally, set the CMake variable +# CPM_cuvs_SOURCE=/path/to/local/cuvs +find_and_configure_cuvs(VERSION ${CUVS_VERSION}.00 + FORK ${CUVS_FORK} + PINNED_TAG ${CUVS_PINNED_TAG} + ENABLE_NVTX OFF + # When PINNED_TAG above doesn't match the default rapids branch, + # force local cuvs clone in build directory + # even if it's already installed. + CLONE_ON_PIN ${CUVS_CLONE_ON_PIN} + BUILD_CPU_ONLY ${BUILD_CPU_ONLY} + BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS} +) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 2459d521d..e28572457 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -2149,7 +2149,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = "CUVS_BUILD_MG_ALGOS=1" # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. 
The diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index 241f5d8b0..14331ebbc 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -267,6 +267,15 @@ cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index); */ cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index); +/** + * @brief Get dimension of the CAGRA index + * + * @param[in] index CAGRA index + * @param[out] dim return dimension of the index + * @return cuvsError_t + */ +cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim); + /** * @} */ @@ -338,7 +347,7 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res, * with the same type of `queries`, such that `index.dtype.code == * queries.dl_tensor.dtype.code` Types for input are: * 1. `queries`: - *` a. kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` * b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` * c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` * 2. 
`neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 73ce80b41..60b8cc122 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include #include #include #include // get_device_for_address @@ -636,5 +637,56 @@ enable_if_valid_list_t deserialize_list(const raft::resources& handle, const typename ListT::spec_type& store_spec, const typename ListT::spec_type& device_spec); } // namespace ivf +} // namespace cuvs::neighbors + +namespace cuvs::neighbors { +using namespace raft; + +template +struct iface { + iface() : mutex_(std::make_shared()) {} + + const IdxT size() const { return index_.value().size(); } + + std::optional index_; + std::shared_ptr mutex_; +}; + +template +void build(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const cuvs::neighbors::index_params* index_params, + raft::mdspan, row_major, Accessor> index_dataset); + +template +void extend( + const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + raft::mdspan, row_major, Accessor1> new_vectors, + std::optional, layout_c_contiguous, Accessor2>> + new_indices); + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::device_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances); + +template +void serialize(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + std::ostream& os); + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + std::istream& is); + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const std::string& filename); }; // 
namespace cuvs::neighbors diff --git a/cpp/include/cuvs/neighbors/hnsw.h b/cpp/include/cuvs/neighbors/hnsw.h index 5e94de60a..0495c574a 100644 --- a/cpp/include/cuvs/neighbors/hnsw.h +++ b/cpp/include/cuvs/neighbors/hnsw.h @@ -105,8 +105,10 @@ cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index); * with the same type of `queries`, such that `index.dtype.code == * queries.dl_tensor.dtype.code` * Supported types for input are: - * 1. `queries`: `kDLDataType.code == kDLFloat` or `kDLDataType.code == kDLInt` and - * `kDLDataType.bits = 32` + * 1. `queries`: + * a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` + * c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` * 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 64` * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` * NOTE: The HNSW index can only be searched by the hnswlib wrapper in cuVS, diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp index 007adef0d..d5abd6d55 100644 --- a/cpp/include/cuvs/neighbors/hnsw.hpp +++ b/cpp/include/cuvs/neighbors/hnsw.hpp @@ -173,6 +173,8 @@ std::unique_ptr> from_cagra( /**@}*/ +// TODO: Filtered Search APIs: https://github.com/rapidsai/cuvs/issues/363 + /** * @defgroup hnsw_cpp_index_search Search hnswlib index * @{ @@ -260,7 +262,7 @@ void search(raft::resources const& res, void search(raft::resources const& res, const search_params& params, const index& idx, - raft::host_matrix_view queries, + raft::host_matrix_view queries, raft::host_matrix_view neighbors, raft::host_matrix_view distances); @@ -303,7 +305,7 @@ void search(raft::resources const& res, void search(raft::resources const& res, const search_params& params, const index& idx, - raft::host_matrix_view queries, + raft::host_matrix_view queries, raft::host_matrix_view neighbors, raft::host_matrix_view distances); diff --git 
a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 67d1b46c0..7f852d635 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -1168,7 +1168,7 @@ void extend(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1209,7 +1209,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1250,7 +1250,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp index 3ce5f382f..ae543c9e9 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -1221,6 +1221,75 @@ void extend(raft::resources const& handle, std::optional> new_indices, cuvs::neighbors::ivf_pq::index* idx); +/** + * @brief Extend the index with the new data. + * + * Note, the user can set a stream pool in the input raft::resource with + * at least one stream to enable kernel and copy overlapping. 
+ * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // optional: create a stream pool with at least one stream to enable kernel and copy + * // overlapping + * raft::resource::set_cuda_stream_pool(handle, std::make_shared(1)); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a host vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_pq::index& idx) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Extend the index with the new data. + * + * Note, the user can set a stream pool in the input raft::resource with + * at least one stream to enable kernel and copy overlapping. 
+ * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // optional: create a stream pool with at least one stream to enable kernel and copy + * // overlapping + * raft::resource::set_cuda_stream_pool(handle, std::make_shared(1)); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_pq::extend(handle, new_vectors, no_op, &index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a host vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_pq::index* idx); + /** * @brief Extend the index with the new data. 
* @@ -1405,7 +1474,7 @@ void extend(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1450,7 +1519,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1495,7 +1564,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1540,7 +1609,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, diff --git a/cpp/include/cuvs/neighbors/mg.hpp b/cpp/include/cuvs/neighbors/mg.hpp new file mode 100644 index 000000000..4657fa8fb --- /dev/null +++ b/cpp/include/cuvs/neighbors/mg.hpp @@ -0,0 +1,1367 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef CUVS_BUILD_MG_ALGOS + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#define DEFAULT_SEARCH_BATCH_SIZE 1 << 20 + +/// \defgroup mg_cpp_index_params ANN MG index build parameters + +namespace cuvs::neighbors::mg { +/** Distribution mode */ +/// \ingroup mg_cpp_index_params +enum distribution_mode { + /** Index is replicated on each device, favors throughput */ + REPLICATED, + /** Index is split on several devices, favors scaling */ + SHARDED +}; + +/// \defgroup mg_cpp_search_params ANN MG search parameters + +/** Search mode when using a replicated index */ +/// \ingroup mg_cpp_search_params +enum replicated_search_mode { + /** Search queries are splited to maintain equal load on GPUs */ + LOAD_BALANCER, + /** Each search query is processed by a single GPU in a round-robin fashion */ + ROUND_ROBIN +}; + +/** Merge mode when using a sharded index */ +/// \ingroup mg_cpp_search_params +enum sharded_merge_mode { + /** Search batches are merged on the root rank */ + MERGE_ON_ROOT_RANK, + /** Search batches are merged in a tree reduction fashion */ + TREE_MERGE +}; + +/** Build parameters */ +/// \ingroup mg_cpp_index_params +template +struct index_params : public Upstream { + index_params() : mode(SHARDED) {} + + index_params(const Upstream& sp) : Upstream(sp), mode(SHARDED) {} + + /** Distribution mode */ + cuvs::neighbors::mg::distribution_mode mode = SHARDED; +}; + +/** Search parameters */ +/// \ingroup mg_cpp_search_params +template +struct search_params : public Upstream { + 
search_params() : search_mode(LOAD_BALANCER), merge_mode(TREE_MERGE) {} + + search_params(const Upstream& sp) + : Upstream(sp), search_mode(LOAD_BALANCER), merge_mode(TREE_MERGE) + { + } + + /** Replicated search mode */ + cuvs::neighbors::mg::replicated_search_mode search_mode = LOAD_BALANCER; + /** Sharded merge mode */ + cuvs::neighbors::mg::sharded_merge_mode merge_mode = TREE_MERGE; +}; + +} // namespace cuvs::neighbors::mg + +namespace cuvs::neighbors::mg { + +using namespace raft; + +template +struct index { + index(distribution_mode mode, int num_ranks_); + index(const raft::device_resources& handle, const std::string& filename); + + index(const index&) = delete; + index(index&&) = default; + auto operator=(const index&) -> index& = delete; + auto operator=(index&&) -> index& = default; + + distribution_mode mode_; + int num_ranks_; + std::vector> ann_interfaces_; + + // for load balancing mechanism + std::shared_ptr> round_robin_counter_; +}; + +/// \defgroup mg_cpp_index_build ANN MG index build + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * 
@param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params 
configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, half, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * 
@param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, half, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major 
matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, uint32_t>; + +/// \defgroup mg_cpp_index_extend ANN MG index extend + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * 
raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional 
vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, half, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * 
cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, half, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * 
`std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \defgroup mg_cpp_index_search ANN MG index search + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, 
int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] 
+ * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * 
cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, half, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * 
@brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& 
handle, + const index, float, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, half, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a 
row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \defgroup mg_cpp_serialize ANN MG index serialization + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = 
cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = 
cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, half, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = 
cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, half, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = 
cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, uint32_t>& index, + const std::string& filename); + +/// \defgroup mg_cpp_deserialize ANN MG index deserialization + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes an IVF-Flat multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_flat(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_flat(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes an IVF-PQ multi-GPU index + * + * Usage 
example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_pq(handle, filename); + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_pq(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes a CAGRA multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_cagra(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_cagra(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \defgroup mg_cpp_distribute ANN MG local index distribution + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized IVF-Flat index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::ivf_flat::index_params index_params; + * auto index = cuvs::neighbors::ivf_flat::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::ivf_flat::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_flat(handle, filename); + * + * @endcode + * + * @param[in] handle + * 
@param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_flat(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized IVF-PQ index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::ivf_pq::index_params index_params; + * auto index = cuvs::neighbors::ivf_pq::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::ivf_pq::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_pq(handle, filename); + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_pq(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized CAGRA index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::cagra::index_params index_params; + * auto index = cuvs::neighbors::cagra::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::cagra::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_cagra(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_cagra(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +} // namespace cuvs::neighbors::mg + +#else + +static_assert(false, + "FORBIDEN_MG_ALGORITHM_IMPORT\n\n" + "Please recompile the cuVS library with MG algorithms 
BUILD_MG_ALGOS=ON.\n"); + +#endif diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 164448f2c..6985ff094 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -176,6 +176,14 @@ extern "C" cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index_c_ptr) }); } +extern "C" cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = reinterpret_cast*>(index->addr); + *dim = index_ptr->dim(); + }); +} + extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res, cuvsCagraIndexParams_t params, DLManagedTensor* dataset_tensor, diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 9694a3e7a..b03b8214b 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -29,9 +29,10 @@ #include +namespace cuvs::neighbors::cagra { + static const std::string RAFT_NAME = "raft"; -namespace cuvs::neighbors::cagra { template void add_node_core( raft::resources const& handle, diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index f86ed9ef6..a077c098f 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -32,9 +32,10 @@ #include #include -static const std::string RAFT_NAME = "raft"; namespace cuvs::neighbors::cagra::detail { +static const std::string RAFT_NAME = "raft"; + constexpr int serialization_version = 4; /** @@ -119,9 +120,9 @@ void serialize_to_hnswlib(raft::resources const& res, os.write(reinterpret_cast(&curr_element_count), sizeof(std::size_t)); // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, // labeltype: size_t size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + - // dim * 4 + sizeof(labeltype) - auto size_data_per_element = - 
static_cast(index_.graph_degree() * sizeof(IdxT) + 4 + index_.dim() * 4 + 8); + // dim * sizeof(T) + sizeof(labeltype) + auto size_data_per_element = static_cast(index_.graph_degree() * sizeof(IdxT) + 4 + + index_.dim() * sizeof(T) + 8); os.write(reinterpret_cast(&size_data_per_element), sizeof(std::size_t)); // label_offset std::size_t label_offset = size_data_per_element - 8; @@ -184,18 +185,9 @@ void serialize_to_hnswlib(raft::resources const& res, } auto data_row = host_dataset.data_handle() + (index_.dim() * i); - if constexpr (std::is_same_v) { - for (std::size_t j = 0; j < index_.dim(); ++j) { - auto data_elem = static_cast(host_dataset(i, j)); - os.write(reinterpret_cast(&data_elem), sizeof(float)); - } - } else if constexpr (std::is_same_v or std::is_same_v) { - for (std::size_t j = 0; j < index_.dim(); ++j) { - auto data_elem = static_cast(host_dataset(i, j)); - os.write(reinterpret_cast(&data_elem), sizeof(int)); - } - } else { - RAFT_FAIL("Unsupported dataset type while saving CAGRA dataset to HNSWlib format"); + for (std::size_t j = 0; j < index_.dim(); ++j) { + auto data_elem = static_cast(host_dataset(i, j)); + os.write(reinterpret_cast(&data_elem), sizeof(T)); } os.write(reinterpret_cast(&i), sizeof(std::size_t)); diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index 0d1ae4ec9..ce1e03264 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -110,9 +110,9 @@ std::unique_ptr> from_cagra(raft::resources const& res, return std::unique_ptr>(hnsw_index); } -template -void get_search_knn_results(hnswlib::HierarchicalNSW const* idx, - const QueriesT* query, +template +void get_search_knn_results(hnswlib::HierarchicalNSW::type> const* idx, + const T* query, int k, uint64_t* indices, float* distances) @@ -127,11 +127,11 @@ void get_search_knn_results(hnswlib::HierarchicalNSW const* idx, } } -template +template void search(raft::resources const& res, const search_params& params, const 
index& idx, - raft::host_matrix_view queries, + raft::host_matrix_view queries, raft::host_matrix_view neighbors, raft::host_matrix_view distances) { @@ -146,7 +146,8 @@ void search(raft::resources const& res, idx.set_ef(params.ef); auto const* hnswlib_index = - reinterpret_cast const*>(idx.get_index()); + reinterpret_cast::type> const*>( + idx.get_index()); // when num_threads == 0, automatically maximize parallelism if (params.num_threads) { diff --git a/cpp/src/neighbors/hnsw.cpp b/cpp/src/neighbors/hnsw.cpp index 36cbb16c9..e6f3fbcc7 100644 --- a/cpp/src/neighbors/hnsw.cpp +++ b/cpp/src/neighbors/hnsw.cpp @@ -34,20 +34,20 @@ CUVS_INST_HNSW_FROM_CAGRA(int8_t); #undef CUVS_INST_HNSW_FROM_CAGRA -#define CUVS_INST_HNSW_SEARCH(T, QueriesT) \ - void search(raft::resources const& res, \ - const search_params& params, \ - const index& idx, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbors, \ - raft::host_matrix_view distances) \ - { \ - detail::search(res, params, idx, queries, neighbors, distances); \ +#define CUVS_INST_HNSW_SEARCH(T) \ + void search(raft::resources const& res, \ + const search_params& params, \ + const index& idx, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances) \ + { \ + detail::search(res, params, idx, queries, neighbors, distances); \ } -CUVS_INST_HNSW_SEARCH(float, float); -CUVS_INST_HNSW_SEARCH(uint8_t, int); -CUVS_INST_HNSW_SEARCH(int8_t, int); +CUVS_INST_HNSW_SEARCH(float); +CUVS_INST_HNSW_SEARCH(uint8_t); +CUVS_INST_HNSW_SEARCH(int8_t); #undef CUVS_INST_HNSW_SEARCH diff --git a/cpp/src/neighbors/hnsw_c.cpp b/cpp/src/neighbors/hnsw_c.cpp index ab5268a6d..a19875641 100644 --- a/cpp/src/neighbors/hnsw_c.cpp +++ b/cpp/src/neighbors/hnsw_c.cpp @@ -31,7 +31,7 @@ #include namespace { -template +template void _search(cuvsResources_t res, cuvsHnswSearchParams params, cuvsHnswIndex index, @@ -46,7 +46,7 @@ void _search(cuvsResources_t res, search_params.ef = 
params.ef; search_params.num_threads = params.numThreads; - using queries_mdspan_type = raft::host_matrix_view; + using queries_mdspan_type = raft::host_matrix_view; using neighbors_mdspan_type = raft::host_matrix_view; using distances_mdspan_type = raft::host_matrix_view; auto queries_mds = cuvs::core::from_dlpack(queries_tensor); @@ -127,16 +127,13 @@ extern "C" cuvsError_t cuvsHnswSearch(cuvsResources_t res, auto index = *index_c_ptr; RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries"); - RAFT_EXPECTS(queries.dtype.bits == 32, "number of bits in queries dtype should be 32"); if (index.dtype.code == kDLFloat) { - _search( - res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); } else if (index.dtype.code == kDLUInt) { - _search( - res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); } else if (index.dtype.code == kDLInt) { - _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + _search(res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); } else { RAFT_FAIL("Unsupported index dtype: %d and bits: %d", queries.dtype.code, queries.dtype.bits); } @@ -152,13 +149,10 @@ extern "C" cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, return cuvs::core::translate_exceptions([=] { if (index->dtype.code == kDLFloat && index->dtype.bits == 32) { index->addr = reinterpret_cast(_deserialize(res, filename, dim, metric)); - index->dtype.code = kDLFloat; } else if (index->dtype.code == kDLUInt && index->dtype.bits == 8) { index->addr = reinterpret_cast(_deserialize(res, filename, dim, metric)); - index->dtype.code = kDLInt; } else if (index->dtype.code == kDLInt && index->dtype.bits == 8) { index->addr = reinterpret_cast(_deserialize(res, filename, dim, metric)); - 
index->dtype.code = kDLUInt; } else { RAFT_FAIL("Unsupported dtype in file %s", filename); } diff --git a/cpp/src/neighbors/iface/generate_iface.py b/cpp/src/neighbors/iface/generate_iface.py new file mode 100644 index 000000000..794219bbf --- /dev/null +++ b/cpp/src/neighbors/iface/generate_iface.py @@ -0,0 +1,273 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +""" + +include_macro = """ +#include "iface.hpp" +""" + +namespace_macro = """ +namespace cuvs::neighbors { +""" + +footer = """ +} // namespace cuvs::neighbors +""" + +flat_macro = """ +#define CUVS_INST_MG_FLAT(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const 
cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +pq_macro = """ +#define CUVS_INST_MG_PQ(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const 
raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +cagra_macro = """ +#define CUVS_INST_MG_CAGRA(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, 
T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +flat_macros = dict ( + flat = dict( + include=include_macro, + definition=flat_macro, + name="CUVS_INST_MG_FLAT", + ) +) + +pq_macros = dict ( + pq = dict( + include=include_macro, + definition=pq_macro, + name="CUVS_INST_MG_PQ", + ) +) + +cagra_macros = dict ( + cagra = dict( + include=include_macro, + definition=cagra_macro, + name="CUVS_INST_MG_CAGRA", + ) +) + +flat_types = dict( + float_int64_t=("float", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +pq_types = dict( + float_int64_t=("float", "int64_t"), + half_int64_t=("half", 
"int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +cagra_types = dict( + float_uint32_t=("float", "uint32_t"), + half_uint32_t=("half", "uint32_t"), + int8_t_uint32_t=("int8_t", "uint32_t"), + uint8_t_uint32_t=("uint8_t", "uint32_t"), +) + +for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"iface_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro['include']) + f.write(namespace_macro) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") + f.write(footer) + + print(f"src/neighbors/iface/{path}") diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp new file mode 100644 index 000000000..a329db429 --- /dev/null +++ b/cpp/src/neighbors/iface/iface.hpp @@ -0,0 +1,198 @@ +#include + +#include +#include +#include +#include +#include + +namespace cuvs::neighbors { + +using namespace raft; + +template +void build(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const cuvs::neighbors::index_params* index_params, + raft::mdspan, row_major, Accessor> index_dataset) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::ivf_flat::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::ivf_pq::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::cagra::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } + resource::sync_stream(handle); + + interface.mutex_->unlock(); +} 
+ +template +void extend( + const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + raft::mdspan, row_major, Accessor1> new_vectors, + std::optional, layout_c_contiguous, Accessor2>> + new_indices) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + auto idx = + cuvs::neighbors::ivf_flat::extend(handle, new_vectors, new_indices, interface.index_.value()); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = + cuvs::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, interface.index_.value()); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + RAFT_FAIL("CAGRA does not implement the extend method"); + } + resource::sync_stream(handle); + + interface.mutex_->unlock(); +} + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances) +{ + // interface.mutex_->lock(); + if constexpr (std::is_same>::value) { + cuvs::neighbors::ivf_flat::search( + handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } else if constexpr (std::is_same>::value) { + cuvs::neighbors::ivf_pq::search(handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } else if constexpr (std::is_same>::value) { + cuvs::neighbors::cagra::search(handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } + resource::sync_stream(handle); + + // interface.mutex_->unlock(); +} + +// for MG ANN only +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view h_queries, + 
raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances) +{ + // interface.mutex_->lock(); + + int64_t n_rows = h_queries.extent(0); + int64_t n_dims = h_queries.extent(1); + auto d_queries = raft::make_device_matrix(handle, n_rows, n_dims); + raft::copy(d_queries.data_handle(), + h_queries.data_handle(), + n_rows * n_dims, + resource::get_cuda_stream(handle)); + auto d_query_view = raft::make_const_mdspan(d_queries.view()); + + search(handle, interface, search_params, d_query_view, d_neighbors, d_distances); + + // interface.mutex_->unlock(); +} + +template +void serialize(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + std::ostream& os) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + ivf_flat::serialize(handle, os, interface.index_.value()); + } else if constexpr (std::is_same>::value) { + ivf_pq::serialize(handle, os, interface.index_.value()); + } else if constexpr (std::is_same>::value) { + cagra::serialize(handle, os, interface.index_.value(), true); + } + + interface.mutex_->unlock(); +} + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + std::istream& is) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + ivf_flat::index idx(handle); + ivf_flat::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + ivf_pq::index idx(handle); + ivf_pq::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + cagra::index idx(handle); + cagra::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } + + interface.mutex_->unlock(); +} + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const std::string& filename) +{ + interface.mutex_->lock(); + + std::ifstream is(filename, std::ios::in | std::ios::binary); 
+ if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + if constexpr (std::is_same>::value) { + ivf_flat::index idx(handle); + ivf_flat::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + ivf_pq::index idx(handle); + ivf_pq::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + cagra::index idx(handle); + cagra::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } + + is.close(); + + interface.mutex_->unlock(); +} + +}; // namespace cuvs::neighbors \ No newline at end of file diff --git a/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu new file mode 100644 index 000000000..b5e329dd8 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(float, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu new file mode 100644 index 000000000..23fcffc59 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(half, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu new file mode 100644 index 000000000..30377ab66 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(int8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu new file mode 100644 index 000000000..59a1640e8 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(uint8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu new file mode 100644 index 000000000..a0a455375 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(float, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu new file mode 100644 index 000000000..9fdd6464f --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(int8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu new file mode 100644 index 000000000..daee59c4a --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(uint8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu new file mode 100644 index 000000000..7282d6bd0 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(float, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu new file mode 100644 index 000000000..4d67f9aed --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(half, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu new file mode 100644 index 000000000..46537b3f9 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(int8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu new file mode 100644 index 000000000..591ac881a --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view 
h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(uint8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py index 1fabcca8c..c435cc6d9 100644 --- a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py +++ b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py @@ -144,7 +144,7 @@ void search( \\ raft::resources const& handle, \\ const cuvs::neighbors::ivf_flat::search_params& params, \\ - cuvs::neighbors::ivf_flat::index& index, \\ + const cuvs::neighbors::ivf_flat::index& index, \\ raft::device_matrix_view queries, \\ raft::device_matrix_view neighbors, \\ raft::device_matrix_view distances, \\ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 9626b2ce5..f5a4267cd 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -1206,8 +1206,8 @@ void launch_with_fixed_consts(cuvs::distance::DistanceType metric, Args&&... arg inner_prod_dist>( {}, raft::compose_op(raft::add_const_op{1.0f}, raft::mul_const_op{-1.0f}), - std::forward(args)...); - // NB: update the description of `knn::ivf_flat::build` when adding here a new metric. + std::forward(args)...); // NB: update the description of `knn::ivf_flat::build` when + // adding here a new metric. 
default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric)); } } diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu index 3f262d612..87abc0bc0 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu index 4357afb0a..c1e92ae5b 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu index 8265a3e17..4ff8ed770 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const 
cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py index a5a829967..a2ac048ff 100644 --- a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py +++ b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py @@ -68,7 +68,7 @@ #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \\ void search(raft::resources const& handle, \\ const cuvs::neighbors::ivf_pq::search_params& params, \\ - cuvs::neighbors::ivf_pq::index& index, \\ + const cuvs::neighbors::ivf_pq::index& index, \\ raft::device_matrix_view queries, \\ raft::device_matrix_view neighbors, \\ raft::device_matrix_view distances, \\ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu index 07ee110bc..44e9777ba 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu index cf387cb67..d7446e846 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define 
CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu index 5ec9093df..c1ffede97 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu index d2e2f3b00..08e4f0536 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index c65ea8108..4c9867126 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ 
b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include #include #include +#include +#include #include #include #include @@ -1466,6 +1469,13 @@ void extend(raft::resources const& handle, std::is_same_v, "Unsupported data type"); + if (index->metric() == distance::DistanceType::CosineExpanded) { + if constexpr (std::is_same_v || std::is_same_v) + RAFT_FAIL( + "CosineExpanded distance metric is currently not supported for uint8_t and int8_t data " + "type"); + } + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle); rmm::device_async_resource_ref large_memory = raft::resource::get_large_workspace_resource(handle); @@ -1632,6 +1642,14 @@ void extend(raft::resources const& handle, vec_batches.prefetch_next_batch(); for (const auto& vec_batch : vec_batches) { const auto& idx_batch = *idx_batches++; + if (index->metric() == CosineExpanded) { + auto vec_batch_view = raft::make_device_matrix_view( + const_cast(vec_batch.data()), vec_batch.size(), index->dim()); + raft::linalg::row_normalize(handle, + raft::make_const_mdspan(vec_batch_view), + vec_batch_view, + raft::linalg::NormType::L2Norm); + } process_and_fill_codes(handle, *index, vec_batch.data(), @@ -1683,6 +1701,13 @@ auto build(raft::resources const& handle, << (int)params.pq_dim << std::endl; RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); RAFT_EXPECTS(n_rows >= params.n_lists, "number of rows can't be less than n_lists"); + if (params.metric == distance::DistanceType::CosineExpanded) { + // TODO: support int8_t and uint8_t types (https://github.com/rapidsai/cuvs/issues/389) + if constexpr (std::is_same_v || std::is_same_v) + RAFT_FAIL( + "CosineExpanded distance metric is currently not supported for uint8_t and int8_t data " + "type"); + } auto stream = raft::resource::get_cuda_stream(handle); @@ -1755,6 +1780,11 @@ auto build(raft::resources const& handle, 
cuvs::cluster::kmeans::balanced_params kmeans_params; kmeans_params.n_iters = params.kmeans_n_iters; kmeans_params.metric = static_cast((int)index.metric()); + + if (index.metric() == distance::DistanceType::CosineExpanded) { + raft::linalg::row_normalize( + handle, trainset_const_view, trainset.view(), raft::linalg::NormType::L2Norm); + } cuvs::cluster::kmeans_balanced::fit( handle, kmeans_params, trainset_const_view, centers_view, utils::mapping{}); @@ -1762,6 +1792,10 @@ auto build(raft::resources const& handle, rmm::device_uvector labels(n_rows_train, stream, big_memory_resource); auto centers_const_view = raft::make_device_matrix_view( cluster_centers, index.n_lists(), index.dim()); + if (index.metric() == distance::DistanceType::CosineExpanded) { + raft::linalg::row_normalize( + handle, centers_const_view, centers_view, raft::linalg::NormType::L2Norm); + } auto labels_view = raft::make_device_vector_view(labels.data(), n_rows_train); cuvs::cluster::kmeans_balanced::predict(handle, diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity_impl.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity_impl.cuh index 8404ca1f9..fbbdd06c2 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity_impl.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity_impl.cuh @@ -369,6 +369,7 @@ RAFT_KERNEL compute_similarity_kernel(uint32_t dim, reinterpret_cast(lut_end)[i] = query[i] - cluster_center[i]; } } break; + case distance::DistanceType::CosineExpanded: case distance::DistanceType::InnerProduct: { float2 pvals; for (uint32_t i = threadIdx.x; i < dim; i += blockDim.x) { @@ -408,6 +409,7 @@ RAFT_KERNEL compute_similarity_kernel(uint32_t dim, diff -= pq_c; score += diff * diff; } break; + case distance::DistanceType::CosineExpanded: case distance::DistanceType::InnerProduct: { // NB: we negate the scores as we hardcoded select-topk to always compute the minimum float q; @@ -485,6 +487,7 @@ RAFT_KERNEL compute_similarity_kernel(uint32_t dim, 
reinterpret_cast(pq_thread_data), lut_scores, early_stop_limit); + if (metric == distance::DistanceType::CosineExpanded) { score = OutT(1) + score; } } if constexpr (kManageLocalTopK) { block_topk.add(score, sample_offset + i); diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index e185f18dc..db8f9fbd3 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -37,6 +37,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -104,12 +107,21 @@ void select_clusters(raft::resources const& handle, This is a negative inner-product distance. We minimize it to find the similar clusters. + NB: qc_distances is NOT used further in ivfpq_search. + + Cosine distance: + `qc_distances[i, j] = - (queries[i], cluster_centers[j])` + + This is a negative inner-product distance. The queries and cluster centers are row normalized. + We minimize it to find the similar clusters. + NB: qc_distances is NOT used further in ivfpq_search. 
*/ float norm_factor; switch (metric) { case cuvs::distance::DistanceType::L2SqrtExpanded: case cuvs::distance::DistanceType::L2Expanded: norm_factor = 1.0 / -2.0; break; + case cuvs::distance::DistanceType::CosineExpanded: case cuvs::distance::DistanceType::InnerProduct: norm_factor = 0.0; break; default: RAFT_FAIL("Unsupported distance type %d.", int(metric)); } @@ -133,6 +145,7 @@ void select_clusters(raft::resources const& handle, gemm_k = dim + 1; RAFT_EXPECTS(gemm_k <= dim_ext, "unexpected gemm_k or dim_ext"); } break; + case cuvs::distance::DistanceType::CosineExpanded: case cuvs::distance::DistanceType::InnerProduct: { alpha = -1.0; beta = 0.0; @@ -363,8 +376,9 @@ void ivfpq_search_worker(raft::resources const& handle, // stores basediff (query[i] - center[i]) precomp_data_count = index.rot_dim(); } break; + case distance::DistanceType::CosineExpanded: case distance::DistanceType::InnerProduct: { - // stores two components (query[i] * center[i], query[i] * center[i]) + // stores two components (query[i], query[i] * center[i]) precomp_data_count = index.rot_dim() * 2; } break; default: { @@ -457,8 +471,14 @@ void ivfpq_search_worker(raft::resources const& handle, num_samples_vector); // Postprocessing - ivf::detail::postprocess_distances( - distances, topk_dists.data(), index.metric(), n_queries, topK, scaling_factor, true, stream); + ivf::detail::postprocess_distances(distances, + topk_dists.data(), + index.metric(), + n_queries, + topK, + scaling_factor, + index.metric() != distance::DistanceType::CosineExpanded, + stream); ivf::detail::postprocess_neighbors(neighbors, neighbors_uint32, index.inds_ptrs().data_handle(), @@ -508,6 +528,7 @@ struct ivfpq_search { { bool signed_metric = false; switch (metric) { + case cuvs::distance::DistanceType::CosineExpanded: signed_metric = true; break; case cuvs::distance::DistanceType::InnerProduct: signed_metric = true; break; default: break; } @@ -606,6 +627,12 @@ inline void search(raft::resources const& handle, 
static_assert(std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v, "Unsupported element type."); + if (index.metric() == distance::DistanceType::CosineExpanded) { + if constexpr (std::is_same_v || std::is_same_v) + RAFT_FAIL( + "CosineExpanded distance metric is currently not supported for uint8_t and int8_t data " + "type"); + } raft::common::nvtx::range fun_scope( "ivf_pq::search(n_queries = %u, n_probes = %u, k = %u, dim = %zu)", n_queries, @@ -698,7 +725,14 @@ inline void search(raft::resources const& handle, rot_queries.data(), index.rot_dim(), stream); - + if (index.metric() == distance::DistanceType::CosineExpanded) { + auto rot_queries_view = raft::make_device_matrix_view( + rot_queries.data(), max_queries, index.rot_dim()); + raft::linalg::row_normalize(handle, + raft::make_const_mdspan(rot_queries_view), + rot_queries_view, + raft::linalg::NormType::L2Norm); + } for (uint32_t offset_b = 0; offset_b < queries_batch; offset_b += max_batch_size) { uint32_t batch_size = min(max_batch_size, queries_batch - offset_b); /* The distance calculation is done in the rotated/transformed space; diff --git a/cpp/src/neighbors/mg/generate_mg.py b/cpp/src/neighbors/mg/generate_mg.py new file mode 100644 index 000000000..af5e60545 --- /dev/null +++ b/cpp/src/neighbors/mg/generate_mg.py @@ -0,0 +1,286 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +""" + +include_macro = """ +#include "mg.cuh" +""" + +namespace_macro = """ +namespace cuvs::neighbors::mg { +""" + +footer = """ +} // namespace cuvs::neighbors::mg +""" + +flat_macro = """ +#define CUVS_INST_MG_FLAT(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void extend(const raft::device_resources& handle, \\ + index, T, IdxT>& index, \\ + raft::host_matrix_view new_vectors, \\ + std::optional> new_indices) \\ + { \\ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + 
cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_flat(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_flat(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +pq_macro = """ +#define CUVS_INST_MG_PQ(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void extend(const raft::device_resources& handle, \\ + index, T, IdxT>& index, \\ + raft::host_matrix_view new_vectors, \\ + std::optional> new_indices) \\ + { \\ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + 
int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_pq(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +cagra_macro = """ +#define CUVS_INST_MG_CAGRA(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const 
raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_cagra(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_cagra(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +flat_macros = dict ( + flat = dict( + include=include_macro, + definition=flat_macro, + name="CUVS_INST_MG_FLAT", + ) +) + +pq_macros = dict ( + pq = dict( + include=include_macro, + definition=pq_macro, + name="CUVS_INST_MG_PQ", + ) +) + +cagra_macros = dict ( + cagra = dict( + include=include_macro, + definition=cagra_macro, + name="CUVS_INST_MG_CAGRA", + ) +) + +flat_types = dict( + float_int64_t=("float", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +pq_types = dict( + float_int64_t=("float", "int64_t"), + half_int64_t=("half", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +cagra_types = dict( + float_uint32_t=("float", "uint32_t"), + half_uint32_t=("half", "uint32_t"), + int8_t_uint32_t=("int8_t", "uint32_t"), + uint8_t_uint32_t=("uint8_t", "uint32_t"), +) + +for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"mg_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro['include']) + 
f.write(namespace_macro) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") + f.write(footer) + + print(f"src/neighbors/mg/{path}") diff --git a/cpp/src/neighbors/mg/mg.cuh b/cpp/src/neighbors/mg/mg.cuh new file mode 100644 index 000000000..d3f635bc4 --- /dev/null +++ b/cpp/src/neighbors/mg/mg.cuh @@ -0,0 +1,690 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../detail/knn_merge_parts.cuh" +#include +#include +#include +#include + +#include +#include + +namespace cuvs::neighbors { +using namespace raft; + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances); +} // namespace cuvs::neighbors + +namespace cuvs::neighbors::mg { +void check_omp_threads(const int requirements); +} // namespace cuvs::neighbors::mg + +namespace cuvs::neighbors::mg::detail { +using namespace cuvs::neighbors; +using namespace raft; + +// local index deserialization and distribution +template +void deserialize_and_distribute(const raft::device_resources& handle, + index& index, + const std::string& filename) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + for (int rank = 0; 
rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_.emplace_back(); + cuvs::neighbors::deserialize(dev_res, ann_if, filename); + } +} + +// MG index deserialization +template +void deserialize(const raft::device_resources& handle, + index& index, + const std::string& filename) +{ + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + index.mode_ = (cuvs::neighbors::mg::distribution_mode)deserialize_scalar(handle, is); + index.num_ranks_ = deserialize_scalar(handle, is); + + if (index.num_ranks_ != clique.num_ranks_) { + RAFT_FAIL("Serialized index has %d ranks whereas NCCL clique has %d ranks", + index.num_ranks_, + clique.num_ranks_); + } + + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_.emplace_back(); + cuvs::neighbors::deserialize(dev_res, ann_if, is); + } + + is.close(); +} + +template +void build(const raft::device_resources& handle, + index& index, + const cuvs::neighbors::index_params* index_params, + raft::host_matrix_view index_dataset) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + if (index.mode_ == REPLICATED) { + int64_t n_rows = index_dataset.extent(0); + RAFT_LOG_INFO("REPLICATED BUILD: %d*%drows", index.num_ranks_, n_rows); + + index.ann_interfaces_.resize(index.num_ranks_); +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + 
RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::build(dev_res, ann_if, index_params, index_dataset); + resource::sync_stream(dev_res); + } + } else if (index.mode_ == SHARDED) { + int64_t n_rows = index_dataset.extent(0); + int64_t n_cols = index_dataset.extent(1); + int64_t n_rows_per_shard = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + + RAFT_LOG_INFO("SHARDED BUILD: %d*%drows", index.num_ranks_, n_rows_per_shard); + + index.ann_interfaces_.resize(index.num_ranks_); +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + int64_t offset = rank * n_rows_per_shard; + int64_t n_rows_of_current_shard = std::min(n_rows_per_shard, n_rows - offset); + const T* partition_ptr = index_dataset.data_handle() + (offset * n_cols); + auto partition = raft::make_host_matrix_view( + partition_ptr, n_rows_of_current_shard, n_cols); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::build(dev_res, ann_if, index_params, partition); + resource::sync_stream(dev_res); + } + } +} + +template +void extend(const raft::device_resources& handle, + index& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + int64_t n_rows = new_vectors.extent(0); + if (index.mode_ == REPLICATED) { + RAFT_LOG_INFO("REPLICATED EXTEND: %d*%drows", index.num_ranks_, n_rows); + +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::extend(dev_res, ann_if, new_vectors, new_indices); + 
resource::sync_stream(dev_res); + } + } else if (index.mode_ == SHARDED) { + int64_t n_cols = new_vectors.extent(1); + int64_t n_rows_per_shard = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + + RAFT_LOG_INFO("SHARDED EXTEND: %d*%drows", index.num_ranks_, n_rows_per_shard); + +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + int64_t offset = rank * n_rows_per_shard; + int64_t n_rows_of_current_shard = std::min(n_rows_per_shard, n_rows - offset); + const T* new_vectors_ptr = new_vectors.data_handle() + (offset * n_cols); + auto new_vectors_part = raft::make_host_matrix_view( + new_vectors_ptr, n_rows_of_current_shard, n_cols); + + std::optional> new_indices_part = std::nullopt; + if (new_indices.has_value()) { + const IdxT* new_indices_ptr = new_indices.value().data_handle() + offset; + new_indices_part = raft::make_host_vector_view( + new_indices_ptr, n_rows_of_current_shard); + } + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::extend(dev_res, ann_if, new_vectors_part, new_indices_part); + resource::sync_stream(dev_res); + } + } +} + +template +void sharded_search_with_direct_merge(const raft::comms::nccl_clique& clique, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch, + int64_t n_rows, + int64_t n_cols, + int64_t n_neighbors, + int64_t n_batches) +{ + const auto& root_handle = clique.set_current_device_to_root_rank(); + auto in_neighbors = raft::make_device_matrix( + root_handle, index.num_ranks_ * n_rows_per_batch, n_neighbors); + auto in_distances = raft::make_device_matrix( + root_handle, index.num_ranks_ * n_rows_per_batch, n_neighbors); + auto out_neighbors = + 
raft::make_device_matrix(root_handle, n_rows_per_batch, n_neighbors); + auto out_distances = + raft::make_device_matrix(root_handle, n_rows_per_batch, n_neighbors); + + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min((int64_t)n_rows_per_batch, n_rows - offset); + int64_t part_size = n_rows_of_current_batch * n_neighbors; + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + + const int& requirements = index.num_ranks_; + check_omp_threads(requirements); // should use at least num_ranks_ threads to avoid NCCL hang +#pragma omp parallel for num_threads(index.num_ranks_) + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + + if (rank == clique.root_rank_) { // root rank + uint64_t batch_offset = clique.root_rank_ * part_size; + auto d_neighbors = raft::make_device_matrix_view( + in_neighbors.data_handle() + batch_offset, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix_view( + in_distances.data_handle() + batch_offset, n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors, d_distances); + + // wait for other ranks + ncclGroupStart(); + for (int from_rank = 0; from_rank < index.num_ranks_; from_rank++) { + if (from_rank == clique.root_rank_) continue; + + batch_offset = from_rank * part_size; + ncclRecv(in_neighbors.data_handle() + batch_offset, + part_size * sizeof(IdxT), + ncclUint8, + from_rank, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + 
ncclRecv(in_distances.data_handle() + batch_offset, + part_size * sizeof(float), + ncclUint8, + from_rank, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + } + ncclGroupEnd(); + resource::sync_stream(dev_res); + } else { // non-root ranks + auto d_neighbors = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors.view(), d_distances.view()); + + // send results to root rank + ncclGroupStart(); + ncclSend(d_neighbors.data_handle(), + part_size * sizeof(IdxT), + ncclUint8, + clique.root_rank_, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclSend(d_distances.data_handle(), + part_size * sizeof(float), + ncclUint8, + clique.root_rank_, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclGroupEnd(); + resource::sync_stream(dev_res); + } + } + + const auto& root_handle_ = clique.set_current_device_to_root_rank(); + auto h_trans = std::vector(index.num_ranks_); + int64_t translation_offset = 0; + for (int rank = 0; rank < index.num_ranks_; rank++) { + h_trans[rank] = translation_offset; + translation_offset += index.ann_interfaces_[rank].size(); + } + auto d_trans = raft::make_device_vector(root_handle_, index.num_ranks_); + raft::copy(d_trans.data_handle(), + h_trans.data(), + index.num_ranks_, + resource::get_cuda_stream(root_handle_)); + + cuvs::neighbors::detail::knn_merge_parts(in_distances.data_handle(), + in_neighbors.data_handle(), + out_distances.data_handle(), + out_neighbors.data_handle(), + n_rows_of_current_batch, + index.num_ranks_, + n_neighbors, + resource::get_cuda_stream(root_handle_), + d_trans.data_handle()); + + raft::copy(neighbors.data_handle() + output_offset, + out_neighbors.data_handle(), + part_size, + resource::get_cuda_stream(root_handle_)); + 
raft::copy(distances.data_handle() + output_offset, + out_distances.data_handle(), + part_size, + resource::get_cuda_stream(root_handle_)); + + resource::sync_stream(root_handle_); + } +} + +template +void sharded_search_with_tree_merge(const raft::comms::nccl_clique& clique, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch, + int64_t n_rows, + int64_t n_cols, + int64_t n_neighbors, + int64_t n_batches) +{ + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min((int64_t)n_rows_per_batch, n_rows - offset); + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + + const int& requirements = index.num_ranks_; + check_omp_threads(requirements); // should use at least num_ranks_ threads to avoid NCCL hang +#pragma omp parallel for num_threads(index.num_ranks_) + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + + int64_t part_size = n_rows_of_current_batch * n_neighbors; + + auto tmp_neighbors = raft::make_device_matrix( + dev_res, 2 * n_rows_of_current_batch, n_neighbors); + auto tmp_distances = raft::make_device_matrix( + dev_res, 2 * n_rows_of_current_batch, n_neighbors); + auto neighbors_view = raft::make_device_matrix_view( + tmp_neighbors.data_handle(), n_rows_of_current_batch, n_neighbors); + auto distances_view = raft::make_device_matrix_view( + tmp_distances.data_handle(), n_rows_of_current_batch, n_neighbors); + 
cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, neighbors_view, distances_view); + + int64_t translation_offset = 0; + for (int r = 0; r < rank; r++) { + translation_offset += index.ann_interfaces_[r].size(); + } + raft::linalg::addScalar(neighbors_view.data_handle(), + neighbors_view.data_handle(), + (IdxT)translation_offset, + part_size, + resource::get_cuda_stream(dev_res)); + + auto d_trans = raft::make_device_vector(dev_res, 2); + cudaMemsetAsync( + d_trans.data_handle(), 0, 2 * sizeof(IdxT), resource::get_cuda_stream(dev_res)); + + int64_t remaining = index.num_ranks_; + int64_t radix = 2; + + while (remaining > 1) { + bool received_something = false; + int64_t offset = radix / 2; + ncclGroupStart(); + if (rank % radix == 0) // This is one of the receivers + { + int other_id = rank + offset; + if (other_id < index.num_ranks_) // Make sure someone's sending anything + { + ncclRecv(tmp_neighbors.data_handle() + part_size, + part_size * sizeof(IdxT), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclRecv(tmp_distances.data_handle() + part_size, + part_size * sizeof(float), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + received_something = true; + } + } else if (rank % radix == offset) // This is one of the senders + { + int other_id = rank - offset; + ncclSend(tmp_neighbors.data_handle(), + part_size * sizeof(IdxT), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclSend(tmp_distances.data_handle(), + part_size * sizeof(float), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + } + ncclGroupEnd(); + + remaining = (remaining + 1) / 2; + radix *= 2; + + if (received_something) { + // merge inplace + cuvs::neighbors::detail::knn_merge_parts(tmp_distances.data_handle(), + tmp_neighbors.data_handle(), + tmp_distances.data_handle(), + 
tmp_neighbors.data_handle(), + n_rows_of_current_batch, + 2, + n_neighbors, + resource::get_cuda_stream(dev_res), + d_trans.data_handle()); + + // If done, copy the final result + if (remaining <= 1) { + raft::copy(neighbors.data_handle() + output_offset, + tmp_neighbors.data_handle(), + part_size, + resource::get_cuda_stream(dev_res)); + raft::copy(distances.data_handle() + output_offset, + tmp_distances.data_handle(), + part_size, + resource::get_cuda_stream(dev_res)); + + resource::sync_stream(dev_res); + } + } + } + } + } +} + +template +void run_search_batch(const raft::comms::nccl_clique& clique, + const index& index, + int rank, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view& queries, + raft::host_matrix_view& neighbors, + raft::host_matrix_view& distances, + int64_t query_offset, + int64_t output_offset, + int64_t n_rows_of_current_batch, + int64_t n_cols, + int64_t n_neighbors) +{ + int dev_id = clique.device_ids_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + auto d_neighbors = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors.view(), d_distances.view()); + + raft::copy(neighbors.data_handle() + output_offset, + d_neighbors.data_handle(), + n_rows_of_current_batch * n_neighbors, + resource::get_cuda_stream(dev_res)); + raft::copy(distances.data_handle() + output_offset, + d_distances.data_handle(), + n_rows_of_current_batch * n_neighbors, + resource::get_cuda_stream(dev_res)); + + resource::sync_stream(dev_res); +} + +template +void search(const 
raft::device_resources& handle, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + int64_t n_rows = queries.extent(0); + int64_t n_cols = queries.extent(1); + int64_t n_neighbors = neighbors.extent(1); + + if (index.mode_ == REPLICATED) { + cuvs::neighbors::mg::replicated_search_mode search_mode; + if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + search_mode = mg_search_params->search_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + search_mode = mg_search_params->search_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>(search_params); + search_mode = mg_search_params->search_mode; + } + + if (search_mode == LOAD_BALANCER) { + int64_t n_rows_per_rank = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + n_rows_per_batch = + std::min(n_rows_per_batch, n_rows_per_rank); // get at least num_ranks_ batches + int64_t n_batches = raft::ceildiv(n_rows, (int64_t)n_rows_per_batch); + if (n_batches <= 1) n_rows_per_batch = n_rows; + + RAFT_LOG_INFO( + "REPLICATED SEARCH IN LOAD BALANCER MODE: %d*%drows", n_batches, n_rows_per_batch); + +#pragma omp parallel for + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int rank = batch_idx % index.num_ranks_; // alternate GPUs + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min(n_rows_per_batch, n_rows - offset); + + run_search_batch(clique, + index, + rank, + 
search_params, + queries, + neighbors, + distances, + query_offset, + output_offset, + n_rows_of_current_batch, + n_cols, + n_neighbors); + } + } else if (search_mode == ROUND_ROBIN) { + RAFT_LOG_INFO("REPLICATED SEARCH IN ROUND ROBIN MODE: %d*%drows", 1, n_rows); + + ASSERT(n_rows <= n_rows_per_batch, + "In round-robin mode, n_rows must lower or equal to n_rows_per_batch"); + + auto& rrc = *index.round_robin_counter_; + int64_t rank = rrc++; + rank %= index.num_ranks_; + + run_search_batch(clique, + index, + rank, + search_params, + queries, + neighbors, + distances, + 0, + 0, + n_rows, + n_cols, + n_neighbors); + } + } else if (index.mode_ == SHARDED) { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + merge_mode = mg_search_params->merge_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + merge_mode = mg_search_params->merge_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>(search_params); + merge_mode = mg_search_params->merge_mode; + } + + int64_t n_batches = raft::ceildiv(n_rows, (int64_t)n_rows_per_batch); + if (n_batches <= 1) n_rows_per_batch = n_rows; + + if (merge_mode == MERGE_ON_ROOT_RANK) { + RAFT_LOG_INFO("SHARDED SEARCH WITH MERGE_ON_ROOT_RANK MERGE MODE: %d*%drows", + n_batches, + n_rows_per_batch); + sharded_search_with_direct_merge(clique, + index, + search_params, + queries, + neighbors, + distances, + n_rows_per_batch, + n_rows, + n_cols, + n_neighbors, + n_batches); + } else if (merge_mode == TREE_MERGE) { + RAFT_LOG_INFO( + "SHARDED SEARCH WITH TREE_MERGE MERGE MODE %d*%drows", n_batches, n_rows_per_batch); + sharded_search_with_tree_merge(clique, + index, + search_params, + queries, + neighbors, + distances, + 
n_rows_per_batch, + n_rows, + n_cols, + n_neighbors, + n_batches); + } + } +} + +template +void serialize(const raft::device_resources& handle, + const index& index, + const std::string& filename) +{ + std::ofstream of(filename, std::ios::out | std::ios::binary); + if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + serialize_scalar(handle, of, (int)index.mode_); + serialize_scalar(handle, of, index.num_ranks_); + + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::serialize(dev_res, ann_if, of); + } + + of.close(); + if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } +} + +} // namespace cuvs::neighbors::mg::detail + +namespace cuvs::neighbors::mg { +using namespace cuvs::neighbors; +using namespace raft; + +template +index::index(distribution_mode mode, int num_ranks_) + : mode_(mode), + num_ranks_(num_ranks_), + round_robin_counter_(std::make_shared>(0)) +{ +} + +template +index::index(const raft::device_resources& handle, + const std::string& filename) + : round_robin_counter_(std::make_shared>(0)) +{ + cuvs::neighbors::mg::detail::deserialize(handle, *this, filename); +} +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu new file mode 100644 index 000000000..b11610fb4 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + 
\ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(float, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu new file mode 100644 index 000000000..8f76c69a3 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(half, uint32_t); + 
+#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu new file mode 100644 index 000000000..67b88d742 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + 
static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(int8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu new file mode 100644 index 000000000..f72174923 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(uint8_t, 
uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu new file mode 100644 index 000000000..4495e2527 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const 
mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(float, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu new file mode 100644 index 000000000..5494414a6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + 
cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(int8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu new file mode 100644 index 000000000..35df2146b --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = 
raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(uint8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu new file mode 100644 index 000000000..c671740e6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = 
raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(float, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu new file mode 100644 index 000000000..b167239c6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = 
raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(half, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu new file mode 100644 index 000000000..127baf8fd --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = 
raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(int8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu new file mode 100644 index 000000000..869e009a5 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = 
raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(uint8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/nccl_comm.cpp b/cpp/src/neighbors/mg/nccl_comm.cpp new file mode 100644 index 000000000..c4556957a --- /dev/null +++ b/cpp/src/neighbors/mg/nccl_comm.cpp @@ -0,0 +1,8 @@ +#include +#include + +namespace raft::comms { +void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank) +{ +} +} // namespace raft::comms diff --git a/cpp/src/neighbors/mg/omp_checks.cpp b/cpp/src/neighbors/mg/omp_checks.cpp new file mode 100644 index 000000000..e09182dfe --- /dev/null +++ b/cpp/src/neighbors/mg/omp_checks.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace cuvs::neighbors::mg { +using raft::RAFT_NAME; + +void check_omp_threads(const int requirements) +{ + const int max_threads = omp_get_max_threads(); + if (max_threads < requirements) + RAFT_LOG_WARN( + "OpenMP is only allowed %d threads to run %d GPUs. 
Please increase the number of OpenMP " + "threads to avoid NCCL hangs by modifying the environment variable OMP_NUM_THREADS.", + max_threads, + requirements); +} + +} // namespace cuvs::neighbors::mg diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index bd07bebee..f4d35e438 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -22,7 +22,7 @@ rapids_test_init() function(ConfigureTest) set(options OPTIONAL NOCUDA C_LIB) - set(oneValueArgs NAME GPUS PERCENT) + set(oneValueArgs NAME GPUS PERCENT ADDITIONAL_DEP) set(multiValueArgs PATH TARGETS CONFIGURATIONS) cmake_parse_arguments(_CUVS_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -56,6 +56,7 @@ function(ConfigureTest) $ $ $<$:cuvs::c_api> + ${_CUVS_TEST_ADDITIONAL_DEP} ) set_target_properties( ${TEST_NAME} @@ -159,7 +160,7 @@ if(BUILD_TESTS) 100 ) - ConfigureTest( + ConfigureTest( NAME NEIGHBORS_ANN_VAMANA_TEST PATH @@ -178,6 +179,12 @@ if(BUILD_TESTS) target_compile_definitions(NEIGHBORS_HNSW_TEST PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) endif() + if(BUILD_MG_ALGOS) + ConfigureTest( + NAME NEIGHBORS_MG_TEST PATH neighbors/mg/test_float.cu GPUS 1 PERCENT 100 ADDITIONAL_DEP nccl + ) + endif() + ConfigureTest( NAME DISTANCE_TEST diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index f02568b74..fd4e330db 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -282,6 +282,8 @@ class ivf_pq_test : public ::testing::TestWithParam { uint32_t n_take, uint32_t n_skip) { + // the original data cannot be reconstructed since the dataset was normalized + if (index.metric() == cuvs::distance::DistanceType::CosineExpanded) { return; } auto& rec_list = index.lists()[label]; auto dim = index.dim(); n_take = std::min(n_take, rec_list->size.load()); @@ -313,6 +315,7 @@ class ivf_pq_test : public ::testing::TestWithParam { auto old_list = index->lists()[label]; auto n_rows = old_list->size.load(); if (n_rows == 0) { return; } 
+ if (index->metric() == cuvs::distance::DistanceType::CosineExpanded) { return; } auto vectors_1 = raft::make_device_matrix(handle_, n_rows, index->dim()); auto indices = raft::make_device_vector(handle_, n_rows); @@ -374,7 +377,7 @@ class ivf_pq_test : public ::testing::TestWithParam { cuvs::Compare{})); // Pack a few vectors back to the list. - int row_offset = 9; + int row_offset = 5; int n_vec = 3; ASSERT_TRUE(row_offset + n_vec < n_rows); size_t offset = row_offset * index->pq_dim(); @@ -884,6 +887,25 @@ inline auto enum_variety_l2sqrt() -> test_cases_t }); } +inline auto enum_variety_cosine() -> test_cases_t +{ + return map(enum_variety(), [](const ivf_pq_inputs& x) { + ivf_pq_inputs y(x); + if (y.min_recall.has_value()) { + if (y.search_params.lut_dtype == CUDA_R_8U) { + // TODO: Increase this recall threshold for 8 bit lut + // (https://github.com/rapidsai/cuvs/issues/390) + y.min_recall = y.min_recall.value() * 0.70; + } else { + // In other cases it seems to perform a little bit better, still worse than L2 + y.min_recall = y.min_recall.value() * 0.94; + } + } + y.index_params.metric = distance::DistanceType::CosineExpanded; + return y; + }); +} + /** * Try different number of n_probes, some of which may trigger the non-fused version of the search * kernel. 
diff --git a/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu index cdc6c1b7e..834fdb3d0 100644 --- a/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu +++ b/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu @@ -25,9 +25,13 @@ TEST_BUILD_HOST_INPUT_SEARCH(f32_f32_i64) TEST_BUILD_HOST_INPUT_OVERLAP_SEARCH(f32_f32_i64) TEST_BUILD_EXTEND_SEARCH(f32_f32_i64) TEST_BUILD_SERIALIZE_SEARCH(f32_f32_i64) -INSTANTIATE(f32_f32_i64, defaults() + small_dims() + big_dims_moderate_lut()); +INSTANTIATE(f32_f32_i64, + defaults() + small_dims() + big_dims_moderate_lut() + enum_variety_l2() + + enum_variety_l2sqrt() + enum_variety_ip() + enum_variety_cosine()); TEST_BUILD_SEARCH(f32_f32_i64_filter) -INSTANTIATE(f32_f32_i64_filter, defaults() + small_dims() + big_dims_moderate_lut()); +INSTANTIATE(f32_f32_i64_filter, + defaults() + small_dims() + big_dims_moderate_lut() + enum_variety_l2() + + enum_variety_l2sqrt() + enum_variety_ip() + enum_variety_cosine()); } // namespace cuvs::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu index 80b0e2ccb..c9e5d4f01 100644 --- a/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu +++ b/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu @@ -25,8 +25,9 @@ TEST_BUILD_SEARCH(f32_i08_i64) TEST_BUILD_HOST_INPUT_SEARCH(f32_i08_i64) TEST_BUILD_HOST_INPUT_OVERLAP_SEARCH(f32_i08_i64) TEST_BUILD_SERIALIZE_SEARCH(f32_i08_i64) -INSTANTIATE(f32_i08_i64, defaults() + big_dims() + var_k()); +INSTANTIATE(f32_i08_i64, defaults() + big_dims() + var_k() + enum_variety_l2() + enum_variety_ip()); TEST_BUILD_SEARCH(f32_i08_i64_filter) -INSTANTIATE(f32_i08_i64_filter, defaults() + big_dims() + var_k()); +INSTANTIATE(f32_i08_i64_filter, + defaults() + big_dims() + var_k() + enum_variety_l2() + enum_variety_ip()); } // namespace cuvs::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu 
b/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu index 0216a1e80..6e0732227 100644 --- a/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu +++ b/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu @@ -25,8 +25,12 @@ TEST_BUILD_SEARCH(f32_u08_i64) TEST_BUILD_HOST_INPUT_SEARCH(f32_u08_i64) TEST_BUILD_HOST_INPUT_OVERLAP_SEARCH(f32_u08_i64) TEST_BUILD_EXTEND_SEARCH(f32_u08_i64) -INSTANTIATE(f32_u08_i64, small_dims_per_cluster() + enum_variety()); +INSTANTIATE(f32_u08_i64, + small_dims_per_cluster() + enum_variety() + enum_variety_l2() + enum_variety_l2sqrt() + + enum_variety_ip()); TEST_BUILD_SEARCH(f32_u08_i64_filter) -INSTANTIATE(f32_u08_i64_filter, small_dims_per_cluster() + enum_variety()); +INSTANTIATE(f32_u08_i64_filter, + small_dims_per_cluster() + enum_variety() + enum_variety_l2() + enum_variety_l2sqrt() + + enum_variety_ip()); } // namespace cuvs::neighbors::ivf_pq diff --git a/cpp/test/neighbors/mg.cuh b/cpp/test/neighbors/mg.cuh new file mode 100644 index 000000000..be30ca615 --- /dev/null +++ b/cpp/test/neighbors/mg.cuh @@ -0,0 +1,825 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "../test_utils.cuh" +#include "ann_utils.cuh" +#include "naive_knn.cuh" + +#include +#include + +namespace cuvs::neighbors::mg { + +enum class algo_t { IVF_FLAT, IVF_PQ, CAGRA }; +enum class d_mode_t { REPLICATED, SHARDED, LOCAL_THEN_DISTRIBUTED, ROUND_ROBIN }; +enum class m_mode_t { MERGE_ON_ROOT_RANK, TREE_MERGE, UNDEFINED }; + +struct AnnMGInputs { + int64_t num_queries; + int64_t num_db_vecs; + int64_t dim; + int64_t k; + d_mode_t d_mode; + m_mode_t m_mode; + algo_t algo; + int64_t nprobe; + int64_t nlist; + cuvs::distance::DistanceType metric; + bool adaptive_centers; +}; + +template +class AnnMGTest : public ::testing::TestWithParam { + public: + AnnMGTest() + : stream_(resource::get_cuda_stream(handle_)), + clique_(raft::resource::get_nccl_clique(handle_)), + ps(::testing::TestWithParam::GetParam()), + d_index_dataset(0, stream_), + d_queries(0, stream_), + h_index_dataset(0), + h_queries(0) + { + } + + void testAnnMG() + { + size_t queries_size = ps.num_queries * ps.k; + std::vector neighbors_ref(queries_size); + std::vector distances_ref(queries_size); + std::vector neighbors_snmg_ann(queries_size); + std::vector distances_snmg_ann(queries_size); + std::vector neighbors_ref_32bits(queries_size); + std::vector neighbors_snmg_ann_32bits(queries_size); + + { + rmm::device_uvector distances_ref_dev(queries_size, stream_); + rmm::device_uvector neighbors_ref_dev(queries_size, stream_); + cuvs::neighbors::naive_knn(handle_, + distances_ref_dev.data(), + neighbors_ref_dev.data(), + d_queries.data(), + d_index_dataset.data(), + ps.num_queries, + ps.num_db_vecs, + ps.dim, + ps.k, + ps.metric); + update_host(distances_ref.data(), distances_ref_dev.data(), queries_size, stream_); + update_host(neighbors_ref.data(), neighbors_ref_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + int64_t n_rows_per_search_batch = 3000; // [3000, 3000, 1000] == 7000 rows + + // IVF-Flat + if (ps.algo == algo_t::IVF_FLAT && + 
(ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = d_mode; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + cuvs::neighbors::mg::serialize(handle_, index, "mg_ivf_flat_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_flat(handle_, "mg_ivf_flat_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + 
std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + // IVF-PQ + if (ps.algo == algo_t::IVF_PQ && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = d_mode; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + cuvs::neighbors::mg::serialize(handle_, index, "mg_ivf_pq_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_pq(handle_, "mg_ivf_pq_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + 
std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + // CAGRA + if (ps.algo == algo_t::CAGRA && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + index_params.mode = d_mode; + + mg::search_params search_params; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann_32bits.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::serialize(handle_, index, "mg_cagra_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_cagra(handle_, "mg_cagra_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref_32bits, + neighbors_snmg_ann_32bits, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann_32bits.begin(), neighbors_snmg_ann_32bits.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 
0); + } + + if (ps.algo == algo_t::IVF_FLAT && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + ivf_flat::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_flat::build(handle_, index_params, index_dataset); + ivf_flat::serialize(handle_, "local_ivf_flat_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_flat(handle_, "local_ivf_flat_index"); + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_PQ && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + ivf_pq::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = true; + 
index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_pq::build(handle_, index_params, index_dataset); + ivf_pq::serialize(handle_, "local_ivf_pq_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_pq(handle_, "local_ivf_pq_index"); + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::CAGRA && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + cagra::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + + mg::search_params search_params; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::cagra::build(handle_, index_params, index_dataset); + cuvs::neighbors::cagra::serialize(handle_, "local_cagra_index", index); + } + + 
auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann_32bits.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_cagra(handle_, "local_cagra_index"); + + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref_32bits, + neighbors_snmg_ann_32bits, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann_32bits.begin(), neighbors_snmg_ann_32bits.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_FLAT && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + + int n_parallel_searches = 16; + std::vector 
searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + + if (ps.algo == algo_t::IVF_PQ && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = 
raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + + if (ps.algo == algo_t::CAGRA && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + 
index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref_32bits, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + 
searches_correctness.end(), + [](char val) { return val != 0; })); + } + } + + void SetUp() override + { + d_index_dataset.resize(ps.num_db_vecs * ps.dim, stream_); + d_queries.resize(ps.num_queries * ps.dim, stream_); + h_index_dataset.resize(ps.num_db_vecs * ps.dim); + h_queries.resize(ps.num_queries * ps.dim); + + raft::random::RngState r(1234ULL); + if constexpr (std::is_same{}) { + raft::random::uniform( + handle_, r, d_index_dataset.data(), d_index_dataset.size(), DataT(0.1), DataT(2.0)); + raft::random::uniform(handle_, r, d_queries.data(), d_queries.size(), DataT(0.1), DataT(2.0)); + } else { + raft::random::uniformInt( + handle_, r, d_index_dataset.data(), d_index_dataset.size(), DataT(1), DataT(20)); + raft::random::uniformInt(handle_, r, d_queries.data(), d_queries.size(), DataT(1), DataT(20)); + } + + raft::copy(h_index_dataset.data(), + d_index_dataset.data(), + d_index_dataset.size(), + resource::get_cuda_stream(handle_)); + raft::copy( + h_queries.data(), d_queries.data(), d_queries.size(), resource::get_cuda_stream(handle_)); + resource::sync_stream(handle_); + } + + void TearDown() override {} + + private: + raft::device_resources handle_; + rmm::cuda_stream_view stream_; + raft::comms::nccl_clique clique_; + AnnMGInputs ps; + std::vector h_index_dataset; + std::vector h_queries; + rmm::device_uvector d_index_dataset; + rmm::device_uvector d_queries; +}; + +const std::vector inputs = { + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + 
m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ +}; +} // namespace 
cuvs::neighbors::mg diff --git a/cpp/test/neighbors/mg/test_float.cu b/cpp/test/neighbors/mg/test_float.cu new file mode 100644 index 000000000..ef9c9a043 --- /dev/null +++ b/cpp/test/neighbors/mg/test_float.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../mg.cuh" + +namespace cuvs::neighbors::mg { + +typedef AnnMGTest AnnMGTestF_float; +TEST_P(AnnMGTestF_float, AnnMG) { this->testAnnMG(); } + +INSTANTIATE_TEST_CASE_P(AnnMGTest, AnnMGTestF_float, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::mg diff --git a/dependencies.yaml b/dependencies.yaml index c18f53305..2b19b987f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -159,6 +159,7 @@ dependencies: packages: - c-compiler - cxx-compiler + - nccl>=2.19 specific: - output_types: conda matrices: diff --git a/docs/source/build.rst b/docs/source/build.rst index e0659ec65..a6b8ccd13 100644 --- a/docs/source/build.rst +++ b/docs/source/build.rst @@ -123,6 +123,16 @@ Once installed, the shared libraries, headers (and any dependencies downloaded a ./build.sh libcuvs --uninstall +Multi-GPU features +^^^^^^^^^^^^^^^^^^ + +To disable the multi-gpu features run : + +.. 
code-block:: bash + + ./build.sh libcuvs --no-mg + + Building the Googletests ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/c_api/neighbors.rst b/docs/source/c_api/neighbors.rst index dc55a74dc..9c3fce672 100644 --- a/docs/source/c_api/neighbors.rst +++ b/docs/source/c_api/neighbors.rst @@ -13,3 +13,4 @@ Nearest Neighbors neighbors_ivf_flat_c.rst neighbors_ivf_pq_c.rst neighbors_cagra_c.rst + neighbors_hnsw_c.rst diff --git a/docs/source/cpp_api/neighbors.rst b/docs/source/cpp_api/neighbors.rst index e5a9fc977..d55d58eb0 100644 --- a/docs/source/cpp_api/neighbors.rst +++ b/docs/source/cpp_api/neighbors.rst @@ -11,7 +11,9 @@ Nearest Neighbors neighbors_bruteforce.rst neighbors_cagra.rst + neighbors_hnsw.rst neighbors_ivf_flat.rst neighbors_ivf_pq.rst neighbors_nn_descent.rst neighbors_refine.rst + neighbors_mg.rst diff --git a/docs/source/cpp_api/neighbors_mg.rst b/docs/source/cpp_api/neighbors_mg.rst new file mode 100644 index 000000000..b68defec9 --- /dev/null +++ b/docs/source/cpp_api/neighbors_mg.rst @@ -0,0 +1,76 @@ +Distributed ANN +=============== + +The SNMG (single-node multi-GPUs) ANN API provides a set of functions to deploy ANN indexes on multiple GPUs. + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors::mg* + +Index build parameters +---------------------- + +.. doxygengroup:: mg_cpp_index_params + :project: cuvs + :members: + :content-only: + +Search parameters +---------------------- + +.. doxygengroup:: mg_cpp_search_params + :project: cuvs + :members: + :content-only: + +Index build +----------- + +.. doxygengroup:: mg_cpp_index_build + :project: cuvs + :members: + :content-only: + +Index extend +------------ + +.. doxygengroup:: mg_cpp_index_extend + :project: cuvs + :members: + :content-only: + +Index search +------------ + +.. doxygengroup:: mg_cpp_index_search + :project: cuvs + :members: + :content-only: + +Index serialize +--------------- + +.. 
doxygengroup:: mg_cpp_serialize + :project: cuvs + :members: + :content-only: + +Index deserialize +----------------- + +.. doxygengroup:: mg_cpp_deserialize + :project: cuvs + :members: + :content-only: + +Distribute pre-built local index +-------------------------------- + +.. doxygengroup:: mg_cpp_distribute + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cuvs_bench/index.rst b/docs/source/cuvs_bench/index.rst index cf812a62e..2ebf584c3 100644 --- a/docs/source/cuvs_bench/index.rst +++ b/docs/source/cuvs_bench/index.rst @@ -630,7 +630,7 @@ The table below contains all algorithms supported by cuVS. Each unique algorithm - `cuvs_brute_force`, `cuvs_cagra`, `cuvs_ivf_flat`, `cuvs_ivf_pq`, `cuvs_cagra_hnswlib` Adding a new index algorithm -============================= +============================ Implementation and configuration -------------------------------- diff --git a/docs/source/python_api/neighbors.rst b/docs/source/python_api/neighbors.rst index 022c50de3..cd4f2609c 100644 --- a/docs/source/python_api/neighbors.rst +++ b/docs/source/python_api/neighbors.rst @@ -11,5 +11,6 @@ Nearest Neighbors neighbors_brute_force.rst neighbors_cagra.rst + neighbors_hnsw.rst neighbors_ivf_flat.rst neighbors_ivf_pq.rst diff --git a/docs/source/python_api/neighbors_hnsw.rst b/docs/source/python_api/neighbors_hnsw.rst new file mode 100644 index 000000000..9922805b3 --- /dev/null +++ b/docs/source/python_api/neighbors_hnsw.rst @@ -0,0 +1,30 @@ +HNSW +==== + +This is a wrapper for hnswlib, to load a CAGRA index as an immutable HNSW index. The loaded HNSW index is only compatible in cuVS, and can be searched using wrapper functions. + +.. role:: py(code) + :language: python + :class: highlight + +Index search parameters +####################### + +.. autoclass:: cuvs.neighbors.hnsw.SearchParams + :members: + +Index +##### + +.. autoclass:: cuvs.neighbors.hnsw.Index + :members: + +Index Conversion +################ + +.. 
autofunction:: cuvs.neighbors.hnsw.from_cagra + +Index search +############ + +.. autofunction:: cuvs.neighbors.hnsw.search diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index 21c3db5da..f68bbea53 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(brute_force) add_subdirectory(cagra) +add_subdirectory(hnsw) add_subdirectory(ivf_flat) add_subdirectory(ivf_pq) add_subdirectory(filters) diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index b23c2a4b3..bba5a91a8 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -17,6 +17,7 @@ from libc.stdint cimport ( int8_t, + int32_t, int64_t, uint8_t, uint32_t, @@ -100,6 +101,8 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index) + cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int32_t* dim) + cuvsError_t cuvsCagraBuild(cuvsResources_t res, cuvsCagraIndexParams* params, DLManagedTensor* dataset, @@ -117,6 +120,20 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: cuvsCagraIndex_t index, bool include_dataset) except + + cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res, + const char * filename, + cuvsCagraIndex_t index) except + + cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, const char * filename, cuvsCagraIndex_t index) except + + +cdef class Index: + """ + CAGRA index object. This object stores the trained CAGRA index state + which can be used to perform nearest neighbors searches. 
+ """ + + cdef cuvsCagraIndex_t index + cdef bool trained + cdef str active_index_type diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index f940ab8bf..95209dbeb 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -36,6 +36,7 @@ from pylibraft.neighbors.common import _check_input_array from libc.stdint cimport ( int8_t, + int32_t, int64_t, uint8_t, uint32_t, @@ -206,16 +207,9 @@ cdef class IndexParams: cdef class Index: - """ - CAGRA index object. This object stores the trained CAGRA index state - which can be used to perform nearest neighbors searches. - """ - - cdef cuvsCagraIndex_t index - cdef bool trained - def __cinit__(self): self.trained = False + self.active_index_type = None check_cuvs(cuvsCagraIndexCreate(&self.index)) def __dealloc__(self): @@ -226,6 +220,12 @@ cdef class Index: def trained(self): return self.trained + @property + def dim(self): + cdef int32_t dim + check_cuvs(cuvsCagraIndexGetDims(self.index, &dim)) + return dim + def __repr__(self): # todo(dgd): update repr as we expose data through C API attr_str = [] @@ -299,6 +299,7 @@ def build(IndexParams index_params, dataset, resources=None): idx.index )) idx.trained = True + idx.active_index_type = dataset_ai.dtype.name return idx diff --git a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt new file mode 100644 index 000000000..1f9c422ca --- /dev/null +++ b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources hnsw.pyx) +set(linked_libraries cuvs::cuvs cuvs::c_api) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_hnsw_ +) diff --git a/python/cuvs/cuvs/neighbors/hnsw/__init__.pxd b/python/cuvs/cuvs/neighbors/hnsw/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/hnsw/__init__.py b/python/cuvs/cuvs/neighbors/hnsw/__init__.py new file mode 100644 index 000000000..5efcdf68b --- /dev/null +++ b/python/cuvs/cuvs/neighbors/hnsw/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from .hnsw import Index, SearchParams, from_cagra, load, save, search + +__all__ = [ + "Index", + "SearchParams", + "load", + "save", + "search", + "from_cagra", +] diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd new file mode 100644 index 000000000..1cdc97406 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd @@ -0,0 +1,53 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from libc.stdint cimport int32_t, uintptr_t + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from cuvs.distance_type cimport cuvsDistanceType + + +cdef extern from "cuvs/neighbors/hnsw.h" nogil: + ctypedef struct cuvsHnswSearchParams: + int32_t ef + int32_t numThreads + + ctypedef cuvsHnswSearchParams* cuvsHnswSearchParams_t + + ctypedef struct cuvsHnswIndex: + uintptr_t addr + DLDataType dtype + + ctypedef cuvsHnswIndex* cuvsHnswIndex_t + + cuvsError_t cuvsHnswIndexCreate(cuvsHnswIndex_t* index) + + cuvsError_t cuvsHnswIndexDestroy(cuvsHnswIndex_t index) + + cuvsError_t cuvsHnswSearch(cuvsResources_t res, + cuvsHnswSearchParams* params, + cuvsHnswIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances) except + + + cuvsError_t cuvsHnswDeserialize(cuvsResources_t res, + const char * filename, + int32_t dim, + cuvsDistanceType metric, + 
cuvsHnswIndex_t index) except + diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx new file mode 100644 index 000000000..018fcfef9 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx @@ -0,0 +1,380 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from libc.stdint cimport uint32_t +from libcpp cimport bool +from libcpp.string cimport string + +from cuvs.common.exceptions import check_cuvs +from cuvs.common.resources import auto_sync_resources + +from cuvs.common cimport cydlpack + +import numpy as np + +from cuvs.distance import DISTANCE_TYPES + +from cuvs.neighbors.cagra cimport cagra + +import os +import uuid + +from pylibraft.common import auto_convert_output +from pylibraft.common.cai_wrapper import wrap_array +from pylibraft.common.interruptible import cuda_interruptible +from pylibraft.neighbors.common import _check_input_array + + +cdef class SearchParams: + """ + HNSW search parameters + + Parameters + ---------- + ef: int, default = 200 + Maximum number of candidate list size used during search. + num_threads: int, default = 0 + Number of CPU threads used to increase search parallelism. + When set to 0, the number of threads is automatically determined + using OpenMP's `omp_get_max_threads()`. 
+ """ + + cdef cuvsHnswSearchParams params + + def __init__(self, *, + ef=200, + num_threads=0): + self.params.ef = ef + self.params.numThreads = num_threads + + def __repr__(self): + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in [ + "ef", "num_threads"]] + return "SearchParams(type=HNSW, " + (", ".join(attr_str)) + ")" + + @property + def ef(self): + return self.params.ef + + @property + def num_threads(self): + return self.params.numThreads + + +cdef class Index: + """ + HNSW index object. This object stores the trained HNSW index state + which can be used to perform nearest neighbors searches. + """ + + cdef cuvsHnswIndex_t index + cdef bool trained + + def __cinit__(self): + self.trained = False + check_cuvs(cuvsHnswIndexCreate(&self.index)) + + def __dealloc__(self): + if self.index is not NULL: + check_cuvs(cuvsHnswIndexDestroy(self.index)) + + @property + def trained(self): + return self.trained + + def __repr__(self): + # todo(dgd): update repr as we expose data through C API + attr_str = [] + return "Index(type=HNSW, metric=L2" + (", ".join(attr_str)) + ")" + + +@auto_sync_resources +def save(filename, cagra.Index index, resources=None): + """ + Saves the CAGRA index to a file as an hnswlib index. + The saved index is immutable and can only be searched by the hnswlib + wrapper in cuVS, as the format is not compatible with the original + hnswlib. + + Saving / loading the index is experimental. The serialization format is + subject to change. + + Parameters + ---------- + filename : string + Name of the file. + index : Index + Trained CAGRA index. + {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import cagra + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... 
dtype=cp.float32) + >>> # Build index + >>> index = cagra.build(cagra.IndexParams(), dataset) + >>> # Serialize and deserialize the cagra index built + >>> hnsw.save("my_index.bin", index) + """ + cdef string c_filename = filename.encode('utf-8') + cdef cuvsResources_t res = resources.get_c_obj() + check_cuvs(cagra.cuvsCagraSerializeToHnswlib(res, + c_filename.c_str(), + index.index)) + + +@auto_sync_resources +def load(filename, dim, dtype, metric="sqeuclidean", resources=None): + """ + Loads base-layer-only hnswlib index from file, which was originally + saved as a built CAGRA index. The loaded index is immutable and can only + be searched by the hnswlib wrapper in cuVS, as the format is not + compatible with the original hnswlib. + + Saving / loading the index is experimental. The serialization format is + subject to change, therefore loading an index saved with a previous + version of cuVS is not guaranteed to work. + + Parameters + ---------- + filename : string + Name of the file. + dim : int + Dimensions of the training dataest + dtype : np.dtype of the saved index + Valid values for dtype: [np.float32, np.byte, np.ubyte] + metric : string denoting the metric type, default="sqeuclidean" + Valid values for metric: ["sqeuclidean", "inner_product"], where + - sqeuclidean is the euclidean distance without the square root + operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2, + - inner_product distance is defined as + distance(a, b) = \\sum_i a_i * b_i. + {resources_docstring} + + Returns + ------- + index : HnswIndex + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import cagra + >>> from cuvs.neighbors import hnsw + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... 
dtype=cp.float32) + >>> # Build index + >>> index = cagra.build(cagra.IndexParams(), dataset) + >>> # Serialize the CAGRA index to hnswlib base layer only index format + >>> hnsw.save("my_index.bin", index) + >>> index = hnsw.load("my_index.bin", n_features, np.float32, + ... "sqeuclidean") + """ + cdef Index idx = Index() + cdef cuvsResources_t res = resources.get_c_obj() + cdef string c_filename = filename.encode('utf-8') + cdef cydlpack.DLDataType dl_dtype + if dtype == np.float32: + dl_dtype.code = cydlpack.kDLFloat + dl_dtype.bits = 32 + dl_dtype.lanes = 1 + elif dtype == np.ubyte: + dl_dtype.code = cydlpack.kDLUInt + dl_dtype.bits = 8 + dl_dtype.lanes = 1 + elif dtype == np.byte: + dl_dtype.code = cydlpack.kDLInt + dl_dtype.bits = 8 + dl_dtype.lanes = 1 + else: + raise ValueError("Only float32 is supported for dtype") + + idx.index.dtype = dl_dtype + cdef cuvsDistanceType distance_type = DISTANCE_TYPES[metric] + + check_cuvs(cuvsHnswDeserialize( + res, + c_filename.c_str(), + dim, + distance_type, + idx.index + )) + idx.trained = True + return idx + + +@auto_sync_resources +def from_cagra(cagra.Index index, temporary_index_path=None, resources=None): + """ + Returns an hnsw base-layer-only index from a CAGRA index. + + NOTE: This method uses the filesystem to write the CAGRA index in + `/tmp/.bin` or the parameter `temporary_index_path` + if not None before reading it as an hnsw index, + then deleting the temporary file. The returned index is immutable + and can only be searched by the hnsw wrapper in cuVS, as the + format is not compatible with the original hnswlib library. + By `base_layer_only`, we mean that the hnsw index is created + without the additional layers that are used for the hierarchical + search in hnswlib. Instead, the base layer is used for the search. + + Saving / loading the index is experimental. The serialization format is + subject to change. + + Parameters + ---------- + index : Index + Trained CAGRA index. 
+ temporary_index_path : string, default = None + Path to save the temporary index file. If None, the temporary file + will be saved in `/tmp/.bin`. + {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import cagra + >>> from cuvs.neighbors import hnsw + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> index = cagra.build(cagra.IndexParams(), dataset) + >>> # Serialize the CAGRA index to hnswlib base layer only index format + >>> hnsw_index = hnsw.from_cagra(index) + """ + uuid_num = uuid.uuid4() + filename = temporary_index_path if temporary_index_path else \ + f"/tmp/{uuid_num}.bin" + save(filename, index, resources=resources) + hnsw_index = load(filename, index.dim, np.dtype(index.active_index_type), + "sqeuclidean", resources=resources) + os.remove(filename) + return hnsw_index + + +@auto_sync_resources +@auto_convert_output +def search(SearchParams search_params, + Index index, + queries, + k, + neighbors=None, + distances=None, + resources=None): + """ + Find the k nearest neighbors for each query. + + Parameters + ---------- + search_params : SearchParams + index : Index + Trained CAGRA index. + queries : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int] + k : int + The number of neighbors. + neighbors : Optional CUDA array interface compliant matrix shape + (n_queries, k), dtype uint64_t. If supplied, neighbor + indices will be written here in-place. (default None) + distances : Optional CUDA array interface compliant matrix shape + (n_queries, k) If supplied, the distances to the + neighbors will be written here in-place. 
(default None) + {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import cagra, hnsw + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> index = cagra.build(cagra.IndexParams(), dataset) + >>> # Search using the built index + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> k = 10 + >>> search_params = hnsw.SearchParams( + ... ef=200, + ... num_threads=0 + ... ) + >>> # Convert CAGRA index to HNSW + >>> hnsw_index = hnsw.from_cagra(index) + >>> # Using a pooling allocator reduces overhead of temporary array + >>> # creation during search. This is useful if multiple searches + >>> # are performed with same query size. + >>> distances, neighbors = hnsw.search(search_params, index, queries, + ... k) + >>> neighbors = cp.asarray(neighbors) + >>> distances = cp.asarray(distances) + """ + if not index.trained: + raise ValueError("Index needs to be built before calling search.") + + # todo(dgd): we can make the check of dtype a parameter of wrap_array + # in RAFT to make this a single call + queries_ai = wrap_array(queries) + _check_input_array(queries_ai, [np.dtype('float32'), + np.dtype('uint8'), + np.dtype('int8')]) + + cdef uint32_t n_queries = queries_ai.shape[0] + + if neighbors is None: + neighbors = np.empty((n_queries, k), dtype='uint64') + + neighbors_ai = wrap_array(neighbors) + _check_input_array(neighbors_ai, [np.dtype('uint64')], + exp_rows=n_queries, exp_cols=k) + + if distances is None: + distances = np.empty((n_queries, k), dtype='float32') + + distances_ai = wrap_array(distances) + _check_input_array(distances_ai, [np.dtype('float32')], + exp_rows=n_queries, exp_cols=k) + + cdef cuvsHnswSearchParams* params = &search_params.params + cdef cydlpack.DLManagedTensor* queries_dlpack = \ + cydlpack.dlpack_c(queries_ai) + cdef 
cydlpack.DLManagedTensor* neighbors_dlpack = \ + cydlpack.dlpack_c(neighbors_ai) + cdef cydlpack.DLManagedTensor* distances_dlpack = \ + cydlpack.dlpack_c(distances_ai) + cdef cuvsResources_t res = resources.get_c_obj() + + with cuda_interruptible(): + check_cuvs(cuvsHnswSearch( + res, + params, + index.index, + queries_dlpack, + neighbors_dlpack, + distances_dlpack + )) + + return (distances, neighbors) diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py new file mode 100644 index 000000000..0ae97266b --- /dev/null +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -0,0 +1,97 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from sklearn.neighbors import NearestNeighbors +from sklearn.preprocessing import normalize + +from cuvs.neighbors import cagra, hnsw +from cuvs.test.ann_utils import calc_recall, generate_data + + +def run_hnsw_build_search_test( + n_rows=1000, + n_cols=10, + n_queries=100, + k=10, + dtype=np.float32, + metric="sqeuclidean", + build_algo="ivf_pq", + intermediate_graph_degree=128, + graph_degree=64, + search_params={}, +): + dataset = generate_data((n_rows, n_cols), dtype) + if metric == "inner_product": + dataset = normalize(dataset, norm="l2", axis=1) + if dtype in [np.int8, np.uint8]: + pytest.skip( + "inner_product metric is not supported for int8/uint8 data" + ) + if build_algo == "nn_descent": + pytest.skip("inner_product metric is not supported for nn_descent") + + build_params = cagra.IndexParams( + metric=metric, + intermediate_graph_degree=intermediate_graph_degree, + graph_degree=graph_degree, + build_algo=build_algo, + ) + + index = cagra.build(build_params, dataset) + + assert index.trained + + hnsw_index = hnsw.from_cagra(index) + + queries = generate_data((n_queries, n_cols), dtype) + + search_params = hnsw.SearchParams(**search_params) + + out_dist, out_idx = hnsw.search(search_params, hnsw_index, queries, k) + + # Calculate reference values with sklearn + skl_metric = { + "sqeuclidean": "sqeuclidean", + "inner_product": "cosine", + "euclidean": "euclidean", + }[metric] + nn_skl = NearestNeighbors( + n_neighbors=k, algorithm="brute", metric=skl_metric + ) + nn_skl.fit(dataset) + skl_dist, skl_idx = nn_skl.kneighbors(queries, return_distance=True) + + recall = calc_recall(out_idx, skl_idx) + assert recall > 0.95 + + +@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) +@pytest.mark.parametrize("k", [10, 20]) +@pytest.mark.parametrize("ef", [30, 40]) +@pytest.mark.parametrize("num_threads", [2, 4]) +@pytest.mark.parametrize("metric", ["sqeuclidean"]) +@pytest.mark.parametrize("build_algo", 
["ivf_pq", "nn_descent"]) +def test_hnsw(dtype, k, ef, num_threads, metric, build_algo): + # Note that inner_product tests use normalized input which we cannot + # represent in int8, therefore we test only sqeuclidean metric here. + run_hnsw_build_search_test( + dtype=dtype, + k=k, + metric=metric, + build_algo=build_algo, + search_params={"ef": ef, "num_threads": num_threads}, + ) diff --git a/python/cuvs_bench/cuvs_bench/generate_groundtruth/__main__.py b/python/cuvs_bench/cuvs_bench/generate_groundtruth/__main__.py new file mode 100644 index 000000000..2b4213016 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/generate_groundtruth/__main__.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import argparse +import os +import sys + +import cupy as cp +import numpy as np +import rmm +from pylibraft.common import DeviceResources +from rmm.allocators.cupy import rmm_cupy_allocator + +from cuvs.neighbors.brute_force import knn + +from .utils import memmap_bin_file, suffix_from_dtype, write_bin + + +def generate_random_queries(n_queries, n_features, dtype=np.float32): + print("Generating random queries") + if np.issubdtype(dtype, np.integer): + queries = cp.random.randint( + 0, 255, size=(n_queries, n_features), dtype=dtype + ) + else: + queries = cp.random.uniform(size=(n_queries, n_features)).astype(dtype) + return queries + + +def choose_random_queries(dataset, n_queries): + print("Choosing random vector from dataset as query vectors") + query_idx = np.random.choice( + dataset.shape[0], size=(n_queries,), replace=False + ) + return dataset[query_idx, :] + + +def calc_truth(dataset, queries, k, metric="sqeuclidean"): + handle = DeviceResources() + n_samples = dataset.shape[0] + n = 500000 # batch size for processing neighbors + i = 0 + indices = None + distances = None + queries = cp.asarray(queries, dtype=cp.float32) + + while i < n_samples: + print("Step {0}/{1}:".format(i // n, n_samples // n)) + n_batch = n if i + n <= n_samples else n_samples - i + + X = cp.asarray(dataset[i : i + n_batch, :], cp.float32) + + D, Ind = knn(X, queries, k, metric=metric, handle=handle) + handle.sync() + + D, Ind = cp.asarray(D), cp.asarray(Ind) + Ind += i # shift neighbor index by offset i + + if distances is None: + distances = D + indices = Ind + else: + distances = cp.concatenate([distances, D], axis=1) + indices = cp.concatenate([indices, Ind], axis=1) + idx = cp.argsort(distances, axis=1)[:, :k] + distances = cp.take_along_axis(distances, idx, axis=1) + indices = cp.take_along_axis(indices, idx, axis=1) + + i += n_batch + + return distances, indices + + +def main(): + pool = rmm.mr.PoolMemoryResource( + rmm.mr.CudaMemoryResource(), initial_pool_size=2**30 + ) + 
rmm.mr.set_current_device_resource(pool) + cp.cuda.set_allocator(rmm_cupy_allocator) + + parser = argparse.ArgumentParser( + prog="generate_groundtruth", + description="Generate true neighbors using exact NN search. " + "The input and output files are in big-ann-benchmark's binary format.", + epilog="""Example usage + # With existing query file + python -m cuvs_bench.generate_groundtruth --dataset /dataset/base.\ +fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin + + # With randomly generated queries + python -m cuvs_bench.generate_groundtruth --dataset /dataset/base.\ +fbin --output=groundtruth_dir --queries=random --n_queries=10000 + + # Using only a subset of the dataset. Define queries by randomly + # selecting vectors from the (subset of the) dataset. + python -m cuvs_bench.generate_groundtruth --dataset /dataset/base.\ +fbin --nrows=2000000 --cols=128 --output=groundtruth_dir \ +--queries=random-choice --n_queries=10000 + """, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + parser.add_argument("dataset", type=str, help="input dataset file name") + parser.add_argument( + "--queries", + type=str, + default="random", + help="Queries file name, or one of 'random-choice' or 'random' " + "(default). 'random-choice': select n_queries vectors from the input " + "dataset. 'random': generate n_queries as uniform random numbers.", + ) + parser.add_argument( + "--output", + type=str, + default="", + help="output directory name (default current dir)", + ) + + parser.add_argument( + "--n_queries", + type=int, + default=10000, + help="Number of quries to generate (if no query file is given). " + "Default: 10000.", + ) + + parser.add_argument( + "-N", + "--rows", + default=None, + type=int, + help="use only first N rows from dataset, by default the whole " + "dataset is used", + ) + parser.add_argument( + "-D", + "--cols", + default=None, + type=int, + help="number of features (dataset columns). 
" + "Default: read from dataset file.", + ) + parser.add_argument( + "--dtype", + type=str, + help="Dataset dtype. When not specified, then derived from extension." + " Supported types: 'float32', 'float16', 'uint8', 'int8'", + ) + + parser.add_argument( + "-k", + type=int, + default=100, + help="Number of neighbors (per query) to calculate", + ) + parser.add_argument( + "--metric", + type=str, + default="sqeuclidean", + help="Metric to use while calculating distances. Valid metrics are " + "those that are accepted by cuvs.neighbors.brute_force.knn. Most" + " commonly used with cuVS are 'sqeuclidean' and 'inner_product'", + ) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + + if args.rows is not None: + print("Reading subset of the data, nrows=", args.rows) + else: + print("Reading whole dataset") + + # Load input data + dataset = memmap_bin_file( + args.dataset, args.dtype, shape=(args.rows, args.cols) + ) + n_features = dataset.shape[1] + dtype = dataset.dtype + + print( + "Dataset size {:6.1f} GB, shape {}, dtype {}".format( + dataset.size * dataset.dtype.itemsize / 1e9, + dataset.shape, + np.dtype(dtype), + ) + ) + + if len(args.output) > 0: + os.makedirs(args.output, exist_ok=True) + + if args.queries == "random" or args.queries == "random-choice": + if args.n_queries is None: + raise RuntimeError( + "n_queries must be given to generate random queries" + ) + if args.queries == "random": + queries = generate_random_queries( + args.n_queries, n_features, dtype + ) + elif args.queries == "random-choice": + queries = choose_random_queries(dataset, args.n_queries) + + queries_filename = os.path.join( + args.output, "queries" + suffix_from_dtype(dtype) + ) + print("Writing queries file", queries_filename) + write_bin(queries_filename, queries) + else: + print("Reading queries from file", args.queries) + queries = memmap_bin_file(args.queries, dtype) + + print("Calculating true nearest neighbors") + distances, indices = 
calc_truth(dataset, queries, args.k, args.metric) + + write_bin( + os.path.join(args.output, "groundtruth.neighbors.ibin"), + indices.astype(np.uint32), + ) + write_bin( + os.path.join(args.output, "groundtruth.distances.fbin"), + distances.astype(np.float32), + ) + + +if __name__ == "__main__": + main() diff --git a/python/cuvs_bench/cuvs_bench/generate_groundtruth/utils.py b/python/cuvs_bench/cuvs_bench/generate_groundtruth/utils.py new file mode 100644 index 000000000..a969b3d89 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/generate_groundtruth/utils.py @@ -0,0 +1,101 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#

"""I/O helpers for cuvs_bench groundtruth generation: dtype/extension
mapping and memory-mapped access to .fbin/.ibin style binary files
(two uint32 header values -- rows, cols -- followed by raw row-major data)."""

import os

import numpy as np


def dtype_from_filename(filename):
    """Infer the numpy dtype of a binary dataset file from its extension.

    Supported: .fbin (float32), .hbin (float16), .ibin (int32),
    .u8bin (uint8), .i8bin (int8).

    Raises
    ------
    RuntimeError
        If the extension is not one of the supported ones.
    """
    ext = os.path.splitext(filename)[1]
    if ext == ".fbin":
        return np.float32
    if ext == ".hbin":
        return np.float16
    elif ext == ".ibin":
        return np.int32
    elif ext == ".u8bin":
        return np.ubyte
    elif ext == ".i8bin":
        return np.byte
    else:
        # Fix: original message lacked a separating space before the ext.
        raise RuntimeError("Not supported file extension " + ext)


def suffix_from_dtype(dtype):
    """Return the canonical file extension for ``dtype`` (inverse of
    ``dtype_from_filename``).

    Raises
    ------
    RuntimeError
        If the dtype has no associated extension.
    """
    if dtype == np.float32:
        return ".fbin"
    if dtype == np.float16:
        return ".hbin"
    elif dtype == np.int32:
        return ".ibin"
    elif dtype == np.ubyte:
        return ".u8bin"
    elif dtype == np.byte:
        return ".i8bin"
    else:
        # Fix: the original concatenated a str with a dtype object, which
        # raised TypeError instead of the intended RuntimeError.
        raise RuntimeError(f"Not supported dtype {dtype}")


def memmap_bin_file(
    bin_file, dtype, shape=None, mode="r", size_dtype=np.uint32
):
    """Memory-map a binary dataset file with a (rows, cols) header.

    Parameters
    ----------
    bin_file : str or None
        Path to the file. ``None`` is passed through (returns ``None``).
    dtype : numpy dtype or None
        Element type; derived from the file extension when ``None``.
    shape : tuple or None
        Desired shape. In read mode, ``None`` entries fall back to the
        header values; in write mode a full shape is required.
    mode : str
        ``"r"``/``"r+"`` to map an existing file, ``"w+"`` to create one.
    size_dtype : numpy dtype
        Integer type of the two header fields (default uint32).

    Returns
    -------
    numpy.memmap
        View of the payload (header excluded), or ``None``.
    """
    extent_itemsize = np.dtype(size_dtype).itemsize
    # The payload starts right after the two header integers.
    offset = int(extent_itemsize) * 2
    if bin_file is None:
        return None
    if dtype is None:
        dtype = dtype_from_filename(bin_file)

    if mode[0] == "r":
        a = np.memmap(bin_file, mode=mode, dtype=size_dtype, shape=(2,))
        if shape is None:
            # Plain ints avoid uint32 wrap-around in later size arithmetic.
            shape = (int(a[0]), int(a[1]))
        else:
            # ``None`` entries in the requested shape fall back to header.
            shape = tuple(
                [
                    int(aval) if sval is None else sval
                    for aval, sval in zip(a, shape)
                ]
            )

        return np.memmap(
            bin_file, mode=mode, dtype=dtype, offset=offset, shape=shape
        )
    elif mode[0] == "w":
        if shape is None:
            raise ValueError("Need to specify shape to map file in write mode")

        print("creating file", bin_file)
        dirname = os.path.dirname(bin_file)
        if len(dirname) > 0:
            os.makedirs(dirname, exist_ok=True)
        # Write the (rows, cols) header first, then re-map the payload at
        # the proper offset in read-write mode.
        a = np.memmap(bin_file, mode=mode, dtype=size_dtype, shape=(2,))
        a[0] = shape[0]
        a[1] = shape[1]
        a.flush()
        del a
        fp = np.memmap(
            bin_file, mode="r+", dtype=dtype, offset=offset, shape=shape
        )
        return fp
    else:
        # Fix: the original fell through and silently returned None for
        # any mode not starting with "r" or "w" (e.g. "c").
        raise ValueError(f"Unsupported mode {mode!r}")


def write_bin(fname, data):
    """Write ``data`` to ``fname``: uint32 (rows, cols) header + raw data."""
    print("writing", fname, data.shape, data.dtype, "...")
    with open(fname, "wb") as f:
        np.asarray(data.shape, dtype=np.uint32).tofile(f)
        data.tofile(f)
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Download an ann-benchmarks dataset and convert it into the flat binary
files (.fbin / .ibin) consumed by cuvs_bench."""

import argparse
import os
import subprocess
import sys
from urllib.request import urlretrieve

# Files produced per dataset by the hdf5 -> fbin conversion.
BIN_FILES = (
    "base.fbin",
    "query.fbin",
    "groundtruth.neighbors.ibin",
    "groundtruth.distances.fbin",
)


def get_dataset_path(name, ann_bench_data_path):
    """Return the target .hdf5 path for ``name``, creating the dataset
    directory (including missing parents) when needed."""
    # Fix: makedirs instead of mkdir -- works for nested paths and is not
    # racy between the exists() check and the creation.
    os.makedirs(ann_bench_data_path, exist_ok=True)
    return os.path.join(ann_bench_data_path, f"{name}.hdf5")


def download_dataset(url, path):
    """Fetch ``url`` into ``path`` unless the file already exists."""
    if not os.path.exists(path):
        print(f"downloading {url} -> {path}...")
        urlretrieve(url, path)


def convert_hdf5_to_fbin(path, normalize):
    """Run the bundled hdf5_to_fbin.py helper on ``path``; pass ``-n``
    when normalization is requested for an angular dataset."""
    scripts_path = os.path.dirname(os.path.realpath(__file__))
    ann_bench_scripts_path = os.path.join(scripts_path, "hdf5_to_fbin.py")
    print(f"calling script {ann_bench_scripts_path}")
    # Fix: use the running interpreter instead of whatever "python" is on
    # PATH (may be absent or a different environment).
    cmd = [sys.executable, ann_bench_scripts_path]
    # Normalization only applies to angular (cosine) datasets.
    if normalize and "angular" in path:
        cmd.append("-n")
    cmd.append(path)
    subprocess.run(cmd, check=True)


def move(name, ann_bench_data_path):
    """Move the generated ``<name>.<part>`` files into a per-dataset
    subdirectory, renaming 'angular' to 'inner' (inner product)."""
    new_name = name.replace("angular", "inner") if "angular" in name else name
    new_path = os.path.join(ann_bench_data_path, new_name)
    os.makedirs(new_path, exist_ok=True)
    for bin_name in BIN_FILES:
        os.rename(
            os.path.join(ann_bench_data_path, f"{name}.{bin_name}"),
            os.path.join(new_path, bin_name),
        )


def download(name, normalize, ann_bench_data_path):
    """Download dataset ``name``, convert it and lay out the bin files."""
    path = get_dataset_path(name, ann_bench_data_path)
    url = f"http://ann-benchmarks.com/{name}.hdf5"
    try:
        download_dataset(url, path)
        convert_hdf5_to_fbin(path, normalize)
        move(name, ann_bench_data_path)
    except Exception:
        print(f"Cannot download {url}")
        raise


def main():
    """CLI entry point: parse arguments and run the download pipeline."""
    call_path = os.getcwd()
    if "RAPIDS_DATASET_ROOT_DIR" in os.environ:
        default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR")
    else:
        default_dataset_path = os.path.join(call_path, "datasets/")
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--dataset", help="dataset to download", default="glove-100-angular"
    )
    parser.add_argument(
        "--dataset-path",
        help="path to download dataset",
        default=default_dataset_path,
    )
    parser.add_argument(
        "--normalize",
        help="normalize cosine distance to inner product",
        action="store_true",
    )

    # Show usage instead of silently running with all defaults.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    download(args.dataset, args.normalize, args.dataset_path)


if __name__ == "__main__":
    main()
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import absolute_import, division, print_function + +import sys + +import numpy as np + + +def read_fbin(fname): + shape = np.fromfile(fname, dtype=np.uint32, count=2) + if float(shape[0]) * shape[1] * 4 > 2_000_000_000: + data = np.memmap(fname, dtype=np.float32, offset=8, mode="r").reshape( + shape + ) + else: + data = np.fromfile(fname, dtype=np.float32, offset=8).reshape(shape) + return data + + +def write_bin(fname, data): + with open(fname, "wb") as f: + np.asarray(data.shape, dtype=np.uint32).tofile(f) + data.tofile(f) + + +if len(sys.argv) != 3: + print( + "usage: %s input.fbin output.f16bin" % (sys.argv[0]), + file=sys.stderr, + ) + sys.exit(-1) + +data = read_fbin(sys.argv[1]).astype(np.float16) +write_bin(sys.argv[2], data) diff --git a/python/cuvs_bench/cuvs_bench/get_dataset/hdf5_to_fbin.py b/python/cuvs_bench/cuvs_bench/get_dataset/hdf5_to_fbin.py new file mode 100644 index 000000000..317051aa2 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/get_dataset/hdf5_to_fbin.py @@ -0,0 +1,90 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.


# Convert an ann-benchmarks .hdf5 dataset into the flat binary files used
# by cuvs_bench: base/query vectors (.fbin) and groundtruth neighbor
# indices / distances (.ibin / .fbin).
# NOTE: requires h5py, which is not part of the standard library.

import sys

import h5py
import numpy as np


def normalize(x):
    # L2-normalize each row of x; (x.T / norm).T broadcasts the per-row
    # norms across all columns.
    norm = np.linalg.norm(x, axis=1)
    return (x.T / norm).T


def write_bin(fname, data):
    # Binary layout: two uint32 values (rows, cols) followed by the raw
    # array payload in row-major order.
    with open(fname, "wb") as f:
        np.asarray(data.shape, dtype=np.uint32).tofile(f)
        data.tofile(f)


if __name__ == "__main__":
    # Accept exactly one positional argument, optionally preceded by -n.
    if len(sys.argv) != 2 and len(sys.argv) != 3:
        print(
            "usage: %s [-n] .hdf5\n" % (sys.argv[0]),
            " -n: normalize base/query set\n",
            "outputs: .base.fbin\n",
            " .query.fbin\n",
            " .groundtruth.neighbors.ibin\n",
            " .groundtruth.distances.fbin",
            file=sys.stderr,
        )
        sys.exit(-1)

    # "-n" requests L2 normalization, which turns angular (cosine)
    # distance into inner product on unit vectors.
    need_normalize = False
    if len(sys.argv) == 3:
        assert sys.argv[1] == "-n"
        need_normalize = True
    fname_prefix = sys.argv[-1]
    assert fname_prefix.endswith(".hdf5")
    # Output files reuse the input path with the ".hdf5" suffix stripped.
    fname_prefix = fname_prefix[:-5]

    hdf5 = h5py.File(sys.argv[-1], "r")
    # Only the two distance types published by ann-benchmarks are
    # supported; the dtype asserts below pin the expected file schema.
    assert (
        hdf5.attrs["distance"] == "angular"
        or hdf5.attrs["distance"] == "euclidean"
    )
    assert hdf5["train"].dtype == np.float32
    assert hdf5["test"].dtype == np.float32
    assert hdf5["neighbors"].dtype == np.int32
    assert hdf5["distances"].dtype == np.float32

    # [:] materializes the HDF5 datasets as in-memory numpy arrays.
    base = hdf5["train"][:]
    query = hdf5["test"][:]
    if need_normalize:
        base = normalize(base)
        query = normalize(query)
    elif hdf5.attrs["distance"] == "angular":
        # Angular data without -n is almost certainly a user mistake.
        print(
            "warning: input has angular distance, ",
            "specify -n to normalize base/query set!\n",
        )

    output_fname = fname_prefix + ".base.fbin"
    print("writing", output_fname, "...")
    write_bin(output_fname, base)

    output_fname = fname_prefix + ".query.fbin"
    print("writing", output_fname, "...")
    write_bin(output_fname, query)

    output_fname = fname_prefix + ".groundtruth.neighbors.ibin"
    print("writing", output_fname, "...")
    write_bin(output_fname, hdf5["neighbors"][:])

    output_fname = fname_prefix + ".groundtruth.distances.fbin"
    print("writing", output_fname, "...")
    write_bin(output_fname, hdf5["distances"][:])

"""Split a billion-scale combined groundtruth file into separate
neighbors (.ibin) and distances (.fbin) files via split_groundtruth.pl."""

import argparse
import os
import subprocess
import sys


def split_groundtruth(groundtruth_filepath):
    """Run the bundled split_groundtruth.pl on ``groundtruth_filepath``.

    The perl helper writes its outputs relative to the current working
    directory, so we temporarily chdir into the groundtruth file's
    directory and always restore the caller's cwd afterwards.
    """
    ann_bench_scripts_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "split_groundtruth.pl"
    )
    pwd = os.getcwd()
    # Fix: os.path.split keeps absolute paths absolute; the original
    # normpath(...).split(os.sep) + os.path.join dropped the leading
    # separator, turning "/data/gt" into the relative path "data".
    dirname, groundtruth_filename = os.path.split(
        os.path.normpath(groundtruth_filepath)
    )
    try:
        if dirname:
            os.chdir(dirname)
        subprocess.run(
            [ann_bench_scripts_path, groundtruth_filename, "groundtruth"],
            check=True,
        )
    finally:
        # Fix: restore the working directory even when the helper fails
        # (check=True raises); the original leaked the chdir on error.
        os.chdir(pwd)


def main():
    """CLI entry point: parse --groundtruth and split the file."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--groundtruth",
        help="Path to billion-scale dataset groundtruth file",
        required=True,
    )

    # Show usage instead of an argparse error when called with no args.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    split_groundtruth(args.groundtruth)


if __name__ == "__main__":
    main()

use strict;
use warnings;
use autodie qw(open close);

# Split a combined groundtruth file -- 8-byte (nrows, dim) header followed
# by an int32 neighbors matrix and a float32 distances matrix -- into
# <prefix>.neighbors.ibin and <prefix>.distances.fbin, each carrying a
# copy of the original header.

@ARGV == 2
    or die "usage: $0 input output_prefix\n";

open my $fh_in, '<:raw', $ARGV[0];

my $header;
# Check the byte count: autodie does not cover short reads.
read($fh_in, $header, 8) == 8
    or die "error: cannot read 8-byte header from $ARGV[0]\n";

# 'V' is explicitly little-endian uint32 (the on-disk format); identical
# to the native 'L' on x86_64/aarch64 but portable to big-endian hosts.
my ($nrows, $dim) = unpack 'V V', $header;

my $matrix_bytes  = $nrows * $dim * 4;
my $expected_size = 8 + 2 * $matrix_bytes;
my $size          = (stat $fh_in)[7];
$size == $expected_size
    or die("error: expected size is $expected_size, but actual size is $size\n");

open my $fh_neighbors, '>:raw', "$ARGV[1].neighbors.ibin";
open my $fh_distances, '>:raw', "$ARGV[1].distances.fbin";

# Both outputs start with a copy of the (nrows, dim) header.
print {$fh_neighbors} $header;
print {$fh_distances} $header;

# NOTE: each matrix is slurped whole, like the original -- this needs
# nrows*dim*4 bytes of memory per matrix.
my $buf;
read($fh_in, $buf, $matrix_bytes) == $matrix_bytes
    or die "error: short read of neighbors matrix\n";
print {$fh_neighbors} $buf;

read($fh_in, $buf, $matrix_bytes) == $matrix_bytes
    or die "error: short read of distances matrix\n";
print {$fh_distances} $buf;

# Explicit close on write handles surfaces buffered-write errors
# (autodie-wrapped close dies on failure); the original never closed.
close $fh_neighbors;
close $fh_distances;
close $fh_in;