From 3383f281acea0afa34d72aedc08ac3e828a8c4fd Mon Sep 17 00:00:00 2001 From: Victor Lafargue Date: Thu, 3 Oct 2024 20:10:13 +0200 Subject: [PATCH] SNMG ANN (#231) This PR implements a distributed (single-node-multiple-GPUs) implementation of ANN indexes. It allows to build, extend and search an index on multiple GPUs. Before building the index, the user has to choose between two modes : **Sharding mode** : The index dataset is split, each GPU trains its own index with its respective share of the dataset. This is intended to both increase the search throughput and the maximal size of the index. **Index duplication mode** : The index is built once on a GPU and then copied over to others. Alternatively, the index dataset is sent to each GPU to be built there. This intended to increase the search throughput. SNMG indexes can be serialized and de-serialized. Local models can also be deserialized and deployed in index duplication mode. ![bench](https://github.com/user-attachments/assets/e313d0ef-02eb-482a-9104-9e1bb400456d) Migrated from https://github.com/rapidsai/raft/pull/1993 Authors: - Victor Lafargue (https://github.com/viclafargue) - James Lamb (https://github.com/jameslamb) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - James Lamb (https://github.com/jameslamb) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/231 --- build.sh | 9 +- .../all_cuda-118_arch-aarch64.yaml | 1 + .../all_cuda-118_arch-x86_64.yaml | 1 + .../all_cuda-125_arch-aarch64.yaml | 1 + .../all_cuda-125_arch-x86_64.yaml | 1 + .../bench_ann_cuda-118_arch-aarch64.yaml | 1 + .../bench_ann_cuda-118_arch-x86_64.yaml | 1 + .../bench_ann_cuda-125_arch-aarch64.yaml | 1 + .../bench_ann_cuda-125_arch-x86_64.yaml | 1 + conda/recipes/libcuvs/conda_build_config.yaml | 3 + conda/recipes/libcuvs/meta.yaml | 4 + cpp/CMakeLists.txt | 41 + cpp/bench/ann/CMakeLists.txt | 18 + .../src/cuvs/cuvs_ann_bench_param_parser.h | 18 +- cpp/bench/ann/src/cuvs/cuvs_benchmark.cu | 89 ++ cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h | 38 +- cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu | 23 + .../ann/src/cuvs/cuvs_mg_cagra_wrapper.h | 183 +++ cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu | 23 + .../ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h | 140 ++ cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu | 23 + .../ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h | 139 ++ cpp/doxygen/Doxyfile | 2 +- cpp/include/cuvs/neighbors/common.hpp | 54 +- cpp/include/cuvs/neighbors/ivf_flat.hpp | 6 +- cpp/include/cuvs/neighbors/ivf_pq.hpp | 77 +- cpp/include/cuvs/neighbors/mg.hpp | 1367 +++++++++++++++++ cpp/src/neighbors/detail/cagra/add_nodes.cuh | 3 +- .../detail/cagra/cagra_serialize.cuh | 3 +- cpp/src/neighbors/iface/generate_iface.py | 273 ++++ cpp/src/neighbors/iface/iface.hpp | 198 +++ .../iface/iface_cagra_float_uint32_t.cu | 95 ++ .../iface/iface_cagra_half_uint32_t.cu | 95 ++ .../iface/iface_cagra_int8_t_uint32_t.cu | 95 ++ .../iface/iface_cagra_uint8_t_uint32_t.cu | 95 ++ .../iface/iface_flat_float_int64_t.cu | 96 ++ .../iface/iface_flat_int8_t_int64_t.cu | 96 ++ .../iface/iface_flat_uint8_t_int64_t.cu | 96 ++ .../neighbors/iface/iface_pq_float_int64_t.cu | 95 ++ .../neighbors/iface/iface_pq_half_int64_t.cu | 95 ++ .../iface/iface_pq_int8_t_int64_t.cu | 95 ++ .../iface/iface_pq_uint8_t_int64_t.cu | 95 ++ .../neighbors/ivf_flat/generate_ivf_flat.py | 2 +- .../ivf_flat/ivf_flat_search_float_int64_t.cu | 2 +- .../ivf_flat_search_int8_t_int64_t.cu | 2 +- .../ivf_flat_search_uint8_t_int64_t.cu | 2 +- .../ivf_pq/detail/generate_ivf_pq.py | 2 +- .../detail/ivf_pq_search_float_int64_t.cu | 2 +- .../detail/ivf_pq_search_half_int64_t.cu | 2 +- .../detail/ivf_pq_search_int8_t_int64_t.cu | 2 +- .../detail/ivf_pq_search_uint8_t_int64_t.cu | 2 +- cpp/src/neighbors/mg/generate_mg.py | 286 ++++ cpp/src/neighbors/mg/mg.cuh | 690 +++++++++ .../neighbors/mg/mg_cagra_float_uint32_t.cu | 92 ++ .../neighbors/mg/mg_cagra_half_uint32_t.cu | 92 ++ .../neighbors/mg/mg_cagra_int8_t_uint32_t.cu | 92 ++ .../neighbors/mg/mg_cagra_uint8_t_uint32_t.cu | 92 ++ cpp/src/neighbors/mg/mg_flat_float_int64_t.cu | 100 ++ .../neighbors/mg/mg_flat_int8_t_int64_t.cu | 100 ++ .../neighbors/mg/mg_flat_uint8_t_int64_t.cu | 100 ++ cpp/src/neighbors/mg/mg_pq_float_int64_t.cu | 100 ++ cpp/src/neighbors/mg/mg_pq_half_int64_t.cu | 100 ++ cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu | 100 ++ cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu | 100 ++ cpp/src/neighbors/mg/nccl_comm.cpp | 8 + cpp/src/neighbors/mg/omp_checks.cpp | 34 + cpp/test/CMakeLists.txt | 11 +- cpp/test/neighbors/mg.cuh | 825 ++++++++++ cpp/test/neighbors/mg/test_float.cu | 28 + dependencies.yaml | 1 + docs/source/build.rst | 10 + docs/source/cpp_api/neighbors.rst | 1 + docs/source/cpp_api/neighbors_mg.rst | 76 + 73 files changed, 6800 insertions(+), 46 deletions(-) create mode 100644 cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu create mode 100644 cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h create mode 100644 cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu create mode 100644 cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h create mode 100644 cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu create mode 100644 cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h create mode 100644 cpp/include/cuvs/neighbors/mg.hpp create mode 100644 cpp/src/neighbors/iface/generate_iface.py create mode 100644 cpp/src/neighbors/iface/iface.hpp create mode 100644 cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu create mode 100644 cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu create mode 100644 cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu create mode 100644 cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu create mode 100644 cpp/src/neighbors/iface/iface_flat_float_int64_t.cu create mode 100644 cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu create mode 100644 cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/iface/iface_pq_float_int64_t.cu create mode 100644 cpp/src/neighbors/iface/iface_pq_half_int64_t.cu create mode 100644 cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu create mode 100644 cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/mg/generate_mg.py create mode 100644 cpp/src/neighbors/mg/mg.cuh create mode 100644 cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu create mode 100644 cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu create mode 100644 cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu create mode 100644 cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu create mode 100644 cpp/src/neighbors/mg/mg_flat_float_int64_t.cu create mode 100644 cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu create mode 100644 cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/mg/mg_pq_float_int64_t.cu create mode 100644 cpp/src/neighbors/mg/mg_pq_half_int64_t.cu create mode 100644 cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu create mode 100644 cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/mg/nccl_comm.cpp create mode 100644 cpp/src/neighbors/mg/omp_checks.cpp create mode 100644 cpp/test/neighbors/mg.cuh create mode 100644 cpp/test/neighbors/mg/test_float.cu create mode 100644 docs/source/cpp_api/neighbors_mg.rst diff --git a/build.sh b/build.sh index b463f0f0d..c66a0c35e 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -40,6 +40,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index ce9a7f058..a25393050 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -37,6 +37,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 116e80ac2..bb4a96d48 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -34,6 +34,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7f7ad045d..bd1b95ae8 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -34,6 +34,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 73c42ca71..554ad41ab 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-aarch64=11.8 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 473e50bc6..dc38f3565 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 8a877c4c0..aeb23a9ef 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -32,6 +32,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 54859a77f..3a408cd64 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -32,6 +32,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/recipes/libcuvs/conda_build_config.yaml b/conda/recipes/libcuvs/conda_build_config.yaml index e165f7ed9..b8c49943e 100644 --- a/conda/recipes/libcuvs/conda_build_config.yaml +++ b/conda/recipes/libcuvs/conda_build_config.yaml @@ -22,6 +22,9 @@ cmake_version: h5py_version: - ">=3.8.0" +nccl_version: + - ">=2.19" + # The CTK libraries below are missing from the conda-forge::cudatoolkit package # for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages # and the "*_run_*" version specifiers correspond to `11.x` packages. diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index e154ccf41..3dd7c8f2e 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -65,6 +65,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -131,6 +132,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -197,6 +199,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version ={{ cuda_version }} - openblas # required by some CPU algos in benchmarks @@ -268,6 +271,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6f5178251..52c5f6624 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -57,6 +57,7 @@ option(BUILD_C_LIBRARY "Build cuVS C API library" OFF) option(BUILD_C_TESTS "Build cuVS C API tests" OFF) option(BUILD_CUVS_BENCH "Build cuVS ann benchmarks" OFF) option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) +option(BUILD_MG_ALGOS "Build with multi-GPU support" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF @@ -287,6 +288,24 @@ target_compile_options( "$<$:${CUVS_CUDA_FLAGS}>" ) +if(BUILD_MG_ALGOS) + set(CUVS_MG_ALGOS + src/neighbors/mg/mg_flat_float_int64_t.cu + src/neighbors/mg/mg_flat_int8_t_int64_t.cu + src/neighbors/mg/mg_flat_uint8_t_int64_t.cu + src/neighbors/mg/mg_pq_float_int64_t.cu + src/neighbors/mg/mg_pq_half_int64_t.cu + src/neighbors/mg/mg_pq_int8_t_int64_t.cu + src/neighbors/mg/mg_pq_uint8_t_int64_t.cu + src/neighbors/mg/mg_cagra_float_uint32_t.cu + src/neighbors/mg/mg_cagra_half_uint32_t.cu + src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu + src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu + src/neighbors/mg/omp_checks.cpp + src/neighbors/mg/nccl_comm.cpp + ) +endif() + add_library( cuvs_objs OBJECT src/cluster/kmeans_balanced_fit_float.cu @@ -367,6 +386,17 @@ add_library( src/neighbors/cagra_serialize_half.cu src/neighbors/cagra_serialize_int8.cu src/neighbors/cagra_serialize_uint8.cu + src/neighbors/iface/iface_cagra_float_uint32_t.cu + src/neighbors/iface/iface_cagra_half_uint32_t.cu + src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu + src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu + src/neighbors/iface/iface_flat_float_int64_t.cu + src/neighbors/iface/iface_flat_int8_t_int64_t.cu + src/neighbors/iface/iface_flat_uint8_t_int64_t.cu + src/neighbors/iface/iface_pq_float_int64_t.cu + src/neighbors/iface/iface_pq_half_int64_t.cu + src/neighbors/iface/iface_pq_int8_t_int64_t.cu + src/neighbors/iface/iface_pq_uint8_t_int64_t.cu src/neighbors/detail/cagra/cagra_build.cpp src/neighbors/detail/cagra/topk_for_cagra/topk.cu $<$:src/neighbors/hnsw.cpp> @@ -434,6 +464,7 @@ add_library( src/selection/select_k_half_uint32_t.cu src/stats/silhouette_score.cu src/stats/trustworthiness_score.cu + ${CUVS_MG_ALGOS} ) set_target_properties( @@ -520,11 +551,16 @@ if(NOT BUILD_CPU_ONLY) ${CUVS_CUSPARSE_DEPENDENCY} ${CUVS_CURAND_DEPENDENCY} ) + if(BUILD_MG_ALGOS) + set(CUVS_COMMS_DEPENDENCY nccl) + endif() + # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. target_link_libraries( cuvs PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} PRIVATE nvidia::cutlass::cutlass $ cuvs-cagra-search + ${CUVS_COMMS_DEPENDENCY} ) target_link_libraries( @@ -534,6 +570,11 @@ if(NOT BUILD_CPU_ONLY) ) endif() +if(BUILD_MG_ALGOS) + target_compile_definitions(cuvs PUBLIC CUVS_BUILD_MG_ALGOS) + target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_MG_ALGOS) +endif() + if(BUILD_CAGRA_HNSWLIB) target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib) target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 8cbf8c8b3..c36e70ace 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -32,6 +32,7 @@ option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benc option(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB "Include cuVS CAGRA with HNSW search in benchmark" ON) option(CUVS_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" OFF) +option(CUVS_ANN_BENCH_USE_CUVS_MG "Include cuVS ann mg algorithm in benchmark" ${BUILD_MG_ALGOS}) option(CUVS_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF ) @@ -55,6 +56,7 @@ if(BUILD_CPU_ONLY) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OFF) set(CUVS_ANN_BENCH_USE_GGNN OFF) set(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE OFF) + set(CUVS_ANN_BENCH_USE_CUVS_MG OFF) else() set(CUVS_FAISS_ENABLE_GPU ON) endif() @@ -66,6 +68,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OR CUVS_ANN_BENCH_USE_CUVS_CAGRA OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE + OR CUVS_ANN_BENCH_USE_CUVS_MG ) set(CUVS_ANN_BENCH_USE_CUVS ON) endif() @@ -245,6 +248,21 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) ) endif() +if(CUVS_ANN_BENCH_USE_CUVS_MG) + ConfigureAnnBench( + NAME + CUVS_MG + PATH + src/cuvs/cuvs_benchmark.cu + $<$:src/cuvs/cuvs_mg_ivf_flat.cu> + $<$:src/cuvs/cuvs_mg_ivf_pq.cu> + $<$:src/cuvs/cuvs_mg_cagra.cu> + LINKS + cuvs + nccl + ) +endif() + message("CUVS_FAISS_TARGETS: ${CUVS_FAISS_TARGETS}") message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") if(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT) diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h index 22f0cab6f..57d5b1910 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h @@ -45,7 +45,18 @@ extern template class cuvs::bench::cuvs_cagra; extern template class cuvs::bench::cuvs_cagra; #endif -#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG +#include "cuvs_ivf_flat_wrapper.h" +#include "cuvs_mg_ivf_flat_wrapper.h" + +#include "cuvs_ivf_pq_wrapper.h" +#include "cuvs_mg_ivf_pq_wrapper.h" + +#include "cuvs_cagra_wrapper.h" +#include "cuvs_mg_cagra_wrapper.h" +#endif + +#if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, typename cuvs::bench::cuvs_ivf_flat::build_param& param) @@ -64,7 +75,7 @@ void parse_search_param(const nlohmann::json& conf, #endif #if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \ - defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) + defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, typename cuvs::bench::cuvs_ivf_pq::build_param& param) @@ -130,7 +141,8 @@ void parse_search_param(const nlohmann::json& conf, } #endif -#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) +#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || \ + defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, cuvs::neighbors::nn_descent::index_params& param) { diff --git a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu index a956ab139..893097236 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu @@ -29,6 +29,43 @@ namespace cuvs::bench { +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG +void add_distribution_mode(cuvs::neighbors::mg::distribution_mode* dist_mode, + const nlohmann::json& conf) +{ + if (conf.contains("distribution_mode")) { + std::string distribution_mode = conf.at("distribution_mode"); + if (distribution_mode == "replicated") { + *dist_mode = cuvs::neighbors::mg::distribution_mode::REPLICATED; + } else if (distribution_mode == "sharded") { + *dist_mode = cuvs::neighbors::mg::distribution_mode::SHARDED; + } else { + throw std::runtime_error("invalid value for distribution_mode"); + } + } else { + // default + *dist_mode = cuvs::neighbors::mg::distribution_mode::SHARDED; + } +}; + +void add_merge_mode(cuvs::neighbors::mg::sharded_merge_mode* merge_mode, const nlohmann::json& conf) +{ + if (conf.contains("merge_mode")) { + std::string sharded_merge_mode = conf.at("merge_mode"); + if (sharded_merge_mode == "tree_merge") { + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE; + } else if (sharded_merge_mode == "merge_on_root_rank") { + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::MERGE_ON_ROOT_RANK; + } else { + throw std::runtime_error("invalid value for merge_mode"); + } + } else { + // default + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE; + } +}; +#endif + template auto create_algo(const std::string& algo_name, const std::string& distance, @@ -71,6 +108,32 @@ auto create_algo(const std::string& algo_name, parse_build_param(conf, param); a = std::make_unique>(metric, dim, param); } +#endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") { + typename cuvs::bench::cuvs_mg_ivf_flat::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + } + + if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") { + typename cuvs::bench::cuvs_mg_ivf_pq::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + + if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") { + typename cuvs::bench::cuvs_mg_cagra::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + #endif if (!a) { throw std::runtime_error("invalid algo: '" + algo_name + "'"); } @@ -113,6 +176,32 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con return param; } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") { + auto param = + std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } + } + + if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } + + if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } +#endif // else throw std::runtime_error("invalid algo: '" + algo_name + "'"); diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index ff854f890..b2ba35eee 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -72,6 +72,23 @@ class cuvs_cagra : public algo, public algo_gpu { std::optional ivf_pq_refine_rate = std::nullopt; std::optional ivf_pq_build_params = std::nullopt; std::optional ivf_pq_search_params = std::nullopt; + + void prepare_build_params(const raft::extent_2d& dataset_extents) + { + if (algo == CagraBuildAlgo::kIvfPq) { + auto pq_params = cuvs::neighbors::cagra::graph_build_params::ivf_pq_params( + dataset_extents, cagra_params.metric); + if (ivf_pq_build_params) { pq_params.build_params = *ivf_pq_build_params; } + if (ivf_pq_search_params) { pq_params.search_params = *ivf_pq_search_params; } + if (ivf_pq_refine_rate) { pq_params.refinement_rate = *ivf_pq_refine_rate; } + cagra_params.graph_build_params = pq_params; + } else if (algo == CagraBuildAlgo::kNnDescent) { + auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params( + cagra_params.intermediate_graph_degree); + if (nn_descent_params) { nn_params = *nn_descent_params; } + cagra_params.graph_build_params = nn_params; + } + } }; cuvs_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1) @@ -168,28 +185,9 @@ template void cuvs_cagra::build(const T* dataset, size_t nrow) { auto dataset_extents = raft::make_extents(nrow, dimension_); + index_params_.prepare_build_params(dataset_extents); auto& params = index_params_.cagra_params; - - if (index_params_.algo == CagraBuildAlgo::kIvfPq) { - auto pq_params = - cuvs::neighbors::cagra::graph_build_params::ivf_pq_params(dataset_extents, params.metric); - if (index_params_.ivf_pq_build_params) { - pq_params.build_params = *index_params_.ivf_pq_build_params; - } - if (index_params_.ivf_pq_search_params) { - pq_params.search_params = *index_params_.ivf_pq_search_params; - } - if (index_params_.ivf_pq_refine_rate) { - pq_params.refinement_rate = *index_params_.ivf_pq_refine_rate; - } - params.graph_build_params = pq_params; - } else if (index_params_.algo == CagraBuildAlgo::kNnDescent) { - auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params( - params.intermediate_graph_degree); - if (index_params_.nn_descent_params) { nn_params = *index_params_.nn_descent_params; } - params.graph_build_params = nn_params; - } auto dataset_view_host = raft::make_mdspan(dataset, dataset_extents); auto dataset_view_device = diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu new file mode 100644 index 000000000..801caa85f --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_cagra_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h new file mode 100644 index 000000000..50c1ff4db --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_cagra_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +enum class AllocatorType; +enum class CagraBuildAlgo; + +template +class cuvs_mg_cagra : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + struct build_param : public cuvs::bench::cuvs_cagra::build_param { + cuvs::neighbors::mg::distribution_mode mode; + }; + + struct search_param : public cuvs::bench::cuvs_cagra::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1) + : algo(metric, dim), index_params_(param) + { + index_params_.cagra_params.metric = parse_metric_type(metric); + index_params_.ivf_pq_build_params->metric = parse_metric_type(metric); + + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + + void set_search_param(const search_param_base& param) override; + + void set_search_dataset(const T* dataset, size_t nrow) override; + + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + void search_base(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const; + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + // to enable dataset access from GPU memory + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + void save(const std::string& file) const override; + void load(const std::string&) override; + void save_to_hnswlib(const std::string& file) const; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + float refine_ratio_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> + index_; +}; + +template +void cuvs_mg_cagra::build(const T* dataset, size_t nrow) +{ + auto dataset_extents = raft::make_extents(nrow, dim_); + index_params_.prepare_build_params(dataset_extents); + cuvs::neighbors::mg::index_params build_params = index_params_.cagra_params; + build_params.mode = index_params_.mode; + + auto dataset_view = + raft::make_host_matrix_view(dataset, nrow, dim_); + auto idx = cuvs::neighbors::mg::build(handle_, build_params, dataset_view); + index_ = + std::make_shared, T, IdxT>>( + std::move(idx)); +} + +inline auto allocator_to_string(AllocatorType mem_type) -> std::string; + +template +void cuvs_mg_cagra::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = static_cast>(sp.p); + cagra::search_params* search_params_ptr_ = static_cast(&search_params_); + *search_params_ptr_ = sp.p; + search_params_.merge_mode = sp.merge_mode; + refine_ratio_ = sp.refine_ratio; +} + +template +void cuvs_mg_cagra::set_search_dataset(const T* dataset, size_t nrow) +{ +} + +template +void cuvs_mg_cagra::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_cagra::load(const std::string& file) +{ + index_ = + std::make_shared, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_cagra(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_cagra::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_cagra::search_base( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + static_assert(std::is_integral_v); + static_assert(std::is_integral_v); + + auto queries_view = + raft::make_host_matrix_view(queries, batch_size, dim_); + auto neighbors_view = + raft::make_host_matrix_view((IdxT*)neighbors, batch_size, k); + auto distances_view = + raft::make_host_matrix_view(distances, batch_size, k); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +template +void cuvs_mg_cagra::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto k0 = static_cast(refine_ratio_ * k); + const bool disable_refinement = k0 <= static_cast(k); + + if (disable_refinement) { + search_base(queries, batch_size, k, neighbors, distances); + } else { + throw std::runtime_error("refinement not supported"); + } +} +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu new file mode 100644 index 000000000..20cdc41e3 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_ivf_flat_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_ivf_flat; +// template class cuvs_mg_ivf_flat; +template class cuvs_mg_ivf_flat; +template class cuvs_mg_ivf_flat; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h new file mode 100644 index 000000000..54a0d2fac --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_ivf_flat_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +template +class cuvs_mg_ivf_flat : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + using build_param = cuvs::neighbors::mg::index_params; + + struct search_param : public cuvs::bench::cuvs_ivf_flat::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_ivf_flat(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param) + { + index_params_.metric = parse_metric_type(metric); + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + void set_search_param(const search_param_base& param) override; + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> + index_; +}; + +template +void cuvs_mg_ivf_flat::build(const T* dataset, size_t nrow) +{ + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), IdxT(dim_)); + auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view); + index_ = std::make_shared< + cuvs::neighbors::mg::index, T, IdxT>>(std::move(idx)); +} + +template +void cuvs_mg_ivf_flat::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = sp.ivf_flat_params; + ivf_flat::search_params* search_params_ptr_ = + static_cast(&search_params_); + *search_params_ptr_ = sp.ivf_flat_params; + search_params_.merge_mode = sp.merge_mode; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_mg_ivf_flat::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_ivf_flat::load(const std::string& file) +{ + index_ = std::make_shared< + cuvs::neighbors::mg::index, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_flat(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_ivf_flat::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_ivf_flat::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto queries_view = raft::make_host_matrix_view( + queries, IdxT(batch_size), IdxT(dim_)); + auto neighbors_view = raft::make_host_matrix_view( + (IdxT*)neighbors, IdxT(batch_size), IdxT(k)); + auto distances_view = raft::make_host_matrix_view( + distances, IdxT(batch_size), IdxT(k)); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +} // namespace cuvs::bench \ No newline at end of file diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu new file mode 100644 index 000000000..a74bab6f5 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_ivf_pq_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h new file mode 100644 index 000000000..84aea7d4a --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_ivf_pq_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +template +class cuvs_mg_ivf_pq : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + using build_param = cuvs::neighbors::mg::index_params; + + struct search_param : public cuvs::bench::cuvs_ivf_pq::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_ivf_pq(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param) + { + index_params_.metric = parse_metric_type(metric); + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + void set_search_param(const search_param_base& param) override; + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> index_; +}; + +template +void cuvs_mg_ivf_pq::build(const T* dataset, size_t nrow) +{ + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), IdxT(dim_)); + auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view); + index_ = + std::make_shared, T, IdxT>>( + std::move(idx)); +} + +template +void cuvs_mg_ivf_pq::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = static_cast>(sp.pq_param); + ivf_pq::search_params* search_params_ptr_ = static_cast(&search_params_); + *search_params_ptr_ = sp.pq_param; + search_params_.merge_mode = sp.merge_mode; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_mg_ivf_pq::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_ivf_pq::load(const std::string& file) +{ + index_ = + std::make_shared, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_pq(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_ivf_pq::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_ivf_pq::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto queries_view = raft::make_host_matrix_view( + queries, IdxT(batch_size), IdxT(dim_)); + auto neighbors_view = raft::make_host_matrix_view( + (IdxT*)neighbors, IdxT(batch_size), IdxT(k)); + auto distances_view = raft::make_host_matrix_view( + distances, IdxT(batch_size), IdxT(k)); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +} // namespace cuvs::bench \ No newline at end of file diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 2459d521d..e28572457 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -2149,7 +2149,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = "CUVS_BUILD_MG_ALGOS=1" # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 73ce80b41..60b8cc122 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include #include #include #include // get_device_for_address @@ -636,5 +637,56 @@ enable_if_valid_list_t deserialize_list(const raft::resources& handle, const typename ListT::spec_type& store_spec, const typename ListT::spec_type& device_spec); } // namespace ivf +} // namespace cuvs::neighbors + +namespace cuvs::neighbors { +using namespace raft; + +template +struct iface { + iface() : mutex_(std::make_shared()) {} + + const IdxT size() const { return index_.value().size(); } + + std::optional index_; + std::shared_ptr mutex_; +}; + +template +void build(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const cuvs::neighbors::index_params* index_params, + raft::mdspan, row_major, Accessor> index_dataset); + +template +void extend( + const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + raft::mdspan, row_major, Accessor1> new_vectors, + std::optional, layout_c_contiguous, Accessor2>> + new_indices); + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::device_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances); + +template +void serialize(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + std::ostream& os); + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + std::istream& is); + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const std::string& filename); }; // namespace cuvs::neighbors diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 67d1b46c0..7f852d635 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -1168,7 +1168,7 @@ void extend(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1209,7 +1209,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1250,7 +1250,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp index 3ce5f382f..ae543c9e9 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -1221,6 +1221,75 @@ void extend(raft::resources const& handle, std::optional> new_indices, cuvs::neighbors::ivf_pq::index* idx); +/** + * @brief Extend the index with the new data. + * + * Note, the user can set a stream pool in the input raft::resource with + * at least one stream to enable kernel and copy overlapping. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // optional: create a stream pool with at least one stream to enable kernel and copy + * // overlapping + * raft::resource::set_cuda_stream_pool(handle, std::make_shared(1)); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a host vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_pq::index& idx) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Extend the index with the new data. + * + * Note, the user can set a stream pool in the input raft::resource with + * at least one stream to enable kernel and copy overlapping. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // optional: create a stream pool with at least one stream to enable kernel and copy + * // overlapping + * raft::resource::set_cuda_stream_pool(handle, std::make_shared(1)); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_pq::extend(handle, new_vectors, no_op, &index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a host vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_pq::index* idx); + /** * @brief Extend the index with the new data. * @@ -1405,7 +1474,7 @@ void extend(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1450,7 +1519,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1495,7 +1564,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1540,7 +1609,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, diff --git a/cpp/include/cuvs/neighbors/mg.hpp b/cpp/include/cuvs/neighbors/mg.hpp new file mode 100644 index 000000000..4657fa8fb --- /dev/null +++ b/cpp/include/cuvs/neighbors/mg.hpp @@ -0,0 +1,1367 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef CUVS_BUILD_MG_ALGOS + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#define DEFAULT_SEARCH_BATCH_SIZE 1 << 20 + +/// \defgroup mg_cpp_index_params ANN MG index build parameters + +namespace cuvs::neighbors::mg { +/** Distribution mode */ +/// \ingroup mg_cpp_index_params +enum distribution_mode { + /** Index is replicated on each device, favors throughput */ + REPLICATED, + /** Index is split on several devices, favors scaling */ + SHARDED +}; + +/// \defgroup mg_cpp_search_params ANN MG search parameters + +/** Search mode when using a replicated index */ +/// \ingroup mg_cpp_search_params +enum replicated_search_mode { + /** Search queries are splited to maintain equal load on GPUs */ + LOAD_BALANCER, + /** Each search query is processed by a single GPU in a round-robin fashion */ + ROUND_ROBIN +}; + +/** Merge mode when using a sharded index */ +/// \ingroup mg_cpp_search_params +enum sharded_merge_mode { + /** Search batches are merged on the root rank */ + MERGE_ON_ROOT_RANK, + /** Search batches are merged in a tree reduction fashion */ + TREE_MERGE +}; + +/** Build parameters */ +/// \ingroup mg_cpp_index_params +template +struct index_params : public Upstream { + index_params() : mode(SHARDED) {} + + index_params(const Upstream& sp) : Upstream(sp), mode(SHARDED) {} + + /** Distribution mode */ + cuvs::neighbors::mg::distribution_mode mode = SHARDED; +}; + +/** Search parameters */ +/// \ingroup mg_cpp_search_params +template +struct search_params : public Upstream { + search_params() : search_mode(LOAD_BALANCER), merge_mode(TREE_MERGE) {} + + search_params(const Upstream& sp) + : Upstream(sp), search_mode(LOAD_BALANCER), merge_mode(TREE_MERGE) + { + } + + /** Replicated search mode */ + cuvs::neighbors::mg::replicated_search_mode search_mode = LOAD_BALANCER; + /** Sharded merge mode */ + cuvs::neighbors::mg::sharded_merge_mode merge_mode = TREE_MERGE; +}; + +} // namespace cuvs::neighbors::mg + +namespace cuvs::neighbors::mg { + +using namespace raft; + +template +struct index { + index(distribution_mode mode, int num_ranks_); + index(const raft::device_resources& handle, const std::string& filename); + + index(const index&) = delete; + index(index&&) = default; + auto operator=(const index&) -> index& = delete; + auto operator=(index&&) -> index& = default; + + distribution_mode mode_; + int num_ranks_; + std::vector> ann_interfaces_; + + // for load balancing mechanism + std::shared_ptr> round_robin_counter_; +}; + +/// \defgroup mg_cpp_index_build ANN MG index build + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, half, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, half, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, uint32_t>; + +/// \defgroup mg_cpp_index_extend ANN MG index extend + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, half, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, half, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \defgroup mg_cpp_index_search ANN MG index search + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, half, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, half, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \defgroup mg_cpp_serialize ANN MG index serialization + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, half, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, half, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, uint32_t>& index, + const std::string& filename); + +/// \defgroup mg_cpp_deserialize ANN MG index deserialization + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes an IVF-Flat multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_flat(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_flat(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes an IVF-PQ multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_pq(handle, filename); + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_pq(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes a CAGRA multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_cagra(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_cagra(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \defgroup mg_cpp_distribute ANN MG local index distribution + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized IVF-Flat index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::ivf_flat::index_params index_params; + * auto index = cuvs::neighbors::ivf_flat::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::ivf_flat::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_flat(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_flat(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized IVF-PQ index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::ivf_pq::index_params index_params; + * auto index = cuvs::neighbors::ivf_pq::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::ivf_pq::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_pq(handle, filename); + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_pq(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized CAGRA index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::cagra::index_params index_params; + * auto index = cuvs::neighbors::cagra::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::cagra::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_cagra(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_cagra(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +} // namespace cuvs::neighbors::mg + +#else + +static_assert(false, + "FORBIDEN_MG_ALGORITHM_IMPORT\n\n" + "Please recompile the cuVS library with MG algorithms BUILD_MG_ALGOS=ON.\n"); + +#endif diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 9694a3e7a..b03b8214b 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -29,9 +29,10 @@ #include +namespace cuvs::neighbors::cagra { + static const std::string RAFT_NAME = "raft"; -namespace cuvs::neighbors::cagra { template void add_node_core( raft::resources const& handle, diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index f86ed9ef6..b92ef0ace 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -32,9 +32,10 @@ #include #include -static const std::string RAFT_NAME = "raft"; namespace cuvs::neighbors::cagra::detail { +static const std::string RAFT_NAME = "raft"; + constexpr int serialization_version = 4; /** diff --git a/cpp/src/neighbors/iface/generate_iface.py b/cpp/src/neighbors/iface/generate_iface.py new file mode 100644 index 000000000..794219bbf --- /dev/null +++ b/cpp/src/neighbors/iface/generate_iface.py @@ -0,0 +1,273 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +""" + +include_macro = """ +#include "iface.hpp" +""" + +namespace_macro = """ +namespace cuvs::neighbors { +""" + +footer = """ +} // namespace cuvs::neighbors +""" + +flat_macro = """ +#define CUVS_INST_MG_FLAT(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +pq_macro = """ +#define CUVS_INST_MG_PQ(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +cagra_macro = """ +#define CUVS_INST_MG_CAGRA(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +flat_macros = dict ( + flat = dict( + include=include_macro, + definition=flat_macro, + name="CUVS_INST_MG_FLAT", + ) +) + +pq_macros = dict ( + pq = dict( + include=include_macro, + definition=pq_macro, + name="CUVS_INST_MG_PQ", + ) +) + +cagra_macros = dict ( + cagra = dict( + include=include_macro, + definition=cagra_macro, + name="CUVS_INST_MG_CAGRA", + ) +) + +flat_types = dict( + float_int64_t=("float", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +pq_types = dict( + float_int64_t=("float", "int64_t"), + half_int64_t=("half", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +cagra_types = dict( + float_uint32_t=("float", "uint32_t"), + half_uint32_t=("half", "uint32_t"), + int8_t_uint32_t=("int8_t", "uint32_t"), + uint8_t_uint32_t=("uint8_t", "uint32_t"), +) + +for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"iface_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro['include']) + f.write(namespace_macro) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") + f.write(footer) + + print(f"src/neighbors/iface/{path}") diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp new file mode 100644 index 000000000..a329db429 --- /dev/null +++ b/cpp/src/neighbors/iface/iface.hpp @@ -0,0 +1,198 @@ +#include + +#include +#include +#include +#include +#include + +namespace cuvs::neighbors { + +using namespace raft; + +template +void build(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const cuvs::neighbors::index_params* index_params, + raft::mdspan, row_major, Accessor> index_dataset) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::ivf_flat::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::ivf_pq::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::cagra::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } + resource::sync_stream(handle); + + interface.mutex_->unlock(); +} + +template +void extend( + const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + raft::mdspan, row_major, Accessor1> new_vectors, + std::optional, layout_c_contiguous, Accessor2>> + new_indices) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + auto idx = + cuvs::neighbors::ivf_flat::extend(handle, new_vectors, new_indices, interface.index_.value()); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = + cuvs::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, interface.index_.value()); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + RAFT_FAIL("CAGRA does not implement the extend method"); + } + resource::sync_stream(handle); + + interface.mutex_->unlock(); +} + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances) +{ + // interface.mutex_->lock(); + if constexpr (std::is_same>::value) { + cuvs::neighbors::ivf_flat::search( + handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } else if constexpr (std::is_same>::value) { + cuvs::neighbors::ivf_pq::search(handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } else if constexpr (std::is_same>::value) { + cuvs::neighbors::cagra::search(handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } + resource::sync_stream(handle); + + // interface.mutex_->unlock(); +} + +// for MG ANN only +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances) +{ + // interface.mutex_->lock(); + + int64_t n_rows = h_queries.extent(0); + int64_t n_dims = h_queries.extent(1); + auto d_queries = raft::make_device_matrix(handle, n_rows, n_dims); + raft::copy(d_queries.data_handle(), + h_queries.data_handle(), + n_rows * n_dims, + resource::get_cuda_stream(handle)); + auto d_query_view = raft::make_const_mdspan(d_queries.view()); + + search(handle, interface, search_params, d_query_view, d_neighbors, d_distances); + + // interface.mutex_->unlock(); +} + +template +void serialize(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + std::ostream& os) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + ivf_flat::serialize(handle, os, interface.index_.value()); + } else if constexpr (std::is_same>::value) { + ivf_pq::serialize(handle, os, interface.index_.value()); + } else if constexpr (std::is_same>::value) { + cagra::serialize(handle, os, interface.index_.value(), true); + } + + interface.mutex_->unlock(); +} + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + std::istream& is) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + ivf_flat::index idx(handle); + ivf_flat::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + ivf_pq::index idx(handle); + ivf_pq::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + cagra::index idx(handle); + cagra::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } + + interface.mutex_->unlock(); +} + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const std::string& filename) +{ + interface.mutex_->lock(); + + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + if constexpr (std::is_same>::value) { + ivf_flat::index idx(handle); + ivf_flat::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + ivf_pq::index idx(handle); + ivf_pq::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + cagra::index idx(handle); + cagra::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } + + is.close(); + + interface.mutex_->unlock(); +} + +}; // namespace cuvs::neighbors \ No newline at end of file diff --git a/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu new file mode 100644 index 000000000..b5e329dd8 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(float, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu new file mode 100644 index 000000000..23fcffc59 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(half, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu new file mode 100644 index 000000000..30377ab66 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(int8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu new file mode 100644 index 000000000..59a1640e8 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(uint8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu new file mode 100644 index 000000000..a0a455375 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(float, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu new file mode 100644 index 000000000..9fdd6464f --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(int8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu new file mode 100644 index 000000000..daee59c4a --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(uint8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu new file mode 100644 index 000000000..7282d6bd0 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(float, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu new file mode 100644 index 000000000..4d67f9aed --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(half, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu new file mode 100644 index 000000000..46537b3f9 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(int8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu new file mode 100644 index 000000000..591ac881a --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(uint8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py index 1fabcca8c..c435cc6d9 100644 --- a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py +++ b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py @@ -144,7 +144,7 @@ void search( \\ raft::resources const& handle, \\ const cuvs::neighbors::ivf_flat::search_params& params, \\ - cuvs::neighbors::ivf_flat::index& index, \\ + const cuvs::neighbors::ivf_flat::index& index, \\ raft::device_matrix_view queries, \\ raft::device_matrix_view neighbors, \\ raft::device_matrix_view distances, \\ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu index 3f262d612..87abc0bc0 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu index 4357afb0a..c1e92ae5b 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu index 8265a3e17..4ff8ed770 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py index a5a829967..a2ac048ff 100644 --- a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py +++ b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py @@ -68,7 +68,7 @@ #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \\ void search(raft::resources const& handle, \\ const cuvs::neighbors::ivf_pq::search_params& params, \\ - cuvs::neighbors::ivf_pq::index& index, \\ + const cuvs::neighbors::ivf_pq::index& index, \\ raft::device_matrix_view queries, \\ raft::device_matrix_view neighbors, \\ raft::device_matrix_view distances, \\ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu index 07ee110bc..44e9777ba 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu index cf387cb67..d7446e846 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu index 5ec9093df..c1ffede97 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu index d2e2f3b00..08e4f0536 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/mg/generate_mg.py b/cpp/src/neighbors/mg/generate_mg.py new file mode 100644 index 000000000..af5e60545 --- /dev/null +++ b/cpp/src/neighbors/mg/generate_mg.py @@ -0,0 +1,286 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +""" + +include_macro = """ +#include "mg.cuh" +""" + +namespace_macro = """ +namespace cuvs::neighbors::mg { +""" + +footer = """ +} // namespace cuvs::neighbors::mg +""" + +flat_macro = """ +#define CUVS_INST_MG_FLAT(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void extend(const raft::device_resources& handle, \\ + index, T, IdxT>& index, \\ + raft::host_matrix_view new_vectors, \\ + std::optional> new_indices) \\ + { \\ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_flat(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_flat(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +pq_macro = """ +#define CUVS_INST_MG_PQ(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void extend(const raft::device_resources& handle, \\ + index, T, IdxT>& index, \\ + raft::host_matrix_view new_vectors, \\ + std::optional> new_indices) \\ + { \\ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_pq(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +cagra_macro = """ +#define CUVS_INST_MG_CAGRA(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_cagra(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_cagra(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +flat_macros = dict ( + flat = dict( + include=include_macro, + definition=flat_macro, + name="CUVS_INST_MG_FLAT", + ) +) + +pq_macros = dict ( + pq = dict( + include=include_macro, + definition=pq_macro, + name="CUVS_INST_MG_PQ", + ) +) + +cagra_macros = dict ( + cagra = dict( + include=include_macro, + definition=cagra_macro, + name="CUVS_INST_MG_CAGRA", + ) +) + +flat_types = dict( + float_int64_t=("float", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +pq_types = dict( + float_int64_t=("float", "int64_t"), + half_int64_t=("half", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +cagra_types = dict( + float_uint32_t=("float", "uint32_t"), + half_uint32_t=("half", "uint32_t"), + int8_t_uint32_t=("int8_t", "uint32_t"), + uint8_t_uint32_t=("uint8_t", "uint32_t"), +) + +for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"mg_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro['include']) + f.write(namespace_macro) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") + f.write(footer) + + print(f"src/neighbors/mg/{path}") diff --git a/cpp/src/neighbors/mg/mg.cuh b/cpp/src/neighbors/mg/mg.cuh new file mode 100644 index 000000000..d3f635bc4 --- /dev/null +++ b/cpp/src/neighbors/mg/mg.cuh @@ -0,0 +1,690 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../detail/knn_merge_parts.cuh" +#include +#include +#include +#include + +#include +#include + +namespace cuvs::neighbors { +using namespace raft; + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances); +} // namespace cuvs::neighbors + +namespace cuvs::neighbors::mg { +void check_omp_threads(const int requirements); +} // namespace cuvs::neighbors::mg + +namespace cuvs::neighbors::mg::detail { +using namespace cuvs::neighbors; +using namespace raft; + +// local index deserialization and distribution +template +void deserialize_and_distribute(const raft::device_resources& handle, + index& index, + const std::string& filename) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_.emplace_back(); + cuvs::neighbors::deserialize(dev_res, ann_if, filename); + } +} + +// MG index deserialization +template +void deserialize(const raft::device_resources& handle, + index& index, + const std::string& filename) +{ + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + index.mode_ = (cuvs::neighbors::mg::distribution_mode)deserialize_scalar(handle, is); + index.num_ranks_ = deserialize_scalar(handle, is); + + if (index.num_ranks_ != clique.num_ranks_) { + RAFT_FAIL("Serialized index has %d ranks whereas NCCL clique has %d ranks", + index.num_ranks_, + clique.num_ranks_); + } + + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_.emplace_back(); + cuvs::neighbors::deserialize(dev_res, ann_if, is); + } + + is.close(); +} + +template +void build(const raft::device_resources& handle, + index& index, + const cuvs::neighbors::index_params* index_params, + raft::host_matrix_view index_dataset) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + if (index.mode_ == REPLICATED) { + int64_t n_rows = index_dataset.extent(0); + RAFT_LOG_INFO("REPLICATED BUILD: %d*%drows", index.num_ranks_, n_rows); + + index.ann_interfaces_.resize(index.num_ranks_); +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::build(dev_res, ann_if, index_params, index_dataset); + resource::sync_stream(dev_res); + } + } else if (index.mode_ == SHARDED) { + int64_t n_rows = index_dataset.extent(0); + int64_t n_cols = index_dataset.extent(1); + int64_t n_rows_per_shard = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + + RAFT_LOG_INFO("SHARDED BUILD: %d*%drows", index.num_ranks_, n_rows_per_shard); + + index.ann_interfaces_.resize(index.num_ranks_); +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + int64_t offset = rank * n_rows_per_shard; + int64_t n_rows_of_current_shard = std::min(n_rows_per_shard, n_rows - offset); + const T* partition_ptr = index_dataset.data_handle() + (offset * n_cols); + auto partition = raft::make_host_matrix_view( + partition_ptr, n_rows_of_current_shard, n_cols); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::build(dev_res, ann_if, index_params, partition); + resource::sync_stream(dev_res); + } + } +} + +template +void extend(const raft::device_resources& handle, + index& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + int64_t n_rows = new_vectors.extent(0); + if (index.mode_ == REPLICATED) { + RAFT_LOG_INFO("REPLICATED EXTEND: %d*%drows", index.num_ranks_, n_rows); + +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::extend(dev_res, ann_if, new_vectors, new_indices); + resource::sync_stream(dev_res); + } + } else if (index.mode_ == SHARDED) { + int64_t n_cols = new_vectors.extent(1); + int64_t n_rows_per_shard = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + + RAFT_LOG_INFO("SHARDED EXTEND: %d*%drows", index.num_ranks_, n_rows_per_shard); + +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + int64_t offset = rank * n_rows_per_shard; + int64_t n_rows_of_current_shard = std::min(n_rows_per_shard, n_rows - offset); + const T* new_vectors_ptr = new_vectors.data_handle() + (offset * n_cols); + auto new_vectors_part = raft::make_host_matrix_view( + new_vectors_ptr, n_rows_of_current_shard, n_cols); + + std::optional> new_indices_part = std::nullopt; + if (new_indices.has_value()) { + const IdxT* new_indices_ptr = new_indices.value().data_handle() + offset; + new_indices_part = raft::make_host_vector_view( + new_indices_ptr, n_rows_of_current_shard); + } + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::extend(dev_res, ann_if, new_vectors_part, new_indices_part); + resource::sync_stream(dev_res); + } + } +} + +template +void sharded_search_with_direct_merge(const raft::comms::nccl_clique& clique, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch, + int64_t n_rows, + int64_t n_cols, + int64_t n_neighbors, + int64_t n_batches) +{ + const auto& root_handle = clique.set_current_device_to_root_rank(); + auto in_neighbors = raft::make_device_matrix( + root_handle, index.num_ranks_ * n_rows_per_batch, n_neighbors); + auto in_distances = raft::make_device_matrix( + root_handle, index.num_ranks_ * n_rows_per_batch, n_neighbors); + auto out_neighbors = + raft::make_device_matrix(root_handle, n_rows_per_batch, n_neighbors); + auto out_distances = + raft::make_device_matrix(root_handle, n_rows_per_batch, n_neighbors); + + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min((int64_t)n_rows_per_batch, n_rows - offset); + int64_t part_size = n_rows_of_current_batch * n_neighbors; + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + + const int& requirements = index.num_ranks_; + check_omp_threads(requirements); // should use at least num_ranks_ threads to avoid NCCL hang +#pragma omp parallel for num_threads(index.num_ranks_) + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + + if (rank == clique.root_rank_) { // root rank + uint64_t batch_offset = clique.root_rank_ * part_size; + auto d_neighbors = raft::make_device_matrix_view( + in_neighbors.data_handle() + batch_offset, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix_view( + in_distances.data_handle() + batch_offset, n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors, d_distances); + + // wait for other ranks + ncclGroupStart(); + for (int from_rank = 0; from_rank < index.num_ranks_; from_rank++) { + if (from_rank == clique.root_rank_) continue; + + batch_offset = from_rank * part_size; + ncclRecv(in_neighbors.data_handle() + batch_offset, + part_size * sizeof(IdxT), + ncclUint8, + from_rank, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclRecv(in_distances.data_handle() + batch_offset, + part_size * sizeof(float), + ncclUint8, + from_rank, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + } + ncclGroupEnd(); + resource::sync_stream(dev_res); + } else { // non-root ranks + auto d_neighbors = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors.view(), d_distances.view()); + + // send results to root rank + ncclGroupStart(); + ncclSend(d_neighbors.data_handle(), + part_size * sizeof(IdxT), + ncclUint8, + clique.root_rank_, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclSend(d_distances.data_handle(), + part_size * sizeof(float), + ncclUint8, + clique.root_rank_, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclGroupEnd(); + resource::sync_stream(dev_res); + } + } + + const auto& root_handle_ = clique.set_current_device_to_root_rank(); + auto h_trans = std::vector(index.num_ranks_); + int64_t translation_offset = 0; + for (int rank = 0; rank < index.num_ranks_; rank++) { + h_trans[rank] = translation_offset; + translation_offset += index.ann_interfaces_[rank].size(); + } + auto d_trans = raft::make_device_vector(root_handle_, index.num_ranks_); + raft::copy(d_trans.data_handle(), + h_trans.data(), + index.num_ranks_, + resource::get_cuda_stream(root_handle_)); + + cuvs::neighbors::detail::knn_merge_parts(in_distances.data_handle(), + in_neighbors.data_handle(), + out_distances.data_handle(), + out_neighbors.data_handle(), + n_rows_of_current_batch, + index.num_ranks_, + n_neighbors, + resource::get_cuda_stream(root_handle_), + d_trans.data_handle()); + + raft::copy(neighbors.data_handle() + output_offset, + out_neighbors.data_handle(), + part_size, + resource::get_cuda_stream(root_handle_)); + raft::copy(distances.data_handle() + output_offset, + out_distances.data_handle(), + part_size, + resource::get_cuda_stream(root_handle_)); + + resource::sync_stream(root_handle_); + } +} + +template +void sharded_search_with_tree_merge(const raft::comms::nccl_clique& clique, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch, + int64_t n_rows, + int64_t n_cols, + int64_t n_neighbors, + int64_t n_batches) +{ + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min((int64_t)n_rows_per_batch, n_rows - offset); + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + + const int& requirements = index.num_ranks_; + check_omp_threads(requirements); // should use at least num_ranks_ threads to avoid NCCL hang +#pragma omp parallel for num_threads(index.num_ranks_) + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + + int64_t part_size = n_rows_of_current_batch * n_neighbors; + + auto tmp_neighbors = raft::make_device_matrix( + dev_res, 2 * n_rows_of_current_batch, n_neighbors); + auto tmp_distances = raft::make_device_matrix( + dev_res, 2 * n_rows_of_current_batch, n_neighbors); + auto neighbors_view = raft::make_device_matrix_view( + tmp_neighbors.data_handle(), n_rows_of_current_batch, n_neighbors); + auto distances_view = raft::make_device_matrix_view( + tmp_distances.data_handle(), n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, neighbors_view, distances_view); + + int64_t translation_offset = 0; + for (int r = 0; r < rank; r++) { + translation_offset += index.ann_interfaces_[r].size(); + } + raft::linalg::addScalar(neighbors_view.data_handle(), + neighbors_view.data_handle(), + (IdxT)translation_offset, + part_size, + resource::get_cuda_stream(dev_res)); + + auto d_trans = raft::make_device_vector(dev_res, 2); + cudaMemsetAsync( + d_trans.data_handle(), 0, 2 * sizeof(IdxT), resource::get_cuda_stream(dev_res)); + + int64_t remaining = index.num_ranks_; + int64_t radix = 2; + + while (remaining > 1) { + bool received_something = false; + int64_t offset = radix / 2; + ncclGroupStart(); + if (rank % radix == 0) // This is one of the receivers + { + int other_id = rank + offset; + if (other_id < index.num_ranks_) // Make sure someone's sending anything + { + ncclRecv(tmp_neighbors.data_handle() + part_size, + part_size * sizeof(IdxT), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclRecv(tmp_distances.data_handle() + part_size, + part_size * sizeof(float), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + received_something = true; + } + } else if (rank % radix == offset) // This is one of the senders + { + int other_id = rank - offset; + ncclSend(tmp_neighbors.data_handle(), + part_size * sizeof(IdxT), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclSend(tmp_distances.data_handle(), + part_size * sizeof(float), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + } + ncclGroupEnd(); + + remaining = (remaining + 1) / 2; + radix *= 2; + + if (received_something) { + // merge inplace + cuvs::neighbors::detail::knn_merge_parts(tmp_distances.data_handle(), + tmp_neighbors.data_handle(), + tmp_distances.data_handle(), + tmp_neighbors.data_handle(), + n_rows_of_current_batch, + 2, + n_neighbors, + resource::get_cuda_stream(dev_res), + d_trans.data_handle()); + + // If done, copy the final result + if (remaining <= 1) { + raft::copy(neighbors.data_handle() + output_offset, + tmp_neighbors.data_handle(), + part_size, + resource::get_cuda_stream(dev_res)); + raft::copy(distances.data_handle() + output_offset, + tmp_distances.data_handle(), + part_size, + resource::get_cuda_stream(dev_res)); + + resource::sync_stream(dev_res); + } + } + } + } + } +} + +template +void run_search_batch(const raft::comms::nccl_clique& clique, + const index& index, + int rank, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view& queries, + raft::host_matrix_view& neighbors, + raft::host_matrix_view& distances, + int64_t query_offset, + int64_t output_offset, + int64_t n_rows_of_current_batch, + int64_t n_cols, + int64_t n_neighbors) +{ + int dev_id = clique.device_ids_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + auto d_neighbors = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors.view(), d_distances.view()); + + raft::copy(neighbors.data_handle() + output_offset, + d_neighbors.data_handle(), + n_rows_of_current_batch * n_neighbors, + resource::get_cuda_stream(dev_res)); + raft::copy(distances.data_handle() + output_offset, + d_distances.data_handle(), + n_rows_of_current_batch * n_neighbors, + resource::get_cuda_stream(dev_res)); + + resource::sync_stream(dev_res); +} + +template +void search(const raft::device_resources& handle, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + int64_t n_rows = queries.extent(0); + int64_t n_cols = queries.extent(1); + int64_t n_neighbors = neighbors.extent(1); + + if (index.mode_ == REPLICATED) { + cuvs::neighbors::mg::replicated_search_mode search_mode; + if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + search_mode = mg_search_params->search_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + search_mode = mg_search_params->search_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>(search_params); + search_mode = mg_search_params->search_mode; + } + + if (search_mode == LOAD_BALANCER) { + int64_t n_rows_per_rank = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + n_rows_per_batch = + std::min(n_rows_per_batch, n_rows_per_rank); // get at least num_ranks_ batches + int64_t n_batches = raft::ceildiv(n_rows, (int64_t)n_rows_per_batch); + if (n_batches <= 1) n_rows_per_batch = n_rows; + + RAFT_LOG_INFO( + "REPLICATED SEARCH IN LOAD BALANCER MODE: %d*%drows", n_batches, n_rows_per_batch); + +#pragma omp parallel for + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int rank = batch_idx % index.num_ranks_; // alternate GPUs + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min(n_rows_per_batch, n_rows - offset); + + run_search_batch(clique, + index, + rank, + search_params, + queries, + neighbors, + distances, + query_offset, + output_offset, + n_rows_of_current_batch, + n_cols, + n_neighbors); + } + } else if (search_mode == ROUND_ROBIN) { + RAFT_LOG_INFO("REPLICATED SEARCH IN ROUND ROBIN MODE: %d*%drows", 1, n_rows); + + ASSERT(n_rows <= n_rows_per_batch, + "In round-robin mode, n_rows must lower or equal to n_rows_per_batch"); + + auto& rrc = *index.round_robin_counter_; + int64_t rank = rrc++; + rank %= index.num_ranks_; + + run_search_batch(clique, + index, + rank, + search_params, + queries, + neighbors, + distances, + 0, + 0, + n_rows, + n_cols, + n_neighbors); + } + } else if (index.mode_ == SHARDED) { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + merge_mode = mg_search_params->merge_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + merge_mode = mg_search_params->merge_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>(search_params); + merge_mode = mg_search_params->merge_mode; + } + + int64_t n_batches = raft::ceildiv(n_rows, (int64_t)n_rows_per_batch); + if (n_batches <= 1) n_rows_per_batch = n_rows; + + if (merge_mode == MERGE_ON_ROOT_RANK) { + RAFT_LOG_INFO("SHARDED SEARCH WITH MERGE_ON_ROOT_RANK MERGE MODE: %d*%drows", + n_batches, + n_rows_per_batch); + sharded_search_with_direct_merge(clique, + index, + search_params, + queries, + neighbors, + distances, + n_rows_per_batch, + n_rows, + n_cols, + n_neighbors, + n_batches); + } else if (merge_mode == TREE_MERGE) { + RAFT_LOG_INFO( + "SHARDED SEARCH WITH TREE_MERGE MERGE MODE %d*%drows", n_batches, n_rows_per_batch); + sharded_search_with_tree_merge(clique, + index, + search_params, + queries, + neighbors, + distances, + n_rows_per_batch, + n_rows, + n_cols, + n_neighbors, + n_batches); + } + } +} + +template +void serialize(const raft::device_resources& handle, + const index& index, + const std::string& filename) +{ + std::ofstream of(filename, std::ios::out | std::ios::binary); + if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + serialize_scalar(handle, of, (int)index.mode_); + serialize_scalar(handle, of, index.num_ranks_); + + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::serialize(dev_res, ann_if, of); + } + + of.close(); + if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } +} + +} // namespace cuvs::neighbors::mg::detail + +namespace cuvs::neighbors::mg { +using namespace cuvs::neighbors; +using namespace raft; + +template +index::index(distribution_mode mode, int num_ranks_) + : mode_(mode), + num_ranks_(num_ranks_), + round_robin_counter_(std::make_shared>(0)) +{ +} + +template +index::index(const raft::device_resources& handle, + const std::string& filename) + : round_robin_counter_(std::make_shared>(0)) +{ + cuvs::neighbors::mg::detail::deserialize(handle, *this, filename); +} +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu new file mode 100644 index 000000000..b11610fb4 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(float, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu new file mode 100644 index 000000000..8f76c69a3 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(half, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu new file mode 100644 index 000000000..67b88d742 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(int8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu new file mode 100644 index 000000000..f72174923 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(uint8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu new file mode 100644 index 000000000..4495e2527 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(float, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu new file mode 100644 index 000000000..5494414a6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(int8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu new file mode 100644 index 000000000..35df2146b --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(uint8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu new file mode 100644 index 000000000..c671740e6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(float, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu new file mode 100644 index 000000000..b167239c6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(half, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu new file mode 100644 index 000000000..127baf8fd --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(int8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu new file mode 100644 index 000000000..869e009a5 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(uint8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/nccl_comm.cpp b/cpp/src/neighbors/mg/nccl_comm.cpp new file mode 100644 index 000000000..c4556957a --- /dev/null +++ b/cpp/src/neighbors/mg/nccl_comm.cpp @@ -0,0 +1,8 @@ +#include +#include + +namespace raft::comms { +void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank) +{ +} +} // namespace raft::comms diff --git a/cpp/src/neighbors/mg/omp_checks.cpp b/cpp/src/neighbors/mg/omp_checks.cpp new file mode 100644 index 000000000..e09182dfe --- /dev/null +++ b/cpp/src/neighbors/mg/omp_checks.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace cuvs::neighbors::mg { +using raft::RAFT_NAME; + +void check_omp_threads(const int requirements) +{ + const int max_threads = omp_get_max_threads(); + if (max_threads < requirements) + RAFT_LOG_WARN( + "OpenMP is only allowed %d threads to run %d GPUs. Please increase the number of OpenMP " + "threads to avoid NCCL hangs by modifying the environment variable OMP_NUM_THREADS.", + max_threads, + requirements); +} + +} // namespace cuvs::neighbors::mg diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index bd07bebee..f4d35e438 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -22,7 +22,7 @@ rapids_test_init() function(ConfigureTest) set(options OPTIONAL NOCUDA C_LIB) - set(oneValueArgs NAME GPUS PERCENT) + set(oneValueArgs NAME GPUS PERCENT ADDITIONAL_DEP) set(multiValueArgs PATH TARGETS CONFIGURATIONS) cmake_parse_arguments(_CUVS_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -56,6 +56,7 @@ function(ConfigureTest) $ $ $<$:cuvs::c_api> + ${_CUVS_TEST_ADDITIONAL_DEP} ) set_target_properties( ${TEST_NAME} @@ -159,7 +160,7 @@ if(BUILD_TESTS) 100 ) - ConfigureTest( + ConfigureTest( NAME NEIGHBORS_ANN_VAMANA_TEST PATH @@ -178,6 +179,12 @@ if(BUILD_TESTS) target_compile_definitions(NEIGHBORS_HNSW_TEST PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) endif() + if(BUILD_MG_ALGOS) + ConfigureTest( + NAME NEIGHBORS_MG_TEST PATH neighbors/mg/test_float.cu GPUS 1 PERCENT 100 ADDITIONAL_DEP nccl + ) + endif() + ConfigureTest( NAME DISTANCE_TEST diff --git a/cpp/test/neighbors/mg.cuh b/cpp/test/neighbors/mg.cuh new file mode 100644 index 000000000..be30ca615 --- /dev/null +++ b/cpp/test/neighbors/mg.cuh @@ -0,0 +1,825 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "../test_utils.cuh" +#include "ann_utils.cuh" +#include "naive_knn.cuh" + +#include +#include + +namespace cuvs::neighbors::mg { + +enum class algo_t { IVF_FLAT, IVF_PQ, CAGRA }; +enum class d_mode_t { REPLICATED, SHARDED, LOCAL_THEN_DISTRIBUTED, ROUND_ROBIN }; +enum class m_mode_t { MERGE_ON_ROOT_RANK, TREE_MERGE, UNDEFINED }; + +struct AnnMGInputs { + int64_t num_queries; + int64_t num_db_vecs; + int64_t dim; + int64_t k; + d_mode_t d_mode; + m_mode_t m_mode; + algo_t algo; + int64_t nprobe; + int64_t nlist; + cuvs::distance::DistanceType metric; + bool adaptive_centers; +}; + +template +class AnnMGTest : public ::testing::TestWithParam { + public: + AnnMGTest() + : stream_(resource::get_cuda_stream(handle_)), + clique_(raft::resource::get_nccl_clique(handle_)), + ps(::testing::TestWithParam::GetParam()), + d_index_dataset(0, stream_), + d_queries(0, stream_), + h_index_dataset(0), + h_queries(0) + { + } + + void testAnnMG() + { + size_t queries_size = ps.num_queries * ps.k; + std::vector neighbors_ref(queries_size); + std::vector distances_ref(queries_size); + std::vector neighbors_snmg_ann(queries_size); + std::vector distances_snmg_ann(queries_size); + std::vector neighbors_ref_32bits(queries_size); + std::vector neighbors_snmg_ann_32bits(queries_size); + + { + rmm::device_uvector distances_ref_dev(queries_size, stream_); + rmm::device_uvector neighbors_ref_dev(queries_size, stream_); + cuvs::neighbors::naive_knn(handle_, + distances_ref_dev.data(), + neighbors_ref_dev.data(), + d_queries.data(), + d_index_dataset.data(), + ps.num_queries, + ps.num_db_vecs, + ps.dim, + ps.k, + ps.metric); + update_host(distances_ref.data(), distances_ref_dev.data(), queries_size, stream_); + update_host(neighbors_ref.data(), neighbors_ref_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + int64_t n_rows_per_search_batch = 3000; // [3000, 3000, 1000] == 7000 rows + + // IVF-Flat + if (ps.algo == algo_t::IVF_FLAT && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = d_mode; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + cuvs::neighbors::mg::serialize(handle_, index, "mg_ivf_flat_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_flat(handle_, "mg_ivf_flat_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + // IVF-PQ + if (ps.algo == algo_t::IVF_PQ && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = d_mode; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + cuvs::neighbors::mg::serialize(handle_, index, "mg_ivf_pq_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_pq(handle_, "mg_ivf_pq_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + // CAGRA + if (ps.algo == algo_t::CAGRA && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + index_params.mode = d_mode; + + mg::search_params search_params; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann_32bits.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::serialize(handle_, index, "mg_cagra_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_cagra(handle_, "mg_cagra_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref_32bits, + neighbors_snmg_ann_32bits, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann_32bits.begin(), neighbors_snmg_ann_32bits.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_FLAT && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + ivf_flat::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_flat::build(handle_, index_params, index_dataset); + ivf_flat::serialize(handle_, "local_ivf_flat_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_flat(handle_, "local_ivf_flat_index"); + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_PQ && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + ivf_pq::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_pq::build(handle_, index_params, index_dataset); + ivf_pq::serialize(handle_, "local_ivf_pq_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_pq(handle_, "local_ivf_pq_index"); + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::CAGRA && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + cagra::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + + mg::search_params search_params; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::cagra::build(handle_, index_params, index_dataset); + cuvs::neighbors::cagra::serialize(handle_, "local_cagra_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann_32bits.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_cagra(handle_, "local_cagra_index"); + + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref_32bits, + neighbors_snmg_ann_32bits, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann_32bits.begin(), neighbors_snmg_ann_32bits.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_FLAT && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + + if (ps.algo == algo_t::IVF_PQ && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + + if (ps.algo == algo_t::CAGRA && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref_32bits, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + } + + void SetUp() override + { + d_index_dataset.resize(ps.num_db_vecs * ps.dim, stream_); + d_queries.resize(ps.num_queries * ps.dim, stream_); + h_index_dataset.resize(ps.num_db_vecs * ps.dim); + h_queries.resize(ps.num_queries * ps.dim); + + raft::random::RngState r(1234ULL); + if constexpr (std::is_same{}) { + raft::random::uniform( + handle_, r, d_index_dataset.data(), d_index_dataset.size(), DataT(0.1), DataT(2.0)); + raft::random::uniform(handle_, r, d_queries.data(), d_queries.size(), DataT(0.1), DataT(2.0)); + } else { + raft::random::uniformInt( + handle_, r, d_index_dataset.data(), d_index_dataset.size(), DataT(1), DataT(20)); + raft::random::uniformInt(handle_, r, d_queries.data(), d_queries.size(), DataT(1), DataT(20)); + } + + raft::copy(h_index_dataset.data(), + d_index_dataset.data(), + d_index_dataset.size(), + resource::get_cuda_stream(handle_)); + raft::copy( + h_queries.data(), d_queries.data(), d_queries.size(), resource::get_cuda_stream(handle_)); + resource::sync_stream(handle_); + } + + void TearDown() override {} + + private: + raft::device_resources handle_; + rmm::cuda_stream_view stream_; + raft::comms::nccl_clique clique_; + AnnMGInputs ps; + std::vector h_index_dataset; + std::vector h_queries; + rmm::device_uvector d_index_dataset; + rmm::device_uvector d_queries; +}; + +const std::vector inputs = { + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ +}; +} // namespace cuvs::neighbors::mg diff --git a/cpp/test/neighbors/mg/test_float.cu b/cpp/test/neighbors/mg/test_float.cu new file mode 100644 index 000000000..ef9c9a043 --- /dev/null +++ b/cpp/test/neighbors/mg/test_float.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../mg.cuh" + +namespace cuvs::neighbors::mg { + +typedef AnnMGTest AnnMGTestF_float; +TEST_P(AnnMGTestF_float, AnnMG) { this->testAnnMG(); } + +INSTANTIATE_TEST_CASE_P(AnnMGTest, AnnMGTestF_float, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::mg diff --git a/dependencies.yaml b/dependencies.yaml index c18f53305..2b19b987f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -159,6 +159,7 @@ dependencies: packages: - c-compiler - cxx-compiler + - nccl>=2.19 specific: - output_types: conda matrices: diff --git a/docs/source/build.rst b/docs/source/build.rst index 9c7c98989..9d0d391dc 100644 --- a/docs/source/build.rst +++ b/docs/source/build.rst @@ -128,6 +128,16 @@ Once installed, the shared libraries, headers (and any dependencies downloaded a ./build.sh libcuvs --uninstall +Multi-GPU features +^^^^^^^^^^^^^^^^^^ + +To disable the multi-gpu features run : + +.. code-block:: bash + + ./build.sh libcuvs --no-mg + + Building the Googletests ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/cpp_api/neighbors.rst b/docs/source/cpp_api/neighbors.rst index e5a9fc977..0c68c8415 100644 --- a/docs/source/cpp_api/neighbors.rst +++ b/docs/source/cpp_api/neighbors.rst @@ -15,3 +15,4 @@ Nearest Neighbors neighbors_ivf_pq.rst neighbors_nn_descent.rst neighbors_refine.rst + neighbors_mg.rst diff --git a/docs/source/cpp_api/neighbors_mg.rst b/docs/source/cpp_api/neighbors_mg.rst new file mode 100644 index 000000000..b68defec9 --- /dev/null +++ b/docs/source/cpp_api/neighbors_mg.rst @@ -0,0 +1,76 @@ +Distributed ANN +=============== + +The SNMG (single-node multi-GPUs) ANN API provides a set of functions to deploy ANN indexes on multiple GPUs. + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors::mg* + +Index build parameters +---------------------- + +.. doxygengroup:: mg_cpp_index_params + :project: cuvs + :members: + :content-only: + +Search parameters +---------------------- + +.. doxygengroup:: mg_cpp_search_params + :project: cuvs + :members: + :content-only: + +Index build +----------- + +.. doxygengroup:: mg_cpp_index_build + :project: cuvs + :members: + :content-only: + +Index extend +------------ + +.. doxygengroup:: mg_cpp_index_extend + :project: cuvs + :members: + :content-only: + +Index search +------------ + +.. doxygengroup:: mg_cpp_index_search + :project: cuvs + :members: + :content-only: + +Index serialize +--------------- + +.. doxygengroup:: mg_cpp_serialize + :project: cuvs + :members: + :content-only: + +Index deserialize +----------------- + +.. doxygengroup:: mg_cpp_deserialize + :project: cuvs + :members: + :content-only: + +Distribute pre-built local index +-------------------------------- + +.. doxygengroup:: mg_cpp_distribute + :project: cuvs + :members: + :content-only: