diff --git a/build.sh b/build.sh index 8e837f5dd..7fc0d21b0 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -41,6 +41,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index ce9a7f058..a25393050 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -37,6 +37,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 116e80ac2..bb4a96d48 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -34,6 +34,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7f7ad045d..bd1b95ae8 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -34,6 +34,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - make +- nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 73c42ca71..554ad41ab 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-aarch64=11.8 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 473e50bc6..dc38f3565 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusparse=11.7.5.86 - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 8a877c4c0..aeb23a9ef 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -32,6 +32,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 54859a77f..3a408cd64 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -32,6 +32,7 @@ dependencies: - libcusparse-dev - librmm==24.10.*,>=0.0.0a0 - matplotlib +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/recipes/libcuvs/conda_build_config.yaml b/conda/recipes/libcuvs/conda_build_config.yaml index e165f7ed9..b8c49943e 100644 --- a/conda/recipes/libcuvs/conda_build_config.yaml +++ b/conda/recipes/libcuvs/conda_build_config.yaml @@ -22,6 +22,9 @@ cmake_version: h5py_version: - ">=3.8.0" +nccl_version: + - ">=2.19" + # The CTK libraries below are missing from the conda-forge::cudatoolkit package # for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages # and the "*_run_*" version specifiers correspond to `11.x` packages. diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index 6d7712a12..46552c397 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -65,6 +65,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -131,6 +132,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }} @@ -198,6 +200,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version ={{ cuda_version }} - openblas # required by some CPU algos in benchmarks @@ -269,6 +272,7 @@ outputs: host: - librmm ={{ minor_version }} - libraft-headers ={{ minor_version }} + - nccl {{ nccl_version }} - {{ pin_subpackage('libcuvs', exact=True) }} - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8506bb542..3e98a247e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -57,6 +57,7 @@ option(BUILD_C_LIBRARY "Build cuVS C API library" OFF) option(BUILD_C_TESTS "Build cuVS C API tests" OFF) option(BUILD_CUVS_BENCH "Build cuVS ann benchmarks" OFF) option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) +option(BUILD_MG_ALGOS "Build with multi-GPU support" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF @@ -293,6 +294,24 @@ if(BUILD_SHARED_LIBS) "$<$:${CUVS_CUDA_FLAGS}>" ) + if(BUILD_MG_ALGOS) + set(CUVS_MG_ALGOS + src/neighbors/mg/mg_flat_float_int64_t.cu + src/neighbors/mg/mg_flat_int8_t_int64_t.cu + src/neighbors/mg/mg_flat_uint8_t_int64_t.cu + src/neighbors/mg/mg_pq_float_int64_t.cu + src/neighbors/mg/mg_pq_half_int64_t.cu + src/neighbors/mg/mg_pq_int8_t_int64_t.cu + src/neighbors/mg/mg_pq_uint8_t_int64_t.cu + src/neighbors/mg/mg_cagra_float_uint32_t.cu + src/neighbors/mg/mg_cagra_half_uint32_t.cu + src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu + src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu + src/neighbors/mg/omp_checks.cpp + src/neighbors/mg/nccl_comm.cpp + ) + endif() + add_library( cuvs_objs OBJECT src/cluster/kmeans_balanced_fit_float.cu @@ -373,6 +392,17 @@ if(BUILD_SHARED_LIBS) src/neighbors/cagra_serialize_half.cu src/neighbors/cagra_serialize_int8.cu src/neighbors/cagra_serialize_uint8.cu + src/neighbors/iface/iface_cagra_float_uint32_t.cu + src/neighbors/iface/iface_cagra_half_uint32_t.cu + src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu + src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu + src/neighbors/iface/iface_flat_float_int64_t.cu + src/neighbors/iface/iface_flat_int8_t_int64_t.cu + src/neighbors/iface/iface_flat_uint8_t_int64_t.cu + src/neighbors/iface/iface_pq_float_int64_t.cu + src/neighbors/iface/iface_pq_half_int64_t.cu + src/neighbors/iface/iface_pq_int8_t_int64_t.cu + src/neighbors/iface/iface_pq_uint8_t_int64_t.cu src/neighbors/detail/cagra/cagra_build.cpp src/neighbors/detail/cagra/topk_for_cagra/topk.cu $<$:src/neighbors/hnsw.cpp> @@ -440,6 +470,7 @@ if(BUILD_SHARED_LIBS) src/selection/select_k_half_uint32_t.cu src/stats/silhouette_score.cu src/stats/trustworthiness_score.cu + ${CUVS_MG_ALGOS} ) set_target_properties( @@ -526,22 +557,30 @@ if(BUILD_SHARED_LIBS) ${CUVS_CUSPARSE_DEPENDENCY} ${CUVS_CURAND_DEPENDENCY} ) + if(BUILD_MG_ALGOS) + set(CUVS_COMMS_DEPENDENCY nccl) + endif() + # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. target_link_libraries( cuvs PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} PRIVATE nvidia::cutlass::cutlass $ - cuvs-cagra-search + cuvs-cagra-search ${CUVS_COMMS_DEPENDENCY} ) target_link_libraries( cuvs_static PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES} PRIVATE nvidia::cutlass::cutlass $ - cuvs-cagra-search ) endif() + if(BUILD_MG_ALGOS) + target_compile_definitions(cuvs PUBLIC CUVS_BUILD_MG_ALGOS) + target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_MG_ALGOS) + endif() + if(BUILD_CAGRA_HNSWLIB) target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib) target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) @@ -674,7 +713,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB include(CPack) install( - TARGETS cuvs cuvs_static cuvs-cagra-search + TARGETS cuvs cuvs_static DESTINATION ${lib_dir} COMPONENT cuvs EXPORT cuvs-exports diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 8cbf8c8b3..c36e70ace 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -32,6 +32,7 @@ option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benc option(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB "Include cuVS CAGRA with HNSW search in benchmark" ON) option(CUVS_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" OFF) +option(CUVS_ANN_BENCH_USE_CUVS_MG "Include cuVS ann mg algorithm in benchmark" ${BUILD_MG_ALGOS}) option(CUVS_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF ) @@ -55,6 +56,7 @@ if(BUILD_CPU_ONLY) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OFF) set(CUVS_ANN_BENCH_USE_GGNN OFF) set(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE OFF) + set(CUVS_ANN_BENCH_USE_CUVS_MG OFF) else() set(CUVS_FAISS_ENABLE_GPU ON) endif() @@ -66,6 +68,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OR CUVS_ANN_BENCH_USE_CUVS_CAGRA OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE + OR CUVS_ANN_BENCH_USE_CUVS_MG ) set(CUVS_ANN_BENCH_USE_CUVS ON) endif() @@ -245,6 +248,21 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) ) endif() +if(CUVS_ANN_BENCH_USE_CUVS_MG) + ConfigureAnnBench( + NAME + CUVS_MG + PATH + src/cuvs/cuvs_benchmark.cu + $<$:src/cuvs/cuvs_mg_ivf_flat.cu> + $<$:src/cuvs/cuvs_mg_ivf_pq.cu> + $<$:src/cuvs/cuvs_mg_cagra.cu> + LINKS + cuvs + nccl + ) +endif() + message("CUVS_FAISS_TARGETS: ${CUVS_FAISS_TARGETS}") message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") if(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT) diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h index 22f0cab6f..57d5b1910 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h @@ -45,7 +45,18 @@ extern template class cuvs::bench::cuvs_cagra; extern template class cuvs::bench::cuvs_cagra; #endif -#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG +#include "cuvs_ivf_flat_wrapper.h" +#include "cuvs_mg_ivf_flat_wrapper.h" + +#include "cuvs_ivf_pq_wrapper.h" +#include "cuvs_mg_ivf_pq_wrapper.h" + +#include "cuvs_cagra_wrapper.h" +#include "cuvs_mg_cagra_wrapper.h" +#endif + +#if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, typename cuvs::bench::cuvs_ivf_flat::build_param& param) @@ -64,7 +75,7 @@ void parse_search_param(const nlohmann::json& conf, #endif #if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \ - defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) + defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, typename cuvs::bench::cuvs_ivf_pq::build_param& param) @@ -130,7 +141,8 @@ void parse_search_param(const nlohmann::json& conf, } #endif -#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) +#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || \ + defined(CUVS_ANN_BENCH_USE_CUVS_MG) template void parse_build_param(const nlohmann::json& conf, cuvs::neighbors::nn_descent::index_params& param) { diff --git a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu index a956ab139..893097236 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu @@ -29,6 +29,43 @@ namespace cuvs::bench { +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG +void add_distribution_mode(cuvs::neighbors::mg::distribution_mode* dist_mode, + const nlohmann::json& conf) +{ + if (conf.contains("distribution_mode")) { + std::string distribution_mode = conf.at("distribution_mode"); + if (distribution_mode == "replicated") { + *dist_mode = cuvs::neighbors::mg::distribution_mode::REPLICATED; + } else if (distribution_mode == "sharded") { + *dist_mode = cuvs::neighbors::mg::distribution_mode::SHARDED; + } else { + throw std::runtime_error("invalid value for distribution_mode"); + } + } else { + // default + *dist_mode = cuvs::neighbors::mg::distribution_mode::SHARDED; + } +}; + +void add_merge_mode(cuvs::neighbors::mg::sharded_merge_mode* merge_mode, const nlohmann::json& conf) +{ + if (conf.contains("merge_mode")) { + std::string sharded_merge_mode = conf.at("merge_mode"); + if (sharded_merge_mode == "tree_merge") { + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE; + } else if (sharded_merge_mode == "merge_on_root_rank") { + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::MERGE_ON_ROOT_RANK; + } else { + throw std::runtime_error("invalid value for merge_mode"); + } + } else { + // default + *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE; + } +}; +#endif + template auto create_algo(const std::string& algo_name, const std::string& distance, @@ -71,6 +108,32 @@ auto create_algo(const std::string& algo_name, parse_build_param(conf, param); a = std::make_unique>(metric, dim, param); } +#endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") { + typename cuvs::bench::cuvs_mg_ivf_flat::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + } + + if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") { + typename cuvs::bench::cuvs_mg_ivf_pq::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + + if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") { + typename cuvs::bench::cuvs_mg_cagra::build_param param; + parse_build_param(conf, param); + add_distribution_mode(¶m.mode, conf); + a = std::make_unique>(metric, dim, param); + } + #endif if (!a) { throw std::runtime_error("invalid algo: '" + algo_name + "'"); } @@ -113,6 +176,32 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con return param; } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_MG + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") { + auto param = + std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } + } + + if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } + + if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + add_merge_mode(¶m->merge_mode, conf); + return param; + } +#endif // else throw std::runtime_error("invalid algo: '" + algo_name + "'"); diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index ff854f890..b2ba35eee 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -72,6 +72,23 @@ class cuvs_cagra : public algo, public algo_gpu { std::optional ivf_pq_refine_rate = std::nullopt; std::optional ivf_pq_build_params = std::nullopt; std::optional ivf_pq_search_params = std::nullopt; + + void prepare_build_params(const raft::extent_2d& dataset_extents) + { + if (algo == CagraBuildAlgo::kIvfPq) { + auto pq_params = cuvs::neighbors::cagra::graph_build_params::ivf_pq_params( + dataset_extents, cagra_params.metric); + if (ivf_pq_build_params) { pq_params.build_params = *ivf_pq_build_params; } + if (ivf_pq_search_params) { pq_params.search_params = *ivf_pq_search_params; } + if (ivf_pq_refine_rate) { pq_params.refinement_rate = *ivf_pq_refine_rate; } + cagra_params.graph_build_params = pq_params; + } else if (algo == CagraBuildAlgo::kNnDescent) { + auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params( + cagra_params.intermediate_graph_degree); + if (nn_descent_params) { nn_params = *nn_descent_params; } + cagra_params.graph_build_params = nn_params; + } + } }; cuvs_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1) @@ -168,28 +185,9 @@ template void cuvs_cagra::build(const T* dataset, size_t nrow) { auto dataset_extents = raft::make_extents(nrow, dimension_); + index_params_.prepare_build_params(dataset_extents); auto& params = index_params_.cagra_params; - - if (index_params_.algo == CagraBuildAlgo::kIvfPq) { - auto pq_params = - cuvs::neighbors::cagra::graph_build_params::ivf_pq_params(dataset_extents, params.metric); - if (index_params_.ivf_pq_build_params) { - pq_params.build_params = *index_params_.ivf_pq_build_params; - } - if (index_params_.ivf_pq_search_params) { - pq_params.search_params = *index_params_.ivf_pq_search_params; - } - if (index_params_.ivf_pq_refine_rate) { - pq_params.refinement_rate = *index_params_.ivf_pq_refine_rate; - } - params.graph_build_params = pq_params; - } else if (index_params_.algo == CagraBuildAlgo::kNnDescent) { - auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params( - params.intermediate_graph_degree); - if (index_params_.nn_descent_params) { nn_params = *index_params_.nn_descent_params; } - params.graph_build_params = nn_params; - } auto dataset_view_host = raft::make_mdspan(dataset, dataset_extents); auto dataset_view_device = diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu new file mode 100644 index 000000000..801caa85f --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_cagra_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +template class cuvs_mg_cagra; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h new file mode 100644 index 000000000..50c1ff4db --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_cagra_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +enum class AllocatorType; +enum class CagraBuildAlgo; + +template +class cuvs_mg_cagra : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + struct build_param : public cuvs::bench::cuvs_cagra::build_param { + cuvs::neighbors::mg::distribution_mode mode; + }; + + struct search_param : public cuvs::bench::cuvs_cagra::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1) + : algo(metric, dim), index_params_(param) + { + index_params_.cagra_params.metric = parse_metric_type(metric); + index_params_.ivf_pq_build_params->metric = parse_metric_type(metric); + + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + + void set_search_param(const search_param_base& param) override; + + void set_search_dataset(const T* dataset, size_t nrow) override; + + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + void search_base(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const; + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + // to enable dataset access from GPU memory + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + void save(const std::string& file) const override; + void load(const std::string&) override; + void save_to_hnswlib(const std::string& file) const; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + float refine_ratio_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> + index_; +}; + +template +void cuvs_mg_cagra::build(const T* dataset, size_t nrow) +{ + auto dataset_extents = raft::make_extents(nrow, dim_); + index_params_.prepare_build_params(dataset_extents); + cuvs::neighbors::mg::index_params build_params = index_params_.cagra_params; + build_params.mode = index_params_.mode; + + auto dataset_view = + raft::make_host_matrix_view(dataset, nrow, dim_); + auto idx = cuvs::neighbors::mg::build(handle_, build_params, dataset_view); + index_ = + std::make_shared, T, IdxT>>( + std::move(idx)); +} + +inline auto allocator_to_string(AllocatorType mem_type) -> std::string; + +template +void cuvs_mg_cagra::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = static_cast>(sp.p); + cagra::search_params* search_params_ptr_ = static_cast(&search_params_); + *search_params_ptr_ = sp.p; + search_params_.merge_mode = sp.merge_mode; + refine_ratio_ = sp.refine_ratio; +} + +template +void cuvs_mg_cagra::set_search_dataset(const T* dataset, size_t nrow) +{ +} + +template +void cuvs_mg_cagra::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_cagra::load(const std::string& file) +{ + index_ = + std::make_shared, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_cagra(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_cagra::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_cagra::search_base( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + static_assert(std::is_integral_v); + static_assert(std::is_integral_v); + + auto queries_view = + raft::make_host_matrix_view(queries, batch_size, dim_); + auto neighbors_view = + raft::make_host_matrix_view((IdxT*)neighbors, batch_size, k); + auto distances_view = + raft::make_host_matrix_view(distances, batch_size, k); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +template +void cuvs_mg_cagra::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto k0 = static_cast(refine_ratio_ * k); + const bool disable_refinement = k0 <= static_cast(k); + + if (disable_refinement) { + search_base(queries, batch_size, k, neighbors, distances); + } else { + throw std::runtime_error("refinement not supported"); + } +} +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu new file mode 100644 index 000000000..20cdc41e3 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_ivf_flat_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_ivf_flat; +// template class cuvs_mg_ivf_flat; +template class cuvs_mg_ivf_flat; +template class cuvs_mg_ivf_flat; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h new file mode 100644 index 000000000..54a0d2fac --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_ivf_flat_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +template +class cuvs_mg_ivf_flat : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + using build_param = cuvs::neighbors::mg::index_params; + + struct search_param : public cuvs::bench::cuvs_ivf_flat::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_ivf_flat(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param) + { + index_params_.metric = parse_metric_type(metric); + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + void set_search_param(const search_param_base& param) override; + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> + index_; +}; + +template +void cuvs_mg_ivf_flat::build(const T* dataset, size_t nrow) +{ + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), IdxT(dim_)); + auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view); + index_ = std::make_shared< + cuvs::neighbors::mg::index, T, IdxT>>(std::move(idx)); +} + +template +void cuvs_mg_ivf_flat::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = sp.ivf_flat_params; + ivf_flat::search_params* search_params_ptr_ = + static_cast(&search_params_); + *search_params_ptr_ = sp.ivf_flat_params; + search_params_.merge_mode = sp.merge_mode; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_mg_ivf_flat::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_ivf_flat::load(const std::string& file) +{ + index_ = std::make_shared< + cuvs::neighbors::mg::index, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_flat(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_ivf_flat::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_ivf_flat::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto queries_view = raft::make_host_matrix_view( + queries, IdxT(batch_size), IdxT(dim_)); + auto neighbors_view = raft::make_host_matrix_view( + (IdxT*)neighbors, IdxT(batch_size), IdxT(k)); + auto distances_view = raft::make_host_matrix_view( + distances, IdxT(batch_size), IdxT(k)); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +} // namespace cuvs::bench \ No newline at end of file diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu new file mode 100644 index 000000000..a74bab6f5 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cuvs_mg_ivf_pq_wrapper.h" + +namespace cuvs::bench { +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +template class cuvs_mg_ivf_pq; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h new file mode 100644 index 000000000..84aea7d4a --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cuvs_ann_bench_utils.h" +#include "cuvs_ivf_pq_wrapper.h" +#include +#include + +namespace cuvs::bench { +using namespace cuvs::neighbors; + +template +class cuvs_mg_ivf_pq : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + using algo::dim_; + + using build_param = cuvs::neighbors::mg::index_params; + + struct search_param : public cuvs::bench::cuvs_ivf_pq::search_param { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + }; + + cuvs_mg_ivf_pq(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param) + { + index_params_.metric = parse_metric_type(metric); + // init nccl clique outside as to not affect benchmark + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_); + } + + void build(const T* dataset, size_t nrow) final; + void set_search_param(const search_param_base& param) override; + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHost; + property.query_memory_type = MemoryType::kHost; + return property; + } + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + auto stream = raft::resource::get_cuda_stream(handle_); + return stream; + } + + [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + raft::device_resources handle_; + build_param index_params_; + cuvs::neighbors::mg::search_params search_params_; + std::shared_ptr, T, IdxT>> index_; +}; + +template +void cuvs_mg_ivf_pq::build(const T* dataset, size_t nrow) +{ + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), IdxT(dim_)); + auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view); + index_ = + std::make_shared, T, IdxT>>( + std::move(idx)); +} + +template +void cuvs_mg_ivf_pq::set_search_param(const search_param_base& param) +{ + auto sp = dynamic_cast(param); + // search_params_ = static_cast>(sp.pq_param); + ivf_pq::search_params* search_params_ptr_ = static_cast(&search_params_); + *search_params_ptr_ = sp.pq_param; + search_params_.merge_mode = sp.merge_mode; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_mg_ivf_pq::save(const std::string& file) const +{ + cuvs::neighbors::mg::serialize(handle_, *index_, file); +} + +template +void cuvs_mg_ivf_pq::load(const std::string& file) +{ + index_ = + std::make_shared, T, IdxT>>( + std::move(cuvs::neighbors::mg::deserialize_pq(handle_, file))); +} + +template +std::unique_ptr> cuvs_mg_ivf_pq::copy() +{ + return std::make_unique>(*this); // use copy constructor +} + +template +void cuvs_mg_ivf_pq::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + auto queries_view = raft::make_host_matrix_view( + queries, IdxT(batch_size), IdxT(dim_)); + auto neighbors_view = raft::make_host_matrix_view( + (IdxT*)neighbors, IdxT(batch_size), IdxT(k)); + auto distances_view = raft::make_host_matrix_view( + distances, IdxT(batch_size), IdxT(k)); + + cuvs::neighbors::mg::search( + handle_, *index_, search_params_, queries_view, neighbors_view, distances_view); +} + +} // namespace cuvs::bench \ No newline at end of file diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 2459d521d..e28572457 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -2149,7 +2149,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = "CUVS_BUILD_MG_ALGOS=1" # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 73ce80b41..60b8cc122 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include #include #include #include // get_device_for_address @@ -636,5 +637,56 @@ enable_if_valid_list_t deserialize_list(const raft::resources& handle, const typename ListT::spec_type& store_spec, const typename ListT::spec_type& device_spec); } // namespace ivf +} // namespace cuvs::neighbors + +namespace cuvs::neighbors { +using namespace raft; + +template +struct iface { + iface() : mutex_(std::make_shared()) {} + + const IdxT size() const { return index_.value().size(); } + + std::optional index_; + std::shared_ptr mutex_; +}; + +template +void build(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const cuvs::neighbors::index_params* index_params, + raft::mdspan, row_major, Accessor> index_dataset); + +template +void extend( + const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + raft::mdspan, row_major, Accessor1> new_vectors, + std::optional, layout_c_contiguous, Accessor2>> + new_indices); + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::device_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances); + +template +void serialize(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + std::ostream& os); + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + std::istream& is); + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const std::string& filename); }; // namespace cuvs::neighbors diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index 67d1b46c0..7f852d635 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -1168,7 +1168,7 @@ void extend(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1209,7 +1209,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1250,7 +1250,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_flat::search_params& params, - cuvs::neighbors::ivf_flat::index& index, + const cuvs::neighbors::ivf_flat::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp index 3ce5f382f..ae543c9e9 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -1221,6 +1221,75 @@ void extend(raft::resources const& handle, std::optional> new_indices, cuvs::neighbors::ivf_pq::index* idx); +/** + * @brief Extend the index with the new data. + * + * Note, the user can set a stream pool in the input raft::resource with + * at least one stream to enable kernel and copy overlapping. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // optional: create a stream pool with at least one stream to enable kernel and copy + * // overlapping + * raft::resource::set_cuda_stream_pool(handle, std::make_shared(1)); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a host vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_pq::index& idx) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Extend the index with the new data. + * + * Note, the user can set a stream pool in the input raft::resource with + * at least one stream to enable kernel and copy overlapping. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // optional: create a stream pool with at least one stream to enable kernel and copy + * // overlapping + * raft::resource::set_cuda_stream_pool(handle, std::make_shared(1)); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_pq::extend(handle, new_vectors, no_op, &index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a host matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a host vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_pq::index* idx); + /** * @brief Extend the index with the new data. * @@ -1405,7 +1474,7 @@ void extend(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1450,7 +1519,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1495,7 +1564,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, @@ -1540,7 +1609,7 @@ void search(raft::resources const& handle, */ void search(raft::resources const& handle, const cuvs::neighbors::ivf_pq::search_params& search_params, - cuvs::neighbors::ivf_pq::index& index, + const cuvs::neighbors::ivf_pq::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, diff --git a/cpp/include/cuvs/neighbors/mg.hpp b/cpp/include/cuvs/neighbors/mg.hpp new file mode 100644 index 000000000..4657fa8fb --- /dev/null +++ b/cpp/include/cuvs/neighbors/mg.hpp @@ -0,0 +1,1367 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef CUVS_BUILD_MG_ALGOS + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#define DEFAULT_SEARCH_BATCH_SIZE 1 << 20 + +/// \defgroup mg_cpp_index_params ANN MG index build parameters + +namespace cuvs::neighbors::mg { +/** Distribution mode */ +/// \ingroup mg_cpp_index_params +enum distribution_mode { + /** Index is replicated on each device, favors throughput */ + REPLICATED, + /** Index is split on several devices, favors scaling */ + SHARDED +}; + +/// \defgroup mg_cpp_search_params ANN MG search parameters + +/** Search mode when using a replicated index */ +/// \ingroup mg_cpp_search_params +enum replicated_search_mode { + /** Search queries are splited to maintain equal load on GPUs */ + LOAD_BALANCER, + /** Each search query is processed by a single GPU in a round-robin fashion */ + ROUND_ROBIN +}; + +/** Merge mode when using a sharded index */ +/// \ingroup mg_cpp_search_params +enum sharded_merge_mode { + /** Search batches are merged on the root rank */ + MERGE_ON_ROOT_RANK, + /** Search batches are merged in a tree reduction fashion */ + TREE_MERGE +}; + +/** Build parameters */ +/// \ingroup mg_cpp_index_params +template +struct index_params : public Upstream { + index_params() : mode(SHARDED) {} + + index_params(const Upstream& sp) : Upstream(sp), mode(SHARDED) {} + + /** Distribution mode */ + cuvs::neighbors::mg::distribution_mode mode = SHARDED; +}; + +/** Search parameters */ +/// \ingroup mg_cpp_search_params +template +struct search_params : public Upstream { + search_params() : search_mode(LOAD_BALANCER), merge_mode(TREE_MERGE) {} + + search_params(const Upstream& sp) + : Upstream(sp), search_mode(LOAD_BALANCER), merge_mode(TREE_MERGE) + { + } + + /** Replicated search mode */ + cuvs::neighbors::mg::replicated_search_mode search_mode = LOAD_BALANCER; + /** Sharded merge mode */ + cuvs::neighbors::mg::sharded_merge_mode merge_mode = TREE_MERGE; +}; + +} // namespace cuvs::neighbors::mg + +namespace cuvs::neighbors::mg { + +using namespace raft; + +template +struct index { + index(distribution_mode mode, int num_ranks_); + index(const raft::device_resources& handle, const std::string& filename); + + index(const index&) = delete; + index(index&&) = default; + auto operator=(const index&) -> index& = delete; + auto operator=(index&&) -> index& = default; + + distribution_mode mode_; + int num_ranks_; + std::vector> ann_interfaces_; + + // for load balancing mechanism + std::shared_ptr> round_robin_counter_; +}; + +/// \defgroup mg_cpp_index_build ANN MG index build + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-Flat MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, half, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed IVF-PQ MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, int64_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, float, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, half, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, int8_t, uint32_t>; + +/// \ingroup mg_cpp_index_build +/** + * @brief Builds a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] index_dataset a row-major matrix on host [n_rows, dim] + * + * @return the constructed CAGRA MG index + */ +auto build(const raft::device_resources& handle, + const mg::index_params& index_params, + raft::host_matrix_view index_dataset) + -> index, uint8_t, uint32_t>; + +/// \defgroup mg_cpp_index_extend ANN MG index extend + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, half, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, int64_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, float, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, half, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, int8_t, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \ingroup mg_cpp_index_extend +/** + * @brief Extends a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::extend(handle, index, new_vectors, std::nullopt); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] new_vectors a row-major matrix on host [n_rows, dim] + * @param[in] new_indices optional vector on host [n_rows], + * `std::nullopt` means default continuous range `[0...n_rows)` + * + */ +void extend(const raft::device_resources& handle, + index, uint8_t, uint32_t>& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices); + +/// \defgroup mg_cpp_index_search ANN MG index search + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, half, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, float, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, half, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, int8_t, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \ingroup mg_cpp_index_search +/** + * @brief Searches a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * cuvs::neighbors::mg::search_params search_params; + * cuvs::neighbors::mg::search(handle, index, search_params, queries, neighbors, + * distances); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] search_params configure the index search + * @param[in] queries a row-major matrix on host [n_rows, dim] + * @param[out] neighbors a row-major matrix on host [n_rows, n_neighbors] + * @param[out] distances a row-major matrix on host [n_rows, n_neighbors] + * @param[in] n_rows_per_batch (optional) search batch size + * + */ +void search(const raft::device_resources& handle, + const index, uint8_t, uint32_t>& index, + const mg::search_params& search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch = DEFAULT_SEARCH_BATCH_SIZE); + +/// \defgroup mg_cpp_serialize ANN MG index serialization + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, half, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, int64_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, float, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, half, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, int8_t, uint32_t>& index, + const std::string& filename); + +/// \ingroup mg_cpp_serialize +/** + * @brief Serializes a multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * @endcode + * + * @param[in] handle + * @param[in] index the pre-built index + * @param[in] filename path to the file to be serialized + * + */ +void serialize(const raft::device_resources& handle, + const index, uint8_t, uint32_t>& index, + const std::string& filename); + +/// \defgroup mg_cpp_deserialize ANN MG index deserialization + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes an IVF-Flat multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_flat(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_flat(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes an IVF-PQ multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_pq(handle, filename); + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_pq(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_deserialize +/** + * @brief Deserializes a CAGRA multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::mg::index_params index_params; + * auto index = cuvs::neighbors::mg::build(handle, index_params, index_dataset); + * const std::string filename = "mg_index.cuvs"; + * cuvs::neighbors::mg::serialize(handle, index, filename); + * auto new_index = cuvs::neighbors::mg::deserialize_cagra(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized + * + */ +template +auto deserialize_cagra(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \defgroup mg_cpp_distribute ANN MG local index distribution + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized IVF-Flat index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::ivf_flat::index_params index_params; + * auto index = cuvs::neighbors::ivf_flat::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::ivf_flat::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_flat(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_flat(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized IVF-PQ index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::ivf_pq::index_params index_params; + * auto index = cuvs::neighbors::ivf_pq::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::ivf_pq::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_pq(handle, filename); + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_pq(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +/// \ingroup mg_cpp_distribute +/** + * @brief Replicates a locally built and serialized CAGRA index to all GPUs to form a distributed + * multi-GPU index + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::neighbors::cagra::index_params index_params; + * auto index = cuvs::neighbors::cagra::build(handle, index_params, index_dataset); + * const std::string filename = "local_index.cuvs"; + * cuvs::neighbors::cagra::serialize(handle, filename, index); + * auto new_index = cuvs::neighbors::mg::distribute_cagra(handle, filename); + * + * @endcode + * + * @param[in] handle + * @param[in] filename path to the file to be deserialized : a local index + * + */ +template +auto distribute_cagra(const raft::device_resources& handle, const std::string& filename) + -> index, T, IdxT>; + +} // namespace cuvs::neighbors::mg + +#else + +static_assert(false, + "FORBIDEN_MG_ALGORITHM_IMPORT\n\n" + "Please recompile the cuVS library with MG algorithms BUILD_MG_ALGOS=ON.\n"); + +#endif diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 9694a3e7a..b03b8214b 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -29,9 +29,10 @@ #include +namespace cuvs::neighbors::cagra { + static const std::string RAFT_NAME = "raft"; -namespace cuvs::neighbors::cagra { template void add_node_core( raft::resources const& handle, diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index f86ed9ef6..b92ef0ace 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -32,9 +32,10 @@ #include #include -static const std::string RAFT_NAME = "raft"; namespace cuvs::neighbors::cagra::detail { +static const std::string RAFT_NAME = "raft"; + constexpr int serialization_version = 4; /** diff --git a/cpp/src/neighbors/iface/generate_iface.py b/cpp/src/neighbors/iface/generate_iface.py new file mode 100644 index 000000000..794219bbf --- /dev/null +++ b/cpp/src/neighbors/iface/generate_iface.py @@ -0,0 +1,273 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +""" + +include_macro = """ +#include "iface.hpp" +""" + +namespace_macro = """ +namespace cuvs::neighbors { +""" + +footer = """ +} // namespace cuvs::neighbors +""" + +flat_macro = """ +#define CUVS_INST_MG_FLAT(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +pq_macro = """ +#define CUVS_INST_MG_PQ(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +cagra_macro = """ +#define CUVS_INST_MG_CAGRA(T, IdxT) \\ + using T_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using T_da= raft::host_device_accessor, raft::memory_type::host>; \\ + using IdxT_ha = raft::host_device_accessor, raft::memory_type::device>; \\ + using IdxT_da = raft::host_device_accessor, raft::memory_type::host>; \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_ha> index_dataset); \\ + \\ + template void build(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::index_params* index_params, \\ + raft::mdspan, row_major, T_da> index_dataset); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_ha> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_ha>> new_indices); \\ + \\ + template void extend(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + raft::mdspan, row_major, T_da> new_vectors, \\ + std::optional, layout_c_contiguous, IdxT_da>> new_indices); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::device_matrix_view queries, \\ + raft::device_matrix_view neighbors, \\ + raft::device_matrix_view distances); \\ + \\ + template void search(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + const cuvs::neighbors::search_params* search_params, \\ + raft::host_matrix_view h_queries, \\ + raft::device_matrix_view d_neighbors, \\ + raft::device_matrix_view d_distances); \\ + \\ + template void serialize(const raft::device_resources& handle, \\ + const cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::ostream& os); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + std::istream& is); \\ + \\ + template void deserialize(const raft::device_resources& handle, \\ + cuvs::neighbors::iface, T, IdxT>& interface, \\ + const std::string& filename); +""" + +flat_macros = dict ( + flat = dict( + include=include_macro, + definition=flat_macro, + name="CUVS_INST_MG_FLAT", + ) +) + +pq_macros = dict ( + pq = dict( + include=include_macro, + definition=pq_macro, + name="CUVS_INST_MG_PQ", + ) +) + +cagra_macros = dict ( + cagra = dict( + include=include_macro, + definition=cagra_macro, + name="CUVS_INST_MG_CAGRA", + ) +) + +flat_types = dict( + float_int64_t=("float", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +pq_types = dict( + float_int64_t=("float", "int64_t"), + half_int64_t=("half", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +cagra_types = dict( + float_uint32_t=("float", "uint32_t"), + half_uint32_t=("half", "uint32_t"), + int8_t_uint32_t=("int8_t", "uint32_t"), + uint8_t_uint32_t=("uint8_t", "uint32_t"), +) + +for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"iface_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro['include']) + f.write(namespace_macro) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") + f.write(footer) + + print(f"src/neighbors/iface/{path}") diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp new file mode 100644 index 000000000..a329db429 --- /dev/null +++ b/cpp/src/neighbors/iface/iface.hpp @@ -0,0 +1,198 @@ +#include + +#include +#include +#include +#include +#include + +namespace cuvs::neighbors { + +using namespace raft; + +template +void build(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const cuvs::neighbors::index_params* index_params, + raft::mdspan, row_major, Accessor> index_dataset) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::ivf_flat::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::ivf_pq::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = cuvs::neighbors::cagra::build( + handle, *static_cast(index_params), index_dataset); + interface.index_.emplace(std::move(idx)); + } + resource::sync_stream(handle); + + interface.mutex_->unlock(); +} + +template +void extend( + const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + raft::mdspan, row_major, Accessor1> new_vectors, + std::optional, layout_c_contiguous, Accessor2>> + new_indices) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + auto idx = + cuvs::neighbors::ivf_flat::extend(handle, new_vectors, new_indices, interface.index_.value()); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + auto idx = + cuvs::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, interface.index_.value()); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + RAFT_FAIL("CAGRA does not implement the extend method"); + } + resource::sync_stream(handle); + + interface.mutex_->unlock(); +} + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances) +{ + // interface.mutex_->lock(); + if constexpr (std::is_same>::value) { + cuvs::neighbors::ivf_flat::search( + handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } else if constexpr (std::is_same>::value) { + cuvs::neighbors::ivf_pq::search(handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } else if constexpr (std::is_same>::value) { + cuvs::neighbors::cagra::search(handle, + *reinterpret_cast(search_params), + interface.index_.value(), + queries, + neighbors, + distances); + } + resource::sync_stream(handle); + + // interface.mutex_->unlock(); +} + +// for MG ANN only +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances) +{ + // interface.mutex_->lock(); + + int64_t n_rows = h_queries.extent(0); + int64_t n_dims = h_queries.extent(1); + auto d_queries = raft::make_device_matrix(handle, n_rows, n_dims); + raft::copy(d_queries.data_handle(), + h_queries.data_handle(), + n_rows * n_dims, + resource::get_cuda_stream(handle)); + auto d_query_view = raft::make_const_mdspan(d_queries.view()); + + search(handle, interface, search_params, d_query_view, d_neighbors, d_distances); + + // interface.mutex_->unlock(); +} + +template +void serialize(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + std::ostream& os) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + ivf_flat::serialize(handle, os, interface.index_.value()); + } else if constexpr (std::is_same>::value) { + ivf_pq::serialize(handle, os, interface.index_.value()); + } else if constexpr (std::is_same>::value) { + cagra::serialize(handle, os, interface.index_.value(), true); + } + + interface.mutex_->unlock(); +} + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + std::istream& is) +{ + interface.mutex_->lock(); + + if constexpr (std::is_same>::value) { + ivf_flat::index idx(handle); + ivf_flat::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + ivf_pq::index idx(handle); + ivf_pq::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + cagra::index idx(handle); + cagra::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } + + interface.mutex_->unlock(); +} + +template +void deserialize(const raft::device_resources& handle, + cuvs::neighbors::iface& interface, + const std::string& filename) +{ + interface.mutex_->lock(); + + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + if constexpr (std::is_same>::value) { + ivf_flat::index idx(handle); + ivf_flat::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + ivf_pq::index idx(handle); + ivf_pq::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } else if constexpr (std::is_same>::value) { + cagra::index idx(handle); + cagra::deserialize(handle, is, &idx); + interface.index_.emplace(std::move(idx)); + } + + is.close(); + + interface.mutex_->unlock(); +} + +}; // namespace cuvs::neighbors \ No newline at end of file diff --git a/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu new file mode 100644 index 000000000..b5e329dd8 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(float, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu new file mode 100644 index 000000000..23fcffc59 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(half, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu new file mode 100644 index 000000000..30377ab66 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(int8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu new file mode 100644 index 000000000..59a1640e8 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_CAGRA(uint8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu new file mode 100644 index 000000000..a0a455375 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(float, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu new file mode 100644 index 000000000..9fdd6464f --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(int8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu new file mode 100644 index 000000000..daee59c4a --- /dev/null +++ b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize( \ + const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_FLAT(uint8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu new file mode 100644 index 000000000..7282d6bd0 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(float, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu new file mode 100644 index 000000000..4d67f9aed --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(half, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu new file mode 100644 index 000000000..46537b3f9 --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(int8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu new file mode 100644 index 000000000..591ac881a --- /dev/null +++ b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_iface.py + * + * Make changes there and run in this directory: + * + * > python generate_iface.py + * + */ + +#include "iface.hpp" + +namespace cuvs::neighbors { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + using T_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using T_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + using IdxT_ha = raft::host_device_accessor, \ + raft::memory_type::device>; \ + using IdxT_da = raft::host_device_accessor, \ + raft::memory_type::host>; \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_ha> index_dataset); \ + \ + template void build( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::index_params* index_params, \ + raft::mdspan, row_major, T_da> index_dataset); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_ha> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_ha>> \ + new_indices); \ + \ + template void extend( \ + const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + raft::mdspan, row_major, T_da> new_vectors, \ + std::optional, layout_c_contiguous, IdxT_da>> \ + new_indices); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void search(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + const cuvs::neighbors::search_params* search_params, \ + raft::host_matrix_view h_queries, \ + raft::device_matrix_view d_neighbors, \ + raft::device_matrix_view d_distances); \ + \ + template void serialize(const raft::device_resources& handle, \ + const cuvs::neighbors::iface, T, IdxT>& interface, \ + std::ostream& os); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + std::istream& is); \ + \ + template void deserialize(const raft::device_resources& handle, \ + cuvs::neighbors::iface, T, IdxT>& interface, \ + const std::string& filename); +CUVS_INST_MG_PQ(uint8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py index 1fabcca8c..c435cc6d9 100644 --- a/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py +++ b/cpp/src/neighbors/ivf_flat/generate_ivf_flat.py @@ -144,7 +144,7 @@ void search( \\ raft::resources const& handle, \\ const cuvs::neighbors::ivf_flat::search_params& params, \\ - cuvs::neighbors::ivf_flat::index& index, \\ + const cuvs::neighbors::ivf_flat::index& index, \\ raft::device_matrix_view queries, \\ raft::device_matrix_view neighbors, \\ raft::device_matrix_view distances, \\ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu index 3f262d612..87abc0bc0 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu index 4357afb0a..c1e92ae5b 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu index 8265a3e17..4ff8ed770 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_flat { #define CUVS_INST_IVF_FLAT_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::search_params& params, \ - cuvs::neighbors::ivf_flat::index& index, \ + const cuvs::neighbors::ivf_flat::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py index a5a829967..a2ac048ff 100644 --- a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py +++ b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py @@ -68,7 +68,7 @@ #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \\ void search(raft::resources const& handle, \\ const cuvs::neighbors::ivf_pq::search_params& params, \\ - cuvs::neighbors::ivf_pq::index& index, \\ + const cuvs::neighbors::ivf_pq::index& index, \\ raft::device_matrix_view queries, \\ raft::device_matrix_view neighbors, \\ raft::device_matrix_view distances, \\ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu index 07ee110bc..44e9777ba 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu index cf387cb67..d7446e846 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu index 5ec9093df..c1ffede97 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu index d2e2f3b00..08e4f0536 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu @@ -32,7 +32,7 @@ namespace cuvs::neighbors::ivf_pq { #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \ void search(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::search_params& params, \ - cuvs::neighbors::ivf_pq::index& index, \ + const cuvs::neighbors::ivf_pq::index& index, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbors, \ raft::device_matrix_view distances, \ diff --git a/cpp/src/neighbors/mg/generate_mg.py b/cpp/src/neighbors/mg/generate_mg.py new file mode 100644 index 000000000..af5e60545 --- /dev/null +++ b/cpp/src/neighbors/mg/generate_mg.py @@ -0,0 +1,286 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +""" + +include_macro = """ +#include "mg.cuh" +""" + +namespace_macro = """ +namespace cuvs::neighbors::mg { +""" + +footer = """ +} // namespace cuvs::neighbors::mg +""" + +flat_macro = """ +#define CUVS_INST_MG_FLAT(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void extend(const raft::device_resources& handle, \\ + index, T, IdxT>& index, \\ + raft::host_matrix_view new_vectors, \\ + std::optional> new_indices) \\ + { \\ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_flat(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_flat(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +pq_macro = """ +#define CUVS_INST_MG_PQ(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void extend(const raft::device_resources& handle, \\ + index, T, IdxT>& index, \\ + raft::host_matrix_view new_vectors, \\ + std::optional> new_indices) \\ + { \\ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_pq(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +cagra_macro = """ +#define CUVS_INST_MG_CAGRA(T, IdxT) \\ + index, T, IdxT> build(const raft::device_resources& handle, \\ + const mg::index_params& index_params, \\ + raft::host_matrix_view index_dataset) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::build(handle, index, \\ + static_cast(&index_params), \\ + index_dataset); \\ + return index; \\ + } \\ + \\ + void search(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const mg::search_params& search_params, \\ + raft::host_matrix_view queries, \\ + raft::host_matrix_view neighbors, \\ + raft::host_matrix_view distances, \\ + int64_t n_rows_per_batch) \\ + { \\ + cuvs::neighbors::mg::detail::search(handle, index, \\ + static_cast(&search_params), \\ + queries, neighbors, distances, n_rows_per_batch); \\ + } \\ + \\ + void serialize(const raft::device_resources& handle, \\ + const index, T, IdxT>& index, \\ + const std::string& filename) \\ + { \\ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \\ + } \\ + \\ + template<> \\ + index, T, IdxT> deserialize_cagra(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + auto idx = index, T, IdxT>(handle, filename); \\ + return idx; \\ + } \\ + \\ + template<> \\ + index, T, IdxT> distribute_cagra(const raft::device_resources& handle, \\ + const std::string& filename) \\ + { \\ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \\ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \\ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \\ + return idx; \\ + } +""" + +flat_macros = dict ( + flat = dict( + include=include_macro, + definition=flat_macro, + name="CUVS_INST_MG_FLAT", + ) +) + +pq_macros = dict ( + pq = dict( + include=include_macro, + definition=pq_macro, + name="CUVS_INST_MG_PQ", + ) +) + +cagra_macros = dict ( + cagra = dict( + include=include_macro, + definition=cagra_macro, + name="CUVS_INST_MG_CAGRA", + ) +) + +flat_types = dict( + float_int64_t=("float", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +pq_types = dict( + float_int64_t=("float", "int64_t"), + half_int64_t=("half", "int64_t"), + int8_t_int64_t=("int8_t", "int64_t"), + uint8_t_int64_t=("uint8_t", "int64_t"), +) + +cagra_types = dict( + float_uint32_t=("float", "uint32_t"), + half_uint32_t=("half", "uint32_t"), + int8_t_uint32_t=("int8_t", "uint32_t"), + uint8_t_uint32_t=("uint8_t", "uint32_t"), +) + +for macros, types in [(flat_macros, flat_types), (pq_macros, pq_types), (cagra_macros, cagra_types)]: + for type_path, (T, IdxT) in types.items(): + for macro_path, macro in macros.items(): + path = f"mg_{macro_path}_{type_path}.cu" + with open(path, "w") as f: + f.write(header) + f.write(macro['include']) + f.write(namespace_macro) + f.write(macro["definition"]) + f.write(f"{macro['name']}({T}, {IdxT});\n\n") + f.write(f"#undef {macro['name']}\n") + f.write(footer) + + print(f"src/neighbors/mg/{path}") diff --git a/cpp/src/neighbors/mg/mg.cuh b/cpp/src/neighbors/mg/mg.cuh new file mode 100644 index 000000000..d3f635bc4 --- /dev/null +++ b/cpp/src/neighbors/mg/mg.cuh @@ -0,0 +1,690 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../detail/knn_merge_parts.cuh" +#include +#include +#include +#include + +#include +#include + +namespace cuvs::neighbors { +using namespace raft; + +template +void search(const raft::device_resources& handle, + const cuvs::neighbors::iface& interface, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view h_queries, + raft::device_matrix_view d_neighbors, + raft::device_matrix_view d_distances); +} // namespace cuvs::neighbors + +namespace cuvs::neighbors::mg { +void check_omp_threads(const int requirements); +} // namespace cuvs::neighbors::mg + +namespace cuvs::neighbors::mg::detail { +using namespace cuvs::neighbors; +using namespace raft; + +// local index deserialization and distribution +template +void deserialize_and_distribute(const raft::device_resources& handle, + index& index, + const std::string& filename) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_.emplace_back(); + cuvs::neighbors::deserialize(dev_res, ann_if, filename); + } +} + +// MG index deserialization +template +void deserialize(const raft::device_resources& handle, + index& index, + const std::string& filename) +{ + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + index.mode_ = (cuvs::neighbors::mg::distribution_mode)deserialize_scalar(handle, is); + index.num_ranks_ = deserialize_scalar(handle, is); + + if (index.num_ranks_ != clique.num_ranks_) { + RAFT_FAIL("Serialized index has %d ranks whereas NCCL clique has %d ranks", + index.num_ranks_, + clique.num_ranks_); + } + + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_.emplace_back(); + cuvs::neighbors::deserialize(dev_res, ann_if, is); + } + + is.close(); +} + +template +void build(const raft::device_resources& handle, + index& index, + const cuvs::neighbors::index_params* index_params, + raft::host_matrix_view index_dataset) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + if (index.mode_ == REPLICATED) { + int64_t n_rows = index_dataset.extent(0); + RAFT_LOG_INFO("REPLICATED BUILD: %d*%drows", index.num_ranks_, n_rows); + + index.ann_interfaces_.resize(index.num_ranks_); +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::build(dev_res, ann_if, index_params, index_dataset); + resource::sync_stream(dev_res); + } + } else if (index.mode_ == SHARDED) { + int64_t n_rows = index_dataset.extent(0); + int64_t n_cols = index_dataset.extent(1); + int64_t n_rows_per_shard = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + + RAFT_LOG_INFO("SHARDED BUILD: %d*%drows", index.num_ranks_, n_rows_per_shard); + + index.ann_interfaces_.resize(index.num_ranks_); +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + int64_t offset = rank * n_rows_per_shard; + int64_t n_rows_of_current_shard = std::min(n_rows_per_shard, n_rows - offset); + const T* partition_ptr = index_dataset.data_handle() + (offset * n_cols); + auto partition = raft::make_host_matrix_view( + partition_ptr, n_rows_of_current_shard, n_cols); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::build(dev_res, ann_if, index_params, partition); + resource::sync_stream(dev_res); + } + } +} + +template +void extend(const raft::device_resources& handle, + index& index, + raft::host_matrix_view new_vectors, + std::optional> new_indices) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + int64_t n_rows = new_vectors.extent(0); + if (index.mode_ == REPLICATED) { + RAFT_LOG_INFO("REPLICATED EXTEND: %d*%drows", index.num_ranks_, n_rows); + +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::extend(dev_res, ann_if, new_vectors, new_indices); + resource::sync_stream(dev_res); + } + } else if (index.mode_ == SHARDED) { + int64_t n_cols = new_vectors.extent(1); + int64_t n_rows_per_shard = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + + RAFT_LOG_INFO("SHARDED EXTEND: %d*%drows", index.num_ranks_, n_rows_per_shard); + +#pragma omp parallel for + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + int64_t offset = rank * n_rows_per_shard; + int64_t n_rows_of_current_shard = std::min(n_rows_per_shard, n_rows - offset); + const T* new_vectors_ptr = new_vectors.data_handle() + (offset * n_cols); + auto new_vectors_part = raft::make_host_matrix_view( + new_vectors_ptr, n_rows_of_current_shard, n_cols); + + std::optional> new_indices_part = std::nullopt; + if (new_indices.has_value()) { + const IdxT* new_indices_ptr = new_indices.value().data_handle() + offset; + new_indices_part = raft::make_host_vector_view( + new_indices_ptr, n_rows_of_current_shard); + } + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::extend(dev_res, ann_if, new_vectors_part, new_indices_part); + resource::sync_stream(dev_res); + } + } +} + +template +void sharded_search_with_direct_merge(const raft::comms::nccl_clique& clique, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch, + int64_t n_rows, + int64_t n_cols, + int64_t n_neighbors, + int64_t n_batches) +{ + const auto& root_handle = clique.set_current_device_to_root_rank(); + auto in_neighbors = raft::make_device_matrix( + root_handle, index.num_ranks_ * n_rows_per_batch, n_neighbors); + auto in_distances = raft::make_device_matrix( + root_handle, index.num_ranks_ * n_rows_per_batch, n_neighbors); + auto out_neighbors = + raft::make_device_matrix(root_handle, n_rows_per_batch, n_neighbors); + auto out_distances = + raft::make_device_matrix(root_handle, n_rows_per_batch, n_neighbors); + + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min((int64_t)n_rows_per_batch, n_rows - offset); + int64_t part_size = n_rows_of_current_batch * n_neighbors; + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + + const int& requirements = index.num_ranks_; + check_omp_threads(requirements); // should use at least num_ranks_ threads to avoid NCCL hang +#pragma omp parallel for num_threads(index.num_ranks_) + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + + if (rank == clique.root_rank_) { // root rank + uint64_t batch_offset = clique.root_rank_ * part_size; + auto d_neighbors = raft::make_device_matrix_view( + in_neighbors.data_handle() + batch_offset, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix_view( + in_distances.data_handle() + batch_offset, n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors, d_distances); + + // wait for other ranks + ncclGroupStart(); + for (int from_rank = 0; from_rank < index.num_ranks_; from_rank++) { + if (from_rank == clique.root_rank_) continue; + + batch_offset = from_rank * part_size; + ncclRecv(in_neighbors.data_handle() + batch_offset, + part_size * sizeof(IdxT), + ncclUint8, + from_rank, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclRecv(in_distances.data_handle() + batch_offset, + part_size * sizeof(float), + ncclUint8, + from_rank, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + } + ncclGroupEnd(); + resource::sync_stream(dev_res); + } else { // non-root ranks + auto d_neighbors = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors.view(), d_distances.view()); + + // send results to root rank + ncclGroupStart(); + ncclSend(d_neighbors.data_handle(), + part_size * sizeof(IdxT), + ncclUint8, + clique.root_rank_, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclSend(d_distances.data_handle(), + part_size * sizeof(float), + ncclUint8, + clique.root_rank_, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclGroupEnd(); + resource::sync_stream(dev_res); + } + } + + const auto& root_handle_ = clique.set_current_device_to_root_rank(); + auto h_trans = std::vector(index.num_ranks_); + int64_t translation_offset = 0; + for (int rank = 0; rank < index.num_ranks_; rank++) { + h_trans[rank] = translation_offset; + translation_offset += index.ann_interfaces_[rank].size(); + } + auto d_trans = raft::make_device_vector(root_handle_, index.num_ranks_); + raft::copy(d_trans.data_handle(), + h_trans.data(), + index.num_ranks_, + resource::get_cuda_stream(root_handle_)); + + cuvs::neighbors::detail::knn_merge_parts(in_distances.data_handle(), + in_neighbors.data_handle(), + out_distances.data_handle(), + out_neighbors.data_handle(), + n_rows_of_current_batch, + index.num_ranks_, + n_neighbors, + resource::get_cuda_stream(root_handle_), + d_trans.data_handle()); + + raft::copy(neighbors.data_handle() + output_offset, + out_neighbors.data_handle(), + part_size, + resource::get_cuda_stream(root_handle_)); + raft::copy(distances.data_handle() + output_offset, + out_distances.data_handle(), + part_size, + resource::get_cuda_stream(root_handle_)); + + resource::sync_stream(root_handle_); + } +} + +template +void sharded_search_with_tree_merge(const raft::comms::nccl_clique& clique, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch, + int64_t n_rows, + int64_t n_cols, + int64_t n_neighbors, + int64_t n_batches) +{ + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min((int64_t)n_rows_per_batch, n_rows - offset); + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + + const int& requirements = index.num_ranks_; + check_omp_threads(requirements); // should use at least num_ranks_ threads to avoid NCCL hang +#pragma omp parallel for num_threads(index.num_ranks_) + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + + int64_t part_size = n_rows_of_current_batch * n_neighbors; + + auto tmp_neighbors = raft::make_device_matrix( + dev_res, 2 * n_rows_of_current_batch, n_neighbors); + auto tmp_distances = raft::make_device_matrix( + dev_res, 2 * n_rows_of_current_batch, n_neighbors); + auto neighbors_view = raft::make_device_matrix_view( + tmp_neighbors.data_handle(), n_rows_of_current_batch, n_neighbors); + auto distances_view = raft::make_device_matrix_view( + tmp_distances.data_handle(), n_rows_of_current_batch, n_neighbors); + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, neighbors_view, distances_view); + + int64_t translation_offset = 0; + for (int r = 0; r < rank; r++) { + translation_offset += index.ann_interfaces_[r].size(); + } + raft::linalg::addScalar(neighbors_view.data_handle(), + neighbors_view.data_handle(), + (IdxT)translation_offset, + part_size, + resource::get_cuda_stream(dev_res)); + + auto d_trans = raft::make_device_vector(dev_res, 2); + cudaMemsetAsync( + d_trans.data_handle(), 0, 2 * sizeof(IdxT), resource::get_cuda_stream(dev_res)); + + int64_t remaining = index.num_ranks_; + int64_t radix = 2; + + while (remaining > 1) { + bool received_something = false; + int64_t offset = radix / 2; + ncclGroupStart(); + if (rank % radix == 0) // This is one of the receivers + { + int other_id = rank + offset; + if (other_id < index.num_ranks_) // Make sure someone's sending anything + { + ncclRecv(tmp_neighbors.data_handle() + part_size, + part_size * sizeof(IdxT), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclRecv(tmp_distances.data_handle() + part_size, + part_size * sizeof(float), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + received_something = true; + } + } else if (rank % radix == offset) // This is one of the senders + { + int other_id = rank - offset; + ncclSend(tmp_neighbors.data_handle(), + part_size * sizeof(IdxT), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + ncclSend(tmp_distances.data_handle(), + part_size * sizeof(float), + ncclUint8, + other_id, + clique.nccl_comms_[rank], + resource::get_cuda_stream(dev_res)); + } + ncclGroupEnd(); + + remaining = (remaining + 1) / 2; + radix *= 2; + + if (received_something) { + // merge inplace + cuvs::neighbors::detail::knn_merge_parts(tmp_distances.data_handle(), + tmp_neighbors.data_handle(), + tmp_distances.data_handle(), + tmp_neighbors.data_handle(), + n_rows_of_current_batch, + 2, + n_neighbors, + resource::get_cuda_stream(dev_res), + d_trans.data_handle()); + + // If done, copy the final result + if (remaining <= 1) { + raft::copy(neighbors.data_handle() + output_offset, + tmp_neighbors.data_handle(), + part_size, + resource::get_cuda_stream(dev_res)); + raft::copy(distances.data_handle() + output_offset, + tmp_distances.data_handle(), + part_size, + resource::get_cuda_stream(dev_res)); + + resource::sync_stream(dev_res); + } + } + } + } + } +} + +template +void run_search_batch(const raft::comms::nccl_clique& clique, + const index& index, + int rank, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view& queries, + raft::host_matrix_view& neighbors, + raft::host_matrix_view& distances, + int64_t query_offset, + int64_t output_offset, + int64_t n_rows_of_current_batch, + int64_t n_cols, + int64_t n_neighbors) +{ + int dev_id = clique.device_ids_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + const raft::device_resources& dev_res = clique.device_resources_[rank]; + auto& ann_if = index.ann_interfaces_[rank]; + + auto query_partition = raft::make_host_matrix_view( + queries.data_handle() + query_offset, n_rows_of_current_batch, n_cols); + auto d_neighbors = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + auto d_distances = raft::make_device_matrix( + dev_res, n_rows_of_current_batch, n_neighbors); + + cuvs::neighbors::search( + dev_res, ann_if, search_params, query_partition, d_neighbors.view(), d_distances.view()); + + raft::copy(neighbors.data_handle() + output_offset, + d_neighbors.data_handle(), + n_rows_of_current_batch * n_neighbors, + resource::get_cuda_stream(dev_res)); + raft::copy(distances.data_handle() + output_offset, + d_distances.data_handle(), + n_rows_of_current_batch * n_neighbors, + resource::get_cuda_stream(dev_res)); + + resource::sync_stream(dev_res); +} + +template +void search(const raft::device_resources& handle, + const index& index, + const cuvs::neighbors::search_params* search_params, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances, + int64_t n_rows_per_batch) +{ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + int64_t n_rows = queries.extent(0); + int64_t n_cols = queries.extent(1); + int64_t n_neighbors = neighbors.extent(1); + + if (index.mode_ == REPLICATED) { + cuvs::neighbors::mg::replicated_search_mode search_mode; + if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + search_mode = mg_search_params->search_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + search_mode = mg_search_params->search_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>(search_params); + search_mode = mg_search_params->search_mode; + } + + if (search_mode == LOAD_BALANCER) { + int64_t n_rows_per_rank = raft::ceildiv(n_rows, (int64_t)index.num_ranks_); + n_rows_per_batch = + std::min(n_rows_per_batch, n_rows_per_rank); // get at least num_ranks_ batches + int64_t n_batches = raft::ceildiv(n_rows, (int64_t)n_rows_per_batch); + if (n_batches <= 1) n_rows_per_batch = n_rows; + + RAFT_LOG_INFO( + "REPLICATED SEARCH IN LOAD BALANCER MODE: %d*%drows", n_batches, n_rows_per_batch); + +#pragma omp parallel for + for (int64_t batch_idx = 0; batch_idx < n_batches; batch_idx++) { + int rank = batch_idx % index.num_ranks_; // alternate GPUs + int64_t offset = batch_idx * n_rows_per_batch; + int64_t query_offset = offset * n_cols; + int64_t output_offset = offset * n_neighbors; + int64_t n_rows_of_current_batch = std::min(n_rows_per_batch, n_rows - offset); + + run_search_batch(clique, + index, + rank, + search_params, + queries, + neighbors, + distances, + query_offset, + output_offset, + n_rows_of_current_batch, + n_cols, + n_neighbors); + } + } else if (search_mode == ROUND_ROBIN) { + RAFT_LOG_INFO("REPLICATED SEARCH IN ROUND ROBIN MODE: %d*%drows", 1, n_rows); + + ASSERT(n_rows <= n_rows_per_batch, + "In round-robin mode, n_rows must lower or equal to n_rows_per_batch"); + + auto& rrc = *index.round_robin_counter_; + int64_t rank = rrc++; + rank %= index.num_ranks_; + + run_search_batch(clique, + index, + rank, + search_params, + queries, + neighbors, + distances, + 0, + 0, + n_rows, + n_cols, + n_neighbors); + } + } else if (index.mode_ == SHARDED) { + cuvs::neighbors::mg::sharded_merge_mode merge_mode; + if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + merge_mode = mg_search_params->merge_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>( + search_params); + merge_mode = mg_search_params->merge_mode; + } else if constexpr (std::is_same>::value) { + const cuvs::neighbors::mg::search_params* mg_search_params = + static_cast*>(search_params); + merge_mode = mg_search_params->merge_mode; + } + + int64_t n_batches = raft::ceildiv(n_rows, (int64_t)n_rows_per_batch); + if (n_batches <= 1) n_rows_per_batch = n_rows; + + if (merge_mode == MERGE_ON_ROOT_RANK) { + RAFT_LOG_INFO("SHARDED SEARCH WITH MERGE_ON_ROOT_RANK MERGE MODE: %d*%drows", + n_batches, + n_rows_per_batch); + sharded_search_with_direct_merge(clique, + index, + search_params, + queries, + neighbors, + distances, + n_rows_per_batch, + n_rows, + n_cols, + n_neighbors, + n_batches); + } else if (merge_mode == TREE_MERGE) { + RAFT_LOG_INFO( + "SHARDED SEARCH WITH TREE_MERGE MERGE MODE %d*%drows", n_batches, n_rows_per_batch); + sharded_search_with_tree_merge(clique, + index, + search_params, + queries, + neighbors, + distances, + n_rows_per_batch, + n_rows, + n_cols, + n_neighbors, + n_batches); + } + } +} + +template +void serialize(const raft::device_resources& handle, + const index& index, + const std::string& filename) +{ + std::ofstream of(filename, std::ios::out | std::ios::binary); + if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); + + serialize_scalar(handle, of, (int)index.mode_); + serialize_scalar(handle, of, index.num_ranks_); + + for (int rank = 0; rank < index.num_ranks_; rank++) { + int dev_id = clique.device_ids_[rank]; + const raft::device_resources& dev_res = clique.device_resources_[rank]; + RAFT_CUDA_TRY(cudaSetDevice(dev_id)); + auto& ann_if = index.ann_interfaces_[rank]; + cuvs::neighbors::serialize(dev_res, ann_if, of); + } + + of.close(); + if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } +} + +} // namespace cuvs::neighbors::mg::detail + +namespace cuvs::neighbors::mg { +using namespace cuvs::neighbors; +using namespace raft; + +template +index::index(distribution_mode mode, int num_ranks_) + : mode_(mode), + num_ranks_(num_ranks_), + round_robin_counter_(std::make_shared>(0)) +{ +} + +template +index::index(const raft::device_resources& handle, + const std::string& filename) + : round_robin_counter_(std::make_shared>(0)) +{ + cuvs::neighbors::mg::detail::deserialize(handle, *this, filename); +} +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu new file mode 100644 index 000000000..b11610fb4 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_float_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(float, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu new file mode 100644 index 000000000..8f76c69a3 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_half_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(half, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu new file mode 100644 index 000000000..67b88d742 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(int8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu b/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu new file mode 100644 index 000000000..f72174923 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_CAGRA(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_cagra( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_CAGRA(uint8_t, uint32_t); + +#undef CUVS_INST_MG_CAGRA + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu new file mode 100644 index 000000000..4495e2527 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_float_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(float, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu new file mode 100644 index 000000000..5494414a6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_int8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(int8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu new file mode 100644 index 000000000..35df2146b --- /dev/null +++ b/cpp/src/neighbors/mg/mg_flat_uint8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_FLAT(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_flat( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_FLAT(uint8_t, int64_t); + +#undef CUVS_INST_MG_FLAT + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu new file mode 100644 index 000000000..c671740e6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_float_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(float, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu new file mode 100644 index 000000000..b167239c6 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_half_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(half, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu new file mode 100644 index 000000000..127baf8fd --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_int8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(int8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu b/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu new file mode 100644 index 000000000..869e009a5 --- /dev/null +++ b/cpp/src/neighbors/mg/mg_pq_uint8_t_int64_t.cu @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by generate_mg.py + * + * Make changes there and run in this directory: + * + * > python generate_mg.py + * + */ + +#include "mg.cuh" + +namespace cuvs::neighbors::mg { + +#define CUVS_INST_MG_PQ(T, IdxT) \ + index, T, IdxT> build( \ + const raft::device_resources& handle, \ + const mg::index_params& index_params, \ + raft::host_matrix_view index_dataset) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + index, T, IdxT> index(index_params.mode, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::build( \ + handle, \ + index, \ + static_cast(&index_params), \ + index_dataset); \ + return index; \ + } \ + \ + void extend(const raft::device_resources& handle, \ + index, T, IdxT>& index, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices) \ + { \ + cuvs::neighbors::mg::detail::extend(handle, index, new_vectors, new_indices); \ + } \ + \ + void search(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const mg::search_params& search_params, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances, \ + int64_t n_rows_per_batch) \ + { \ + cuvs::neighbors::mg::detail::search( \ + handle, \ + index, \ + static_cast(&search_params), \ + queries, \ + neighbors, \ + distances, \ + n_rows_per_batch); \ + } \ + \ + void serialize(const raft::device_resources& handle, \ + const index, T, IdxT>& index, \ + const std::string& filename) \ + { \ + cuvs::neighbors::mg::detail::serialize(handle, index, filename); \ + } \ + \ + template <> \ + index, T, IdxT> deserialize_pq( \ + const raft::device_resources& handle, const std::string& filename) \ + { \ + auto idx = index, T, IdxT>(handle, filename); \ + return idx; \ + } \ + \ + template <> \ + index, T, IdxT> distribute_pq(const raft::device_resources& handle, \ + const std::string& filename) \ + { \ + const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle); \ + auto idx = index, T, IdxT>(REPLICATED, clique.num_ranks_); \ + cuvs::neighbors::mg::detail::deserialize_and_distribute(handle, idx, filename); \ + return idx; \ + } +CUVS_INST_MG_PQ(uint8_t, int64_t); + +#undef CUVS_INST_MG_PQ + +} // namespace cuvs::neighbors::mg diff --git a/cpp/src/neighbors/mg/nccl_comm.cpp b/cpp/src/neighbors/mg/nccl_comm.cpp new file mode 100644 index 000000000..c4556957a --- /dev/null +++ b/cpp/src/neighbors/mg/nccl_comm.cpp @@ -0,0 +1,8 @@ +#include +#include + +namespace raft::comms { +void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank) +{ +} +} // namespace raft::comms diff --git a/cpp/src/neighbors/mg/omp_checks.cpp b/cpp/src/neighbors/mg/omp_checks.cpp new file mode 100644 index 000000000..e09182dfe --- /dev/null +++ b/cpp/src/neighbors/mg/omp_checks.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace cuvs::neighbors::mg { +using raft::RAFT_NAME; + +void check_omp_threads(const int requirements) +{ + const int max_threads = omp_get_max_threads(); + if (max_threads < requirements) + RAFT_LOG_WARN( + "OpenMP is only allowed %d threads to run %d GPUs. Please increase the number of OpenMP " + "threads to avoid NCCL hangs by modifying the environment variable OMP_NUM_THREADS.", + max_threads, + requirements); +} + +} // namespace cuvs::neighbors::mg diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index bd07bebee..f4d35e438 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -22,7 +22,7 @@ rapids_test_init() function(ConfigureTest) set(options OPTIONAL NOCUDA C_LIB) - set(oneValueArgs NAME GPUS PERCENT) + set(oneValueArgs NAME GPUS PERCENT ADDITIONAL_DEP) set(multiValueArgs PATH TARGETS CONFIGURATIONS) cmake_parse_arguments(_CUVS_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -56,6 +56,7 @@ function(ConfigureTest) $ $ $<$:cuvs::c_api> + ${_CUVS_TEST_ADDITIONAL_DEP} ) set_target_properties( ${TEST_NAME} @@ -159,7 +160,7 @@ if(BUILD_TESTS) 100 ) - ConfigureTest( + ConfigureTest( NAME NEIGHBORS_ANN_VAMANA_TEST PATH @@ -178,6 +179,12 @@ if(BUILD_TESTS) target_compile_definitions(NEIGHBORS_HNSW_TEST PUBLIC CUVS_BUILD_CAGRA_HNSWLIB) endif() + if(BUILD_MG_ALGOS) + ConfigureTest( + NAME NEIGHBORS_MG_TEST PATH neighbors/mg/test_float.cu GPUS 1 PERCENT 100 ADDITIONAL_DEP nccl + ) + endif() + ConfigureTest( NAME DISTANCE_TEST diff --git a/cpp/test/neighbors/mg.cuh b/cpp/test/neighbors/mg.cuh new file mode 100644 index 000000000..be30ca615 --- /dev/null +++ b/cpp/test/neighbors/mg.cuh @@ -0,0 +1,825 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "../test_utils.cuh" +#include "ann_utils.cuh" +#include "naive_knn.cuh" + +#include +#include + +namespace cuvs::neighbors::mg { + +enum class algo_t { IVF_FLAT, IVF_PQ, CAGRA }; +enum class d_mode_t { REPLICATED, SHARDED, LOCAL_THEN_DISTRIBUTED, ROUND_ROBIN }; +enum class m_mode_t { MERGE_ON_ROOT_RANK, TREE_MERGE, UNDEFINED }; + +struct AnnMGInputs { + int64_t num_queries; + int64_t num_db_vecs; + int64_t dim; + int64_t k; + d_mode_t d_mode; + m_mode_t m_mode; + algo_t algo; + int64_t nprobe; + int64_t nlist; + cuvs::distance::DistanceType metric; + bool adaptive_centers; +}; + +template +class AnnMGTest : public ::testing::TestWithParam { + public: + AnnMGTest() + : stream_(resource::get_cuda_stream(handle_)), + clique_(raft::resource::get_nccl_clique(handle_)), + ps(::testing::TestWithParam::GetParam()), + d_index_dataset(0, stream_), + d_queries(0, stream_), + h_index_dataset(0), + h_queries(0) + { + } + + void testAnnMG() + { + size_t queries_size = ps.num_queries * ps.k; + std::vector neighbors_ref(queries_size); + std::vector distances_ref(queries_size); + std::vector neighbors_snmg_ann(queries_size); + std::vector distances_snmg_ann(queries_size); + std::vector neighbors_ref_32bits(queries_size); + std::vector neighbors_snmg_ann_32bits(queries_size); + + { + rmm::device_uvector distances_ref_dev(queries_size, stream_); + rmm::device_uvector neighbors_ref_dev(queries_size, stream_); + cuvs::neighbors::naive_knn(handle_, + distances_ref_dev.data(), + neighbors_ref_dev.data(), + d_queries.data(), + d_index_dataset.data(), + ps.num_queries, + ps.num_db_vecs, + ps.dim, + ps.k, + ps.metric); + update_host(distances_ref.data(), distances_ref_dev.data(), queries_size, stream_); + update_host(neighbors_ref.data(), neighbors_ref_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + int64_t n_rows_per_search_batch = 3000; // [3000, 3000, 1000] == 7000 rows + + // IVF-Flat + if (ps.algo == algo_t::IVF_FLAT && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = d_mode; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + cuvs::neighbors::mg::serialize(handle_, index, "mg_ivf_flat_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_flat(handle_, "mg_ivf_flat_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + // IVF-PQ + if (ps.algo == algo_t::IVF_PQ && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = d_mode; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + cuvs::neighbors::mg::serialize(handle_, index, "mg_ivf_pq_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_pq(handle_, "mg_ivf_pq_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + // CAGRA + if (ps.algo == algo_t::CAGRA && + (ps.d_mode == d_mode_t::REPLICATED || ps.d_mode == d_mode_t::SHARDED)) { + distribution_mode d_mode; + if (ps.d_mode == d_mode_t::REPLICATED) + d_mode = distribution_mode::REPLICATED; + else + d_mode = distribution_mode::SHARDED; + + mg::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + index_params.mode = d_mode; + + mg::search_params search_params; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann_32bits.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + { + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::serialize(handle_, index, "mg_cagra_index"); + } + auto new_index = + cuvs::neighbors::mg::deserialize_cagra(handle_, "mg_cagra_index"); + + if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK) + search_params.merge_mode = MERGE_ON_ROOT_RANK; + else + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search( + handle_, new_index, search_params, queries, neighbors, distances, n_rows_per_search_batch); + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref_32bits, + neighbors_snmg_ann_32bits, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann_32bits.begin(), neighbors_snmg_ann_32bits.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_FLAT && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + ivf_flat::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_flat::build(handle_, index_params, index_dataset); + ivf_flat::serialize(handle_, "local_ivf_flat_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_flat(handle_, "local_ivf_flat_index"); + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_PQ && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + ivf_pq::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = LOAD_BALANCER; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_pq::build(handle_, index_params, index_dataset); + ivf_pq::serialize(handle_, "local_ivf_pq_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_pq(handle_, "local_ivf_pq_index"); + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref, + neighbors_snmg_ann, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann.begin(), neighbors_snmg_ann.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::CAGRA && ps.d_mode == d_mode_t::LOCAL_THEN_DISTRIBUTED) { + cagra::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + + mg::search_params search_params; + + { + auto index_dataset = raft::make_device_matrix_view( + d_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::cagra::build(handle_, index_params, index_dataset); + cuvs::neighbors::cagra::serialize(handle_, "local_cagra_index", index); + } + + auto queries = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + auto neighbors = raft::make_host_matrix_view( + neighbors_snmg_ann_32bits.data(), ps.num_queries, ps.k); + auto distances = raft::make_host_matrix_view( + distances_snmg_ann.data(), ps.num_queries, ps.k); + + auto distributed_index = + cuvs::neighbors::mg::distribute_cagra(handle_, "local_cagra_index"); + + search_params.merge_mode = TREE_MERGE; + cuvs::neighbors::mg::search(handle_, + distributed_index, + search_params, + queries, + neighbors, + distances, + n_rows_per_search_batch); + + resource::sync_stream(handle_); + + double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); + ASSERT_TRUE(eval_neighbours(neighbors_ref_32bits, + neighbors_snmg_ann_32bits, + distances_ref, + distances_snmg_ann, + ps.num_queries, + ps.k, + 0.001, + min_recall)); + std::fill(neighbors_snmg_ann_32bits.begin(), neighbors_snmg_ann_32bits.end(), 0); + std::fill(distances_snmg_ann.begin(), distances_snmg_ann.end(), 0); + } + + if (ps.algo == algo_t::IVF_FLAT && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + + if (ps.algo == algo_t::IVF_PQ && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.add_data_on_build = false; + index_params.kmeans_trainset_fraction = 1.0; + index_params.metric_arg = 0; + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.n_probes = ps.nprobe; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + cuvs::neighbors::mg::extend(handle_, index, index_dataset, std::nullopt); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + + if (ps.algo == algo_t::CAGRA && ps.d_mode == d_mode_t::ROUND_ROBIN) { + ASSERT_TRUE(ps.num_queries <= 4); + + mg::index_params index_params; + index_params.graph_build_params = cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.num_db_vecs, ps.dim)); + index_params.mode = REPLICATED; + + mg::search_params search_params; + search_params.search_mode = ROUND_ROBIN; + + auto index_dataset = raft::make_host_matrix_view( + h_index_dataset.data(), ps.num_db_vecs, ps.dim); + auto small_batch_query = raft::make_host_matrix_view( + h_queries.data(), ps.num_queries, ps.dim); + + auto index = cuvs::neighbors::mg::build(handle_, index_params, index_dataset); + + int n_parallel_searches = 16; + std::vector searches_correctness(n_parallel_searches); + std::vector load_balancer_neighbors_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); + std::vector load_balancer_distances_snmg_ann(n_parallel_searches * ps.num_queries * + ps.k); +#pragma omp parallel for + for (uint64_t search_idx = 0; search_idx < searches_correctness.size(); search_idx++) { + uint64_t offset = search_idx * ps.num_queries * ps.k; + auto small_batch_neighbors = raft::make_host_matrix_view( + load_balancer_neighbors_snmg_ann.data() + offset, ps.num_queries, ps.k); + auto small_batch_distances = raft::make_host_matrix_view( + load_balancer_distances_snmg_ann.data() + offset, ps.num_queries, ps.k); + cuvs::neighbors::mg::search(handle_, + index, + search_params, + small_batch_query, + small_batch_neighbors, + small_batch_distances, + n_rows_per_search_batch); + + std::vector small_batch_neighbors_vec( + small_batch_neighbors.data_handle(), + small_batch_neighbors.data_handle() + small_batch_neighbors.size()); + std::vector small_batch_distances_vec( + small_batch_distances.data_handle(), + small_batch_distances.data_handle() + small_batch_distances.size()); + searches_correctness[search_idx] = eval_neighbours(neighbors_ref_32bits, + small_batch_neighbors_vec, + distances_ref, + small_batch_distances_vec, + ps.num_queries, + ps.k, + 0.001, + 0.9); + } + ASSERT_TRUE(std::all_of(searches_correctness.begin(), + searches_correctness.end(), + [](char val) { return val != 0; })); + } + } + + void SetUp() override + { + d_index_dataset.resize(ps.num_db_vecs * ps.dim, stream_); + d_queries.resize(ps.num_queries * ps.dim, stream_); + h_index_dataset.resize(ps.num_db_vecs * ps.dim); + h_queries.resize(ps.num_queries * ps.dim); + + raft::random::RngState r(1234ULL); + if constexpr (std::is_same{}) { + raft::random::uniform( + handle_, r, d_index_dataset.data(), d_index_dataset.size(), DataT(0.1), DataT(2.0)); + raft::random::uniform(handle_, r, d_queries.data(), d_queries.size(), DataT(0.1), DataT(2.0)); + } else { + raft::random::uniformInt( + handle_, r, d_index_dataset.data(), d_index_dataset.size(), DataT(1), DataT(20)); + raft::random::uniformInt(handle_, r, d_queries.data(), d_queries.size(), DataT(1), DataT(20)); + } + + raft::copy(h_index_dataset.data(), + d_index_dataset.data(), + d_index_dataset.size(), + resource::get_cuda_stream(handle_)); + raft::copy( + h_queries.data(), d_queries.data(), d_queries.size(), resource::get_cuda_stream(handle_)); + resource::sync_stream(handle_); + } + + void TearDown() override {} + + private: + raft::device_resources handle_; + rmm::cuda_stream_view stream_; + raft::comms::nccl_clique clique_; + AnnMGInputs ps; + std::vector h_index_dataset; + std::vector h_queries; + rmm::device_uvector d_index_dataset; + rmm::device_uvector d_queries; +}; + +const std::vector inputs = { + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::REPLICATED, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::MERGE_ON_ROOT_RANK, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::SHARDED, + m_mode_t::TREE_MERGE, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {7000, + 10000, + 8, + 16, + d_mode_t::LOCAL_THEN_DISTRIBUTED, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ + + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::IVF_FLAT, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::IVF_PQ, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + + /* + {3, + 10000, + 8, + 16, + d_mode_t::ROUND_ROBIN, + m_mode_t::UNDEFINED, + algo_t::CAGRA, + 40, + 1024, + cuvs::distance::DistanceType::L2Expanded, + true}, + */ +}; +} // namespace cuvs::neighbors::mg diff --git a/cpp/test/neighbors/mg/test_float.cu b/cpp/test/neighbors/mg/test_float.cu new file mode 100644 index 000000000..ef9c9a043 --- /dev/null +++ b/cpp/test/neighbors/mg/test_float.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../mg.cuh" + +namespace cuvs::neighbors::mg { + +typedef AnnMGTest AnnMGTestF_float; +TEST_P(AnnMGTestF_float, AnnMG) { this->testAnnMG(); } + +INSTANTIATE_TEST_CASE_P(AnnMGTest, AnnMGTestF_float, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::mg diff --git a/dependencies.yaml b/dependencies.yaml index c18f53305..2b19b987f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -159,6 +159,7 @@ dependencies: packages: - c-compiler - cxx-compiler + - nccl>=2.19 specific: - output_types: conda matrices: diff --git a/docs/source/build.rst b/docs/source/build.rst index 9c7c98989..9d0d391dc 100644 --- a/docs/source/build.rst +++ b/docs/source/build.rst @@ -128,6 +128,16 @@ Once installed, the shared libraries, headers (and any dependencies downloaded a ./build.sh libcuvs --uninstall +Multi-GPU features +^^^^^^^^^^^^^^^^^^ + +To disable the multi-gpu features run : + +.. code-block:: bash + + ./build.sh libcuvs --no-mg + + Building the Googletests ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/cpp_api/neighbors.rst b/docs/source/cpp_api/neighbors.rst index e5a9fc977..0c68c8415 100644 --- a/docs/source/cpp_api/neighbors.rst +++ b/docs/source/cpp_api/neighbors.rst @@ -15,3 +15,4 @@ Nearest Neighbors neighbors_ivf_pq.rst neighbors_nn_descent.rst neighbors_refine.rst + neighbors_mg.rst diff --git a/docs/source/cpp_api/neighbors_mg.rst b/docs/source/cpp_api/neighbors_mg.rst new file mode 100644 index 000000000..b68defec9 --- /dev/null +++ b/docs/source/cpp_api/neighbors_mg.rst @@ -0,0 +1,76 @@ +Distributed ANN +=============== + +The SNMG (single-node multi-GPUs) ANN API provides a set of functions to deploy ANN indexes on multiple GPUs. + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors::mg* + +Index build parameters +---------------------- + +.. doxygengroup:: mg_cpp_index_params + :project: cuvs + :members: + :content-only: + +Search parameters +---------------------- + +.. doxygengroup:: mg_cpp_search_params + :project: cuvs + :members: + :content-only: + +Index build +----------- + +.. doxygengroup:: mg_cpp_index_build + :project: cuvs + :members: + :content-only: + +Index extend +------------ + +.. doxygengroup:: mg_cpp_index_extend + :project: cuvs + :members: + :content-only: + +Index search +------------ + +.. doxygengroup:: mg_cpp_index_search + :project: cuvs + :members: + :content-only: + +Index serialize +--------------- + +.. doxygengroup:: mg_cpp_serialize + :project: cuvs + :members: + :content-only: + +Index deserialize +----------------- + +.. doxygengroup:: mg_cpp_deserialize + :project: cuvs + :members: + :content-only: + +Distribute pre-built local index +-------------------------------- + +.. doxygengroup:: mg_cpp_distribute + :project: cuvs + :members: + :content-only: