diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 303a6c6b4..7d8bb0022 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -184,6 +184,8 @@ if(BUILD_MICRO_BENCH OR BUILD_ANN_BENCH)
   rapids_cpm_gbench()
 endif()
 
+include(cmake/thirdparty/get_cutlass.cmake)
+
 # ##################################################################################################
 # * cuvs ---------------------------------------------------------------------
 
@@ -368,7 +370,7 @@ target_include_directories(
 
 if(NOT BUILD_CPU_ONLY)
   # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target.
-  target_link_libraries(cuvs PUBLIC raft::raft)
+  target_link_libraries(cuvs PUBLIC raft::raft nvidia::cutlass::cutlass)
 endif()
 
 # Endian detection
diff --git a/cpp/cmake/thirdparty/get_cutlass.cmake b/cpp/cmake/thirdparty/get_cutlass.cmake
index 0123c4b07..61065318b 100644
--- a/cpp/cmake/thirdparty/get_cutlass.cmake
+++ b/cpp/cmake/thirdparty/get_cutlass.cmake
@@ -22,7 +22,7 @@ function(find_and_configure_cutlass)
       CACHE BOOL "Enable only the header library"
   )
   set(CUTLASS_NAMESPACE
-      "raft_cutlass"
+      "cuvs_cutlass"
       CACHE STRING "Top level namespace of CUTLASS"
   )
   set(CUTLASS_ENABLE_CUBLAS
@@ -61,21 +61,21 @@ function(find_and_configure_cutlass)
   # We generate the cutlass-config files when we built cutlass locally, so always do
   # `find_dependency`
   rapids_export_package(
-          BUILD NvidiaCutlass raft-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
+          BUILD NvidiaCutlass cuvs-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
   )
   rapids_export_package(
-          INSTALL NvidiaCutlass raft-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
+          INSTALL NvidiaCutlass cuvs-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
   )
 
   # Tell cmake where it can find the generated NvidiaCutlass-config.cmake we wrote.
   include("${rapids-cmake-dir}/export/find_package_root.cmake")
   rapids_export_find_package_root(
           INSTALL NvidiaCutlass [=[${CMAKE_CURRENT_LIST_DIR}/../]=]
-          EXPORT_SET raft-exports
+          EXPORT_SET cuvs-exports
   )
   rapids_export_find_package_root(
           BUILD NvidiaCutlass [=[${CMAKE_CURRENT_LIST_DIR}]=]
-          EXPORT_SET raft-exports
+          EXPORT_SET cuvs-exports
   )
 endfunction()
 
diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake
index 85829554a..718ac7c2d 100644
--- a/cpp/cmake/thirdparty/get_faiss.cmake
+++ b/cpp/cmake/thirdparty/get_faiss.cmake
@@ -77,14 +77,14 @@ function(find_and_configure_faiss)
         endif()
 
     # We generate the faiss-config files when we built faiss locally, so always do `find_dependency`
-    rapids_export_package(BUILD OpenMP raft-ann-bench-exports) # faiss uses openMP but doesn't export a need for it
-    rapids_export_package(BUILD faiss raft-ann-bench-exports GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS})
-    rapids_export_package(INSTALL faiss raft-ann-bench-exports GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS})
+    rapids_export_package(BUILD OpenMP cuvs-ann-bench-exports) # faiss uses openMP but doesn't export a need for it
+    rapids_export_package(BUILD faiss cuvs-ann-bench-exports GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS})
+    rapids_export_package(INSTALL faiss cuvs-ann-bench-exports GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS})
 
     # Tell cmake where it can find the generated faiss-config.cmake we wrote.
     include("${rapids-cmake-dir}/export/find_package_root.cmake")
     rapids_export_find_package_root(BUILD faiss [=[${CMAKE_CURRENT_LIST_DIR}]=]
-                                    EXPORT_SET raft-ann-bench-exports)
+                                    EXPORT_SET cuvs-ann-bench-exports)
 endfunction()
 
 if(NOT RAFT_FAISS_GIT_TAG)
diff --git a/cpp/cmake/thirdparty/get_ggnn.cmake b/cpp/cmake/thirdparty/get_ggnn.cmake
index 708acb6b8..883fbd68c 100644
--- a/cpp/cmake/thirdparty/get_ggnn.cmake
+++ b/cpp/cmake/thirdparty/get_ggnn.cmake
@@ -37,7 +37,7 @@ endfunction()
 
 # Change pinned tag here to test a commit in CI
 # To use a different RAFT locally, set the CMake variable
-# CPM_raft_SOURCE=/path/to/local/raft
+# CPM_cuvs_SOURCE=/path/to/local/cuvs
 find_and_configure_ggnn(VERSION          0.5
         FORK             cgtuebingen
         PINNED_TAG       release_0.5
diff --git a/cpp/cmake/thirdparty/get_glog.cmake b/cpp/cmake/thirdparty/get_glog.cmake
index 35a9170f9..56ed45274 100644
--- a/cpp/cmake/thirdparty/get_glog.cmake
+++ b/cpp/cmake/thirdparty/get_glog.cmake
@@ -21,8 +21,8 @@ function(find_and_configure_glog)
 
     rapids_cpm_find(glog ${PKG_VERSION}
             GLOBAL_TARGETS      glog::glog
-            BUILD_EXPORT_SET    raft-exports
-            INSTALL_EXPORT_SET  raft-exports
+            BUILD_EXPORT_SET    cuvs-exports
+            INSTALL_EXPORT_SET  cuvs-exports
             CPM_ARGS
             GIT_REPOSITORY         https://github.com/${PKG_FORK}/glog.git
             GIT_TAG                ${PKG_PINNED_TAG}
diff --git a/cpp/cmake/thirdparty/get_hnswlib.cmake b/cpp/cmake/thirdparty/get_hnswlib.cmake
index a4ceacae3..2f0b07f1d 100644
--- a/cpp/cmake/thirdparty/get_hnswlib.cmake
+++ b/cpp/cmake/thirdparty/get_hnswlib.cmake
@@ -47,7 +47,7 @@ endfunction()
 
 # Change pinned tag here to test a commit in CI
 # To use a different RAFT locally, set the CMake variable
-# CPM_raft_SOURCE=/path/to/local/raft
+# CPM_cuvs_SOURCE=/path/to/local/cuvs
 find_and_configure_hnswlib(VERSION  0.6.2
         FORK             nmslib
         PINNED_TAG       v0.6.2
diff --git a/cpp/cmake/thirdparty/get_nlohmann_json.cmake b/cpp/cmake/thirdparty/get_nlohmann_json.cmake
index 5de98a47c..9827b7ee6 100644
--- a/cpp/cmake/thirdparty/get_nlohmann_json.cmake
+++ b/cpp/cmake/thirdparty/get_nlohmann_json.cmake
@@ -21,8 +21,8 @@ function(find_and_configure_nlohmann_json)
 
     rapids_cpm_find(nlohmann_json ${PKG_VERSION}
             GLOBAL_TARGETS      nlohmann_json::nlohmann_json
-            BUILD_EXPORT_SET    raft-bench-ann-exports
-            INSTALL_EXPORT_SET  raft-bench-ann-exports
+            BUILD_EXPORT_SET    cuvs-bench-exports
+            INSTALL_EXPORT_SET  cuvs-bench-exports
             CPM_ARGS
             GIT_REPOSITORY         https://github.com/${PKG_FORK}/json.git
             GIT_TAG                ${PKG_PINNED_TAG}
@@ -32,7 +32,7 @@ endfunction()
 
 # Change pinned tag here to test a commit in CI
 # To use a different RAFT locally, set the CMake variable
-# CPM_raft_SOURCE=/path/to/local/raft
+# CPM_cuvs_SOURCE=/path/to/local/cuvs
 find_and_configure_nlohmann_json(VERSION  3.11.2
         FORK             nlohmann
         PINNED_TAG       v3.11.2
diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index 6128b5c43..d45be4aef 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -35,8 +35,8 @@ function(find_and_configure_raft)
     #-----------------------------------------------------
     rapids_cpm_find(raft ${PKG_VERSION}
             GLOBAL_TARGETS      raft::raft
-            BUILD_EXPORT_SET    raft-template-exports
-            INSTALL_EXPORT_SET  raft-template-exports
+            BUILD_EXPORT_SET    cuvs-template-exports
+            INSTALL_EXPORT_SET  cuvs-template-exports
             COMPONENTS          ${RAFT_COMPONENTS}
             CPM_ARGS
             GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git
diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake
index a303193bc..97796215e 100644
--- a/cpp/cmake/thirdparty/get_rmm.cmake
+++ b/cpp/cmake/thirdparty/get_rmm.cmake
@@ -16,8 +16,8 @@
 
 function(find_and_configure_rmm)
     include(${rapids-cmake-dir}/cpm/rmm.cmake)
-    rapids_cpm_rmm(BUILD_EXPORT_SET raft-exports
-                   INSTALL_EXPORT_SET  raft-exports)
+    rapids_cpm_rmm(BUILD_EXPORT_SET cuvs-exports
+                   INSTALL_EXPORT_SET  cuvs-exports)
 endfunction()
 
 find_and_configure_rmm()
diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake
index 6e37aab40..d2d6cb3e6 100644
--- a/cpp/cmake/thirdparty/get_thrust.cmake
+++ b/cpp/cmake/thirdparty/get_thrust.cmake
@@ -16,9 +16,9 @@
 function(find_and_configure_thrust)
         include(${rapids-cmake-dir}/cpm/thrust.cmake)
 
-        rapids_cpm_thrust( NAMESPACE raft
-                           BUILD_EXPORT_SET raft-exports
-                           INSTALL_EXPORT_SET raft-exports)
+        rapids_cpm_thrust( NAMESPACE cuvs
+                           BUILD_EXPORT_SET cuvs-exports
+                           INSTALL_EXPORT_SET cuvs-exports)
 endfunction()
 
 find_and_configure_thrust()
diff --git a/cpp/include/cuvs/distance/distance-inl.cuh b/cpp/include/cuvs/distance/distance-inl.cuh
index 95d19e562..0abdeacff 100644
--- a/cpp/include/cuvs/distance/distance-inl.cuh
+++ b/cpp/include/cuvs/distance/distance-inl.cuh
@@ -409,7 +409,7 @@ void distance(raft::resources const& handle,
   RAFT_EXPECTS(x.is_exhaustive(), "Input x must be contiguous.");
   RAFT_EXPECTS(y.is_exhaustive(), "Input y must be contiguous.");
 
-  constexpr auto is_rowmajor = std::is_same_v<layout, layout_c_contiguous>;
+  constexpr auto is_rowmajor = std::is_same_v<layout, raft::layout_c_contiguous>;
 
   distance<DistT, DataT, AccT, OutT, IdxT>(handle,
                                            x.data_handle(),
@@ -454,7 +454,7 @@ void pairwise_distance(raft::resources const& handle,
   RAFT_EXPECTS(y.is_exhaustive(), "Input y must be contiguous.");
   RAFT_EXPECTS(dist.is_exhaustive(), "Output must be contiguous.");
 
-  constexpr auto rowmajor = std::is_same_v<layout, layout_c_contiguous>;
+  constexpr auto rowmajor = std::is_same_v<layout, raft::layout_c_contiguous>;
 
   auto stream = raft::resource::get_cuda_stream(handle);
   rmm::device_uvector<char> workspace(0, stream);
diff --git a/cpp/include/cuvs/neighbors/brute_force-inl.cuh b/cpp/include/cuvs/neighbors/brute_force-inl.cuh
index 88904dbae..b5584c5b7 100644
--- a/cpp/include/cuvs/neighbors/brute_force-inl.cuh
+++ b/cpp/include/cuvs/neighbors/brute_force-inl.cuh
@@ -303,7 +303,7 @@ index<T> build(
 {
   // certain distance metrics can benefit by pre-calculating the norms for the index dataset
   // which lets us avoid calculating these at query time
-  std::optional<device_vector<T, int64_t>> norms;
+  std::optional<raft::device_vector<T, int64_t>> norms;
   if (metric == cuvs::distance::DistanceType::L2Expanded ||
       metric == cuvs::distance::DistanceType::L2SqrtExpanded ||
       metric == cuvs::distance::DistanceType::CosineExpanded) {
diff --git a/cpp/include/cuvs/neighbors/brute_force_types.hpp b/cpp/include/cuvs/neighbors/brute_force_types.hpp
index 6e80496f2..0d3252d71 100644
--- a/cpp/include/cuvs/neighbors/brute_force_types.hpp
+++ b/cpp/include/cuvs/neighbors/brute_force_types.hpp
@@ -106,7 +106,7 @@ struct index : ann::index {
   {
     if (norms_) { norms_view_ = raft::make_const_mdspan(norms_.value().view()); }
     update_dataset(res, dataset);
-    resource::sync_stream(res);
+    raft::resource::sync_stream(res);
   }
 
   /** Construct a brute force index from dataset
@@ -121,7 +121,7 @@ struct index : ann::index {
         T metric_arg = 0.0)
     : ann::index(),
       metric_(metric),
-      dataset_(make_device_matrix<T, int64_t>(res, 0, 0)),
+      dataset_(raft::make_device_matrix<T, int64_t>(res, 0, 0)),
       dataset_view_(dataset_view),
       norms_view_(norms_view),
       metric_arg_(metric_arg)
diff --git a/cpp/include/cuvs/neighbors/detail/cagra/search_plan.cuh b/cpp/include/cuvs/neighbors/detail/cagra/search_plan.cuh
index c4db1431a..f83418b5c 100644
--- a/cpp/include/cuvs/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/include/cuvs/neighbors/detail/cagra/search_plan.cuh
@@ -91,7 +91,7 @@ struct search_plan_impl : public search_plan_impl_base {
                    uint32_t topk)
     : search_plan_impl_base(params, dim, graph_degree, topk),
       hashmap(0, raft::resource::get_cuda_stream(res)),
-      num_executed_iterations(0, resource::get_cuda_stream(res)),
+      num_executed_iterations(0, raft::resource::get_cuda_stream(res)),
       dev_seed(0, raft::resource::get_cuda_stream(res)),
       num_seeds(0)
   {
diff --git a/cpp/include/cuvs/neighbors/detail/faiss_select/Select.cuh b/cpp/include/cuvs/neighbors/detail/faiss_select/Select.cuh
index 1ddd610ff..9181fba79 100644
--- a/cpp/include/cuvs/neighbors/detail/faiss_select/Select.cuh
+++ b/cpp/include/cuvs/neighbors/detail/faiss_select/Select.cuh
@@ -191,7 +191,7 @@ struct BlockSelect {
     warpSortAnyRegisters<K, V, NumThreadQ, !Dir, Comp>(threadK, threadV);
 
     constexpr int kNumWarpQRegisters = NumWarpQ / raft::WarpSize;
-    K raft::warpKRegisters[kNumWarpQRegisters];
+    K warpKRegisters[kNumWarpQRegisters];
     V warpVRegisters[kNumWarpQRegisters];
 
 #pragma unroll
@@ -200,18 +200,18 @@ struct BlockSelect {
       warpVRegisters[i] = warpV[i * raft::WarpSize + laneId];
     }
 
-    warpFence();
+    raft::warpFence();
 
     // The warp queue is already sorted, and now that we've sorted the
     // per-thread queue, merge both sorted lists together, producing
     // one sorted list
     warpMergeAnyRegisters<K, V, kNumWarpQRegisters, NumThreadQ, !Dir, Comp, false>(
-      raft::warpKRegisters, warpVRegisters, threadK, threadV);
+      warpKRegisters, warpVRegisters, threadK, threadV);
 
     // Write back out the warp queue
 #pragma unroll
     for (int i = 0; i < kNumWarpQRegisters; ++i) {
-      warpK[i * raft::WarpSize + laneId] = raft::warpKRegisters[i];
+      warpK[i * raft::WarpSize + laneId] = warpKRegisters[i];
       warpV[i * raft::WarpSize + laneId] = warpVRegisters[i];
     }
 
diff --git a/cpp/include/cuvs/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh b/cpp/include/cuvs/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh
index 7d6f98d52..0978c4c7f 100644
--- a/cpp/include/cuvs/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh
+++ b/cpp/include/cuvs/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh
@@ -47,9 +47,9 @@ template <int VecBytes = 16, typename T>
 __device__ inline void copy_vectorized(T* out, const T* in, uint32_t n)
 {
   constexpr int VecElems = VecBytes / sizeof(T);  // NOLINT
-  using align_bytes      = Pow2<(size_t)VecBytes>;
+  using align_bytes      = raft::Pow2<(size_t)VecBytes>;
   if constexpr (VecElems > 1) {
-    using align_elems = Pow2<VecElems>;
+    using align_elems = raft::Pow2<VecElems>;
     if (!align_bytes::areSameAlignOffsets(out, in)) {
       return copy_vectorized<(VecBytes >> 1), T>(out, in, n);
     }
@@ -63,7 +63,7 @@ __device__ inline void copy_vectorized(T* out, const T* in, uint32_t n)
       }
     }
     {  // process main part vectorized
-      using vec_t = typename IOType<T, VecElems>::Type;
+      using vec_t = typename raft::IOType<T, VecElems>::Type;
       copy_vectorized<sizeof(vec_t), vec_t>(
         reinterpret_cast<vec_t*>(out), reinterpret_cast<const vec_t*>(in), align_elems::div(n));
     }
@@ -87,7 +87,7 @@ __device__ inline void copy_vectorized(T* out, const T* in, uint32_t n)
  * between them, and aggregate it using the provided Lambda; one structure per thread, per query,
  * and per index item.
  *
- * @tparam kUnroll elements per loop (normally, kUnroll = WarpSize / Veclen)
+ * @tparam kUnroll elements per loop (normally, kUnroll = raft::WarpSize / Veclen)
  * @tparam Lambda computing the part of the distance for one dimension and aggregating it:
  *                void (AccT& acc, AccT x, AccT y)
  * @tparam Veclen size of the vectorized load
@@ -119,9 +119,9 @@ struct loadAndComputeDist {
 #pragma unroll
     for (int j = 0; j < kUnroll; ++j) {
       T encV[Veclen];
-      ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen);
+      raft::ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen);
       T queryRegs[Veclen];
-      lds(queryRegs, &query_shared[shmemIndex + j * Veclen]);
+      raft::lds(queryRegs, &query_shared[shmemIndex + j * Veclen]);
 #pragma unroll
       for (int k = 0; k < Veclen; ++k) {
         compute_dist(dist, queryRegs[k], encV[k]);
@@ -132,8 +132,8 @@ struct loadAndComputeDist {
   /**
    * Load parts of vectors from the index and query and accumulates the partial distance.
    * This version assumes the query is stored in the global memory and is different for every
-   * thread. One warp loads exactly WarpSize query elements at once and then reshuffles them into
-   * corresponding threads (`WarpSize / (kUnroll * Veclen)` elements per thread at once).
+   * thread. One warp loads exactly raft::WarpSize query elements at once and then reshuffles them
+   * into corresponding threads (`raft::WarpSize / (kUnroll * Veclen)` elements per thread at once).
    */
   template <typename IdxT>
   __device__ __forceinline__ void runLoadShflAndCompute(const T*& data,
@@ -143,18 +143,18 @@ struct loadAndComputeDist {
   {
     T queryReg               = query[baseLoadIndex + lane_id];
     constexpr int stride     = kUnroll * Veclen;
-    constexpr int totalIter  = WarpSize / stride;
+    constexpr int totalIter  = raft::WarpSize / stride;
     constexpr int gmemStride = stride * kIndexGroupSize;
 #pragma unroll
     for (int i = 0; i < totalIter; ++i, data += gmemStride) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
         T encV[Veclen];
-        ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen);
+        raft::ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen);
         const int d = (i * kUnroll + j) * Veclen;
 #pragma unroll
         for (int k = 0; k < Veclen; ++k) {
-          compute_dist(dist, shfl(queryReg, d + k, WarpSize), encV[k]);
+          compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), encV[k]);
         }
       }
     }
@@ -162,7 +162,7 @@ struct loadAndComputeDist {
 
   /**
    * Load parts of vectors from the index and query and accumulates the partial distance.
-   * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `WarpSize`.
+   * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `raft::WarpSize`.
    */
   __device__ __forceinline__ void runLoadShflAndComputeRemainder(
     const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks)
@@ -172,10 +172,10 @@ struct loadAndComputeDist {
     const int loadDataIdx = lane_id * Veclen;
     for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) {
       T enc[Veclen];
-      ldg(enc, data + loadDataIdx);
+      raft::ldg(enc, data + loadDataIdx);
 #pragma unroll
       for (int k = 0; k < Veclen; k++) {
-        compute_dist(dist, shfl(queryReg, d + k, WarpSize), enc[k]);
+        compute_dist(dist, raft::shfl(queryReg, d + k, raft::WarpSize), enc[k]);
       }
     }
   }
@@ -202,8 +202,9 @@ struct loadAndComputeDist<kUnroll, Lambda, uint8_veclen, uint8_t, uint32_t> {
 #pragma unroll
     for (int j = 0; j < kUnroll; ++j) {
       uint32_t encV[veclen_int];
-      ldg(encV,
-          reinterpret_cast<unsigned const*>(data) + loadIndex + j * kIndexGroupSize * veclen_int);
+      raft::ldg(
+        encV,
+        reinterpret_cast<unsigned const*>(data) + loadIndex + j * kIndexGroupSize * veclen_int);
       uint32_t queryRegs[veclen_int];
       lds(queryRegs, reinterpret_cast<unsigned const*>(query_shared + shmemIndex) + j * veclen_int);
 #pragma unroll
@@ -223,16 +224,17 @@ struct loadAndComputeDist<kUnroll, Lambda, uint8_veclen, uint8_t, uint32_t> {
     constexpr int stride = kUnroll * uint8_veclen;
 
 #pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+    for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
         uint32_t encV[veclen_int];
-        ldg(encV,
-            reinterpret_cast<unsigned const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
+        raft::ldg(
+          encV,
+          reinterpret_cast<unsigned const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
         const int d = (i * kUnroll + j) * veclen_int;
 #pragma unroll
         for (int k = 0; k < veclen_int; ++k) {
-          compute_dist(dist, shfl(queryReg, d + k, WarpSize), encV[k]);
+          compute_dist(dist, shfl(queryReg, d + k, raft::WarpSize), encV[k]);
         }
       }
     }
@@ -250,10 +252,10 @@ struct loadAndComputeDist<kUnroll, Lambda, uint8_veclen, uint8_t, uint32_t> {
     for (int d = 0; d < dim - dimBlocks;
          d += uint8_veclen, data += kIndexGroupSize * uint8_veclen) {
       uint32_t enc[veclen_int];
-      ldg(enc, reinterpret_cast<uint32_t const*>(data) + lane_id * veclen_int);
+      raft::ldg(enc, reinterpret_cast<uint32_t const*>(data) + lane_id * veclen_int);
 #pragma unroll
       for (int k = 0; k < veclen_int; k++) {
-        uint32_t q = shfl(queryReg, (d / 4) + k, WarpSize);
+        uint32_t q = shfl(queryReg, (d / 4) + k, raft::WarpSize);
         compute_dist(dist, q, enc[k]);
       }
     }
@@ -295,11 +297,11 @@ struct loadAndComputeDist<kUnroll, Lambda, 4, uint8_t, uint32_t> {
     constexpr int stride = kUnroll * veclen;
 
 #pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+    for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
         uint32_t encV = reinterpret_cast<unsigned const*>(data)[lane_id + j * kIndexGroupSize];
-        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
+        uint32_t q    = shfl(queryReg, i * kUnroll + j, raft::WarpSize);
         compute_dist(dist, q, encV);
       }
     }
@@ -316,7 +318,7 @@ struct loadAndComputeDist<kUnroll, Lambda, 4, uint8_t, uint32_t> {
     uint32_t queryReg    = loadDim < dim ? reinterpret_cast<unsigned const*>(query)[loadDim] : 0;
     for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
       uint32_t enc = reinterpret_cast<unsigned const*>(data)[lane_id];
-      uint32_t q   = shfl(queryReg, d / veclen, WarpSize);
+      uint32_t q   = shfl(queryReg, d / veclen, raft::WarpSize);
       compute_dist(dist, q, enc);
     }
   }
@@ -356,11 +358,11 @@ struct loadAndComputeDist<kUnroll, Lambda, 2, uint8_t, uint32_t> {
     constexpr int stride = kUnroll * veclen;
 
 #pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+    for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
         uint32_t encV = reinterpret_cast<uint16_t const*>(data)[lane_id + j * kIndexGroupSize];
-        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
+        uint32_t q    = shfl(queryReg, i * kUnroll + j, raft::WarpSize);
         compute_dist(dist, q, encV);
       }
     }
@@ -377,7 +379,7 @@ struct loadAndComputeDist<kUnroll, Lambda, 2, uint8_t, uint32_t> {
     uint32_t queryReg = loadDim < dim ? reinterpret_cast<uint16_t const*>(query + loadDim)[0] : 0;
     for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
       uint32_t enc = reinterpret_cast<uint16_t const*>(data)[lane_id];
-      uint32_t q   = shfl(queryReg, d / veclen, WarpSize);
+      uint32_t q   = shfl(queryReg, d / veclen, raft::WarpSize);
       compute_dist(dist, q, enc);
     }
   }
@@ -416,11 +418,11 @@ struct loadAndComputeDist<kUnroll, Lambda, 1, uint8_t, uint32_t> {
     constexpr int stride = kUnroll * veclen;
 
 #pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+    for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
         uint32_t encV = data[lane_id + j * kIndexGroupSize];
-        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
+        uint32_t q    = shfl(queryReg, i * kUnroll + j, raft::WarpSize);
         compute_dist(dist, q, encV);
       }
     }
@@ -437,7 +439,7 @@ struct loadAndComputeDist<kUnroll, Lambda, 1, uint8_t, uint32_t> {
     uint32_t queryReg    = loadDim < dim ? query[loadDim] : 0;
     for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
       uint32_t enc = data[lane_id];
-      uint32_t q   = shfl(queryReg, d, WarpSize);
+      uint32_t q   = shfl(queryReg, d, raft::WarpSize);
       compute_dist(dist, q, enc);
     }
   }
@@ -464,10 +466,12 @@ struct loadAndComputeDist<kUnroll, Lambda, int8_veclen, int8_t, int32_t> {
 #pragma unroll
     for (int j = 0; j < kUnroll; ++j) {
       int32_t encV[veclen_int];
-      ldg(encV,
-          reinterpret_cast<int32_t const*>(data) + (loadIndex + j * kIndexGroupSize) * veclen_int);
+      raft::ldg(
+        encV,
+        reinterpret_cast<int32_t const*>(data) + (loadIndex + j * kIndexGroupSize) * veclen_int);
       int32_t queryRegs[veclen_int];
-      lds(queryRegs, reinterpret_cast<int32_t const*>(query_shared + shmemIndex) + j * veclen_int);
+      raft::lds(queryRegs,
+                reinterpret_cast<int32_t const*>(query_shared + shmemIndex) + j * veclen_int);
 #pragma unroll
       for (int k = 0; k < veclen_int; k++) {
         compute_dist(dist, queryRegs[k], encV[k]);
@@ -487,16 +491,17 @@ struct loadAndComputeDist<kUnroll, Lambda, int8_veclen, int8_t, int32_t> {
     constexpr int stride = kUnroll * int8_veclen;
 
 #pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+    for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
         int32_t encV[veclen_int];
-        ldg(encV,
-            reinterpret_cast<int32_t const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
+        raft::ldg(
+          encV,
+          reinterpret_cast<int32_t const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
         const int d = (i * kUnroll + j) * veclen_int;
 #pragma unroll
         for (int k = 0; k < veclen_int; ++k) {
-          int32_t q = shfl(queryReg, d + k, WarpSize);
+          int32_t q = raft::shfl(queryReg, d + k, raft::WarpSize);
           compute_dist(dist, q, encV[k]);
         }
       }
@@ -511,10 +516,10 @@ struct loadAndComputeDist<kUnroll, Lambda, int8_veclen, int8_t, int32_t> {
     int32_t queryReg = loadDim < dim ? reinterpret_cast<int32_t const*>(query + loadDim)[0] : 0;
     for (int d = 0; d < dim - dimBlocks; d += int8_veclen, data += kIndexGroupSize * int8_veclen) {
       int32_t enc[veclen_int];
-      ldg(enc, reinterpret_cast<int32_t const*>(data) + lane_id * veclen_int);
+      raft::ldg(enc, reinterpret_cast<int32_t const*>(data) + lane_id * veclen_int);
 #pragma unroll
       for (int k = 0; k < veclen_int; k++) {
-        int32_t q = shfl(queryReg, (d / 4) + k, WarpSize);  // Here 4 is for 1 - int;
+        int32_t q = shfl(queryReg, (d / 4) + k, raft::WarpSize);  // Here 4 is for 1 - int;
         compute_dist(dist, q, enc[k]);
       }
     }
@@ -553,11 +558,11 @@ struct loadAndComputeDist<kUnroll, Lambda, 2, int8_t, int32_t> {
     constexpr int stride = kUnroll * veclen;
 
 #pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+    for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
         int32_t encV = reinterpret_cast<uint16_t const*>(data)[lane_id + j * kIndexGroupSize];
-        int32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
+        int32_t q    = shfl(queryReg, i * kUnroll + j, raft::WarpSize);
         compute_dist(dist, q, encV);
       }
     }
@@ -571,7 +576,7 @@ struct loadAndComputeDist<kUnroll, Lambda, 2, int8_t, int32_t> {
     int32_t queryReg = loadDim < dim ? reinterpret_cast<uint16_t const*>(query + loadDim)[0] : 0;
     for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
       int32_t enc = reinterpret_cast<uint16_t const*>(data + lane_id * veclen)[0];
-      int32_t q   = shfl(queryReg, d / veclen, WarpSize);
+      int32_t q   = shfl(queryReg, d / veclen, raft::WarpSize);
       compute_dist(dist, q, enc);
     }
   }
@@ -607,11 +612,12 @@ struct loadAndComputeDist<kUnroll, Lambda, 1, int8_t, int32_t> {
     int32_t queryReg     = query[baseLoadIndex + lane_id];
 
 #pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+    for (int i = 0; i < raft::WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
 #pragma unroll
       for (int j = 0; j < kUnroll; ++j) {
-        compute_dist(
-          dist, shfl(queryReg, i * kUnroll + j, WarpSize), data[lane_id + j * kIndexGroupSize]);
+        compute_dist(dist,
+                     raft::shfl(queryReg, i * kUnroll + j, raft::WarpSize),
+                     data[lane_id + j * kIndexGroupSize]);
       }
     }
   }
@@ -622,7 +628,7 @@ struct loadAndComputeDist<kUnroll, Lambda, 1, int8_t, int32_t> {
     const int loadDim    = dimBlocks + lane_id;
     int32_t queryReg     = loadDim < dim ? query[loadDim] : 0;
     for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
-      compute_dist(dist, shfl(queryReg, d, WarpSize), data[lane_id]);
+      compute_dist(dist, raft::shfl(queryReg, d, raft::WarpSize), data[lane_id]);
     }
   }
 };
@@ -637,7 +643,7 @@ struct loadAndComputeDist<kUnroll, Lambda, 1, int8_t, int32_t> {
  *
  * @param compute_dist distance function
  * @param query_smem_elems number of dimensions of the query vector to fit in a shared memory of a
- * block; this number must be a multiple of `WarpSize * Veclen`.
+ * block; this number must be a multiple of `raft::WarpSize * Veclen`.
  * @param[in] query a pointer to all queries in a row-major contiguous format [gridDim.y, dim]
  * @param[in] coarse_index a pointer to the cluster indices to search through [n_probes]
  * @param[in] list_indices index<T, IdxT>.indices
@@ -705,7 +711,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock)
   block_sort_t queue(k);
 
   {
-    using align_warp  = Pow2<WarpSize>;
+    using align_warp  = Pow2<raft::WarpSize>;
     const int lane_id = align_warp::mod(threadIdx.x);
 
     // How many full warps needed to compute the distance (without remainder)
@@ -725,9 +731,9 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock)
       const uint32_t num_groups =
         align_warp::div(list_length + align_warp::Mask);  // raft::ceildiv by power of 2
 
-      constexpr int kUnroll        = WarpSize / Veclen;
-      constexpr uint32_t kNumWarps = kThreadsPerBlock / WarpSize;
-      // Every warp reads WarpSize vectors and computes the distances to them.
+      constexpr int kUnroll        = raft::WarpSize / Veclen;
+      constexpr uint32_t kNumWarps = kThreadsPerBlock / raft::WarpSize;
+      // Every warp reads raft::WarpSize vectors and computes the distances to them.
       // Then, the distances and corresponding ids are distributed among the threads,
       // and each thread adds one (id, dist) pair to the filtering queue.
       for (uint32_t group_id = align_warp::div(threadIdx.x); group_id < num_groups;
@@ -737,7 +743,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock)
         const T* data = list_data_ptrs[list_id] + (group_id * kIndexGroupSize) * dim;
 
         // This is the vector a given lane/thread handles
-        const uint32_t vec_id = group_id * WarpSize + lane_id;
+        const uint32_t vec_id = group_id * raft::WarpSize + lane_id;
         const bool valid =
           vec_id < list_length && sample_filter(queries_offset + blockIdx.y, list_id, vec_id);
 
@@ -746,7 +752,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock)
           loadAndComputeDist<kUnroll, decltype(compute_dist), Veclen, T, AccT> lc(dist,
                                                                                   compute_dist);
           for (int pos = 0; pos < shm_assisted_dim;
-               pos += WarpSize, data += kIndexGroupSize * WarpSize) {
+               pos += raft::WarpSize, data += kIndexGroupSize * raft::WarpSize) {
             lc.runLoadShmemCompute(data, query_shared, lane_id, pos);
           }
         }
@@ -755,7 +761,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock)
           // The default path - using shfl ops - for dimensions beyond query_smem_elems
           loadAndComputeDist<kUnroll, decltype(compute_dist), Veclen, T, AccT> lc(dist,
                                                                                   compute_dist);
-          for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += WarpSize) {
+          for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += raft::WarpSize) {
             lc.runLoadShflAndCompute(data, query, pos, lane_id);
           }
           lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim);
@@ -840,9 +846,9 @@ void launch_kernel(Lambda lambda,
                                                    PostLambda>;
   const int max_query_smem = 16384;
   int query_smem_elems =
-    std::min<int>(max_query_smem / sizeof(T), Pow2<Veclen * WarpSize>::roundUp(index.dim()));
+    std::min<int>(max_query_smem / sizeof(T), Pow2<Veclen * raft::WarpSize>::roundUp(index.dim()));
   int smem_size              = query_smem_elems * sizeof(T);
-  constexpr int kSubwarpSize = std::min<int>(Capacity, WarpSize);
+  constexpr int kSubwarpSize = std::min<int>(Capacity, raft::WarpSize);
   auto block_merge_mem =
     raft::matrix::detail::select::warpsort::calc_smem_size_for_block_wide<AccT, IdxT>(
       kThreadsPerBlock / kSubwarpSize, k);
@@ -936,7 +942,7 @@ struct inner_prod_dist {
   __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y)
   {
     if constexpr (Veclen > 1 && (std::is_same_v<T, int8_t> || std::is_same_v<T, uint8_t>)) {
-      acc = dp4a(x, y, acc);
+      acc = raft::dp4a(x, y, acc);
     } else {
       acc += x * y;
     }
@@ -1000,7 +1006,7 @@ template <typename T,
           typename AccT,
           typename IdxT,
           typename IvfSampleFilterT,
-          int Capacity = matrix::detail::select::warpsort::kMaxCapacity,
+          int Capacity = raft::matrix::detail::select::warpsort::kMaxCapacity,
           int Veclen   = std::max<int>(1, 16 / sizeof(T))>
 struct select_interleaved_scan_kernel {
   /**
@@ -1097,7 +1103,7 @@ void ivfflat_interleaved_scan(const index<T, IdxT>& index,
                               uint32_t& grid_dim_x,
                               rmm::cuda_stream_view stream)
 {
-  const int capacity = bound_by_power_of_two(k);
+  const int capacity = raft::bound_by_power_of_two(k);
 
   auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter(
     index.inds_ptrs().data_handle(), sample_filter);
diff --git a/cpp/include/cuvs/neighbors/detail/knn_brute_force.cuh b/cpp/include/cuvs/neighbors/detail/knn_brute_force.cuh
index 2b75eb048..6914ea030 100644
--- a/cpp/include/cuvs/neighbors/detail/knn_brute_force.cuh
+++ b/cpp/include/cuvs/neighbors/detail/knn_brute_force.cuh
@@ -76,8 +76,8 @@ void tiled_brute_force_knn(const raft::resources& handle,
   // Figure out the number of rows/cols to tile for
   size_t tile_rows   = 0;
   size_t tile_cols   = 0;
-  auto stream        = resource::get_cuda_stream(handle);
-  auto device_memory = resource::get_workspace_resource(handle);
+  auto stream        = raft::resource::get_cuda_stream(handle);
+  auto device_memory = raft::resource::get_workspace_resource(handle);
   auto total_mem     = device_memory->get_mem_info(stream).second;
   faiss_select::chooseTileSize(m, n, d, sizeof(ElementType), total_mem, tile_rows, tile_cols);
 
@@ -236,7 +236,7 @@ void tiled_brute_force_knn(const raft::resources& handle,
         }
       }
 
-      matrix::select_k<ElementType, IndexType>(
+      raft::matrix::select_k<ElementType, IndexType>(
         handle,
         raft::make_device_matrix_view<const ElementType, int64_t, raft::row_major>(
           temp_distances.data(), current_query_size, current_centroid_size),
@@ -263,7 +263,7 @@ void tiled_brute_force_knn(const raft::resources& handle,
         IndexType* out_indices          = temp_out_indices.data();
 
         auto count = thrust::make_counting_iterator<IndexType>(0);
-        thrust::for_each(resource::get_thrust_policy(handle),
+        thrust::for_each(raft::resource::get_thrust_policy(handle),
                          count,
                          count + current_query_size * current_k,
                          [=] __device__(IndexType i) {
@@ -278,7 +278,7 @@ void tiled_brute_force_knn(const raft::resources& handle,
 
     if (tile_cols != n) {
       // select the actual top-k items here from the temporary output
-      matrix::select_k<ElementType, IndexType>(
+      raft::matrix::select_k<ElementType, IndexType>(
         handle,
         raft::make_device_matrix_view<const ElementType, int64_t, raft::row_major>(
           temp_out_distances.data(), current_query_size, temp_out_cols),
diff --git a/cpp/include/cuvs/neighbors/detail/knn_brute_force_batch_k_query.cuh b/cpp/include/cuvs/neighbors/detail/knn_brute_force_batch_k_query.cuh
index f4961ff3a..8d6dce407 100644
--- a/cpp/include/cuvs/neighbors/detail/knn_brute_force_batch_k_query.cuh
+++ b/cpp/include/cuvs/neighbors/detail/knn_brute_force_batch_k_query.cuh
@@ -86,9 +86,9 @@ class gpu_batch_k_query : public batch_k_query<T, IdxT> {
 
     if (!num_queries || !batch_size) { return; }
 
-    matrix::slice_coordinates<int64_t> coords{0, offset, num_queries, offset + batch_size};
-    matrix::slice(this->res, input.indices(), output->indices(), coords);
-    matrix::slice(this->res, input.distances(), output->distances(), coords);
+    raft::matrix::slice_coordinates<int64_t> coords{0, offset, num_queries, offset + batch_size};
+    raft::matrix::slice(this->res, input.indices(), output->indices(), coords);
+    raft::matrix::slice(this->res, input.distances(), output->distances(), coords);
   }
 
   const cuvs::neighbors::brute_force::index<T>& index;
diff --git a/cpp/include/cuvs/neighbors/detail/knn_merge_parts.cuh b/cpp/include/cuvs/neighbors/detail/knn_merge_parts.cuh
index 54b97f562..555f7b258 100644
--- a/cpp/include/cuvs/neighbors/detail/knn_merge_parts.cuh
+++ b/cpp/include/cuvs/neighbors/detail/knn_merge_parts.cuh
@@ -68,7 +68,7 @@ RAFT_KERNEL knn_merge_parts_kernel(const value_t* inK,
   const value_t* inKStart   = inK + (row_idx + col);
   const value_idx* inVStart = inV + (row_idx + col);
 
-  int limit             = Pow2<WarpSize>::roundDown(total_k);
+  int limit             = raft::Pow2<raft::WarpSize>::roundDown(total_k);
   value_idx translation = 0;
 
   for (; i < limit; i += tpb) {
diff --git a/cpp/include/cuvs/neighbors/ivf_flat_types.hpp b/cpp/include/cuvs/neighbors/ivf_flat_types.hpp
index e9c452f5c..4863805fa 100644
--- a/cpp/include/cuvs/neighbors/ivf_flat_types.hpp
+++ b/cpp/include/cuvs/neighbors/ivf_flat_types.hpp
@@ -204,7 +204,7 @@ struct index : ann::index {
    * NB: this may be empty if the index is empty or if the metric does not require the center norms
    * calculation.
    */
-  inline auto center_norms() noexcept -> std::optional<device_vector_view<float, uint32_t>>
+  inline auto center_norms() noexcept -> std::optional<raft::device_vector_view<float, uint32_t>>
   {
     if (center_norms_.has_value()) {
       return std::make_optional<device_vector_view<float, uint32_t>>(center_norms_->view());
@@ -213,10 +213,11 @@ struct index : ann::index {
     }
   }
   [[nodiscard]] inline auto center_norms() const noexcept
-    -> std::optional<device_vector_view<const float, uint32_t>>
+    -> std::optional<raft::device_vector_view<const float, uint32_t>>
   {
     if (center_norms_.has_value()) {
-      return std::make_optional<device_vector_view<const float, uint32_t>>(center_norms_->view());
+      return std::make_optional<raft::device_vector_view<const float, uint32_t>>(
+        center_norms_->view());
     } else {
       return std::nullopt;
     }
@@ -251,12 +252,12 @@ struct index : ann::index {
       metric_(metric),
       adaptive_centers_(adaptive_centers),
       conservative_memory_allocation_{conservative_memory_allocation},
-      centers_(make_device_matrix<float, uint32_t>(res, n_lists, dim)),
+      centers_(raft::make_device_matrix<float, uint32_t>(res, n_lists, dim)),
       center_norms_(std::nullopt),
       lists_{n_lists},
-      list_sizes_{make_device_vector<uint32_t, uint32_t>(res, n_lists)},
-      data_ptrs_{make_device_vector<T*, uint32_t>(res, n_lists)},
-      inds_ptrs_{make_device_vector<IdxT*, uint32_t>(res, n_lists)},
+      list_sizes_{raft::make_device_vector<uint32_t, uint32_t>(res, n_lists)},
+      data_ptrs_{raft::make_device_vector<T*, uint32_t>(res, n_lists)},
+      inds_ptrs_{raft::make_device_vector<IdxT*, uint32_t>(res, n_lists)},
       total_size_{0}
   {
     check_consistency();
@@ -366,7 +367,7 @@ struct index : ann::index {
   std::vector<std::shared_ptr<list_data<T, IdxT>>> lists_;
   raft::device_vector<uint32_t, uint32_t> list_sizes_;
   raft::device_matrix<float, uint32_t, raft::row_major> centers_;
-  std::optional<device_vector<float, uint32_t>> center_norms_;
+  std::optional<raft::device_vector<float, uint32_t>> center_norms_;
 
   // Computed members
   raft::device_vector<T*, uint32_t> data_ptrs_;
diff --git a/cpp/include/cuvs/neighbors/ivf_list_types.hpp b/cpp/include/cuvs/neighbors/ivf_list_types.hpp
index 67dfba807..8d57971a2 100644
--- a/cpp/include/cuvs/neighbors/ivf_list_types.hpp
+++ b/cpp/include/cuvs/neighbors/ivf_list_types.hpp
@@ -46,9 +46,9 @@ struct list {
   using list_extents = typename spec_type::list_extents;
 
   /** Possibly encoded data; it's layout is defined by `SpecT`. */
-  device_mdarray<value_type, list_extents, raft::row_major> data;
+  raft::device_mdarray<value_type, list_extents, raft::row_major> data;
   /** Source indices. */
-  device_mdarray<index_type, extent_1d<size_type>, raft::row_major> indices;
+  raft::device_mdarray<index_type, raft::extent_1d<size_type>, raft::row_major> indices;
   /** The actual size of the content. */
   std::atomic<size_type> size;
 
diff --git a/cpp/include/cuvs/spatial/knn/detail/haversine_distance.cuh b/cpp/include/cuvs/spatial/knn/detail/haversine_distance.cuh
index 44fa210b8..992d27912 100644
--- a/cpp/include/cuvs/spatial/knn/detail/haversine_distance.cuh
+++ b/cpp/include/cuvs/spatial/knn/detail/haversine_distance.cuh
@@ -70,7 +70,7 @@ RAFT_KERNEL haversine_knn_kernel(value_idx* out_inds,
     std::numeric_limits<value_t>::max(), std::numeric_limits<value_idx>::max(), smemK, smemV, k);
 
   // Grid is exactly sized to rows available
-  int limit = Pow2<raft::WarpSize>::roundDown(n_index_rows);
+  int limit = raft::Pow2<raft::WarpSize>::roundDown(n_index_rows);
 
   const value_t* query_ptr = query + (blockIdx.x * 2);
   value_t x1               = query_ptr[0];