rapidsai · rapids-bot · May 20, 2025 · May 17, 2025 · May 19, 2025
@@ -294,9 +294,9 @@ class edge_major_property_t {
 
   void clear()
   {
-    edge_partition_keys_                    = std::nullopt;
-    edge_partition_key_chunk_start_offsets_ = std::nullopt;
-    key_chunk_size_                         = std::nullopt;
+    edge_partition_keys_.reset();
+    edge_partition_key_chunk_start_offsets_.reset();
+    key_chunk_size_.reset();
 
     buffers_.clear();
     buffers_.shrink_to_fit();
@@ -424,9 +424,9 @@ class edge_minor_property_t {
 
   void clear()
   {
-    keys_                    = std::nullopt;
-    key_chunk_start_offsets_ = std::nullopt;
-    key_chunk_size_          = std::nullopt;
+    keys_.reset();
+    key_chunk_start_offsets_.reset();
+    key_chunk_size_.reset();
 
     rmm::cuda_stream_view stream{};
     if constexpr (std::is_arithmetic_v<T>) {
@@ -546,6 +546,12 @@ class edge_src_property_t {
  public:
   static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic<T>::value);
 
+  using value_type = T;
+  using value_iterator =
+    dataframe_buffer_iterator_type_t<std::conditional_t<std::is_same_v<T, bool>, uint32_t, T>>;
+  using const_value_iterator = dataframe_buffer_const_iterator_type_t<
+    std::conditional_t<std::is_same_v<T, bool>, uint32_t, T>>;
+
   edge_src_property_t(raft::handle_t const& handle) {}
 
   template <typename GraphViewType>
@@ -599,6 +605,10 @@ class edge_dst_property_t {
   static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic<T>::value);
 
   using value_type = T;
+  using value_iterator =
+    dataframe_buffer_iterator_type_t<std::conditional_t<std::is_same_v<T, bool>, uint32_t, T>>;
+  using const_value_iterator = dataframe_buffer_const_iterator_type_t<
+    std::conditional_t<std::is_same_v<T, bool>, uint32_t, T>>;
 
   edge_dst_property_t(raft::handle_t const& handle) {}
 
@@ -661,6 +671,160 @@ class edge_dst_dummy_property_t {
   auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; }
 };
 
+// SG-only: wraps a caller-supplied vertex property buffer in a const edge source property view
+// (use this instead of creating a new edge_src_property_t object to save memory).
+template <typename vertex_t, typename T, typename GraphViewType>
+detail::edge_endpoint_property_view_t<vertex_t,
+                                      typename edge_src_property_t<vertex_t,
+                                                                   T>::const_value_iterator,
+                                      T>
+make_edge_src_property_view(
+  GraphViewType const& graph_view,  // only used to query the local source range size
+  typename edge_src_property_t<vertex_t, T>::const_value_iterator value_first,
+  size_t num_values)  // element count at value_first; validated against the range size below
+{
+  using const_value_iterator = typename edge_src_property_t<vertex_t, T>::const_value_iterator;
+
+  CUGRAPH_EXPECTS(!GraphViewType::is_multi_gpu,  // run-time check of a compile-time flag
+                  "Invalid input argument: this function is only for single-GPU.");
+
+  vertex_t range_size{};  // set below from graph_view's local source range size
+  if constexpr (GraphViewType::is_storage_transposed) {  // minor (sources when transposed)
+    range_size = graph_view.local_edge_partition_src_range_size();
+  } else {  // major; size_t{0} below is presumably the edge partition index (confirm)
+    range_size = graph_view.local_edge_partition_src_range_size(size_t{0});
+  }
+  auto expected_num_values = std::is_same_v<T, bool>  // bool: size comes from packed_bool_size
+                               ? cugraph::packed_bool_size(static_cast<size_t>(range_size))
+                               : static_cast<size_t>(range_size);
+  CUGRAPH_EXPECTS(
+    num_values == expected_num_values,  // caller-supplied length must match exactly
+    "Invalid input argument: num_values does not match the expected number of values.");
+
+  if constexpr (GraphViewType::is_storage_transposed) {  // minor: one iterator and vertex_t{0}
+    return detail::edge_endpoint_property_view_t<vertex_t, const_value_iterator, T>(value_first,
+                                                                                    vertex_t{0});
+  } else {  // major: one-element vectors holding value_first and vertex_t{0}
+    return detail::edge_endpoint_property_view_t<vertex_t, const_value_iterator, T>(
+      std::vector<const_value_iterator>{value_first}, std::vector<vertex_t>{vertex_t{0}});
+  }
+}
+
+// SG-only: wraps a caller-supplied vertex property buffer in a mutable edge source property view
+// (use this instead of creating a new edge_src_property_t object to save memory).
+template <typename vertex_t, typename T, typename GraphViewType>
+detail::edge_endpoint_property_view_t<vertex_t,
+                                      typename edge_src_property_t<vertex_t, T>::value_iterator,
+                                      T>
+make_edge_src_property_mutable_view(
+  GraphViewType const& graph_view,  // only used to query the local source range size
+  typename edge_src_property_t<vertex_t, T>::value_iterator value_first,
+  size_t num_values)  // element count at value_first; validated against the range size below
+{
+  using value_iterator = typename edge_src_property_t<vertex_t, T>::value_iterator;
+
+  CUGRAPH_EXPECTS(!GraphViewType::is_multi_gpu,  // run-time check of a compile-time flag
+                  "Invalid input argument: this function is only for single-GPU.");
+
+  vertex_t range_size{};  // set below from graph_view's local source range size
+  if constexpr (GraphViewType::is_storage_transposed) {  // minor (sources when transposed)
+    range_size = graph_view.local_edge_partition_src_range_size();
+  } else {  // major; size_t{0} below is presumably the edge partition index (confirm)
+    range_size = graph_view.local_edge_partition_src_range_size(size_t{0});
+  }
+  auto expected_num_values = std::is_same_v<T, bool>  // bool: size comes from packed_bool_size
+                               ? cugraph::packed_bool_size(static_cast<size_t>(range_size))
+                               : static_cast<size_t>(range_size);
+  CUGRAPH_EXPECTS(
+    num_values == expected_num_values,  // caller-supplied length must match exactly
+    "Invalid input argument: num_values does not match the expected number of values.");
+
+  if constexpr (GraphViewType::is_storage_transposed) {  // minor: one iterator and vertex_t{0}
+    return detail::edge_endpoint_property_view_t<vertex_t, value_iterator, T>(value_first,
+                                                                              vertex_t{0});
+  } else {  // major: one-element vectors holding value_first and vertex_t{0}
+    return detail::edge_endpoint_property_view_t<vertex_t, value_iterator, T>(
+      std::vector<value_iterator>{value_first}, std::vector<vertex_t>{vertex_t{0}});
+  }
+}
+
+// SG-only: wraps a caller-supplied vertex property buffer in a const edge destination property
+// view (use this instead of creating a new edge_dst_property_t object to save memory).
+template <typename vertex_t, typename T, typename GraphViewType>
+detail::edge_endpoint_property_view_t<vertex_t,
+                                      typename edge_dst_property_t<vertex_t,
+                                                                   T>::const_value_iterator,
+                                      T>
+make_edge_dst_property_view(
+  GraphViewType const& graph_view,  // only used to query the local destination range size
+  typename edge_dst_property_t<vertex_t, T>::const_value_iterator value_first,
+  size_t num_values)  // element count at value_first; validated against the range size below
+{
+  using const_value_iterator = typename edge_dst_property_t<vertex_t, T>::const_value_iterator;
+
+  CUGRAPH_EXPECTS(!GraphViewType::is_multi_gpu,  // run-time check of a compile-time flag
+                  "Invalid input argument: this function is only for single-GPU.");
+
+  vertex_t range_size{};  // set below from graph_view's local destination range size
+  if constexpr (GraphViewType::is_storage_transposed) {  // major; size_t{0}: partition index?
+    range_size = graph_view.local_edge_partition_dst_range_size(size_t{0});
+  } else {  // minor (destinations when storage is not transposed)
+    range_size = graph_view.local_edge_partition_dst_range_size();
+  }
+  auto expected_num_values = std::is_same_v<T, bool>  // bool: size comes from packed_bool_size
+                               ? cugraph::packed_bool_size(static_cast<size_t>(range_size))
+                               : static_cast<size_t>(range_size);
+  CUGRAPH_EXPECTS(
+    num_values == expected_num_values,  // caller-supplied length must match exactly
+    "Invalid input argument: num_values does not match the expected number of values.");
+
+  if constexpr (GraphViewType::is_storage_transposed) {  // major: one-element vectors
+    return detail::edge_endpoint_property_view_t<vertex_t, const_value_iterator, T>(
+      std::vector<const_value_iterator>{value_first}, std::vector<vertex_t>{vertex_t{0}});
+  } else {  // minor: built from a single iterator and vertex_t{0}
+    return detail::edge_endpoint_property_view_t<vertex_t, const_value_iterator, T>(value_first,
+                                                                                    vertex_t{0});
+  }
+}
+
+// SG-only: wraps a caller-supplied vertex property buffer in a mutable edge destination property
+// view (use this instead of creating a new edge_dst_property_t object to save memory).
+template <typename vertex_t, typename T, typename GraphViewType>
+detail::edge_endpoint_property_view_t<vertex_t,
+                                      typename edge_dst_property_t<vertex_t, T>::value_iterator,
+                                      T>
+make_edge_dst_property_mutable_view(
+  GraphViewType const& graph_view,  // only used to query the local destination range size
+  typename edge_dst_property_t<vertex_t, T>::value_iterator value_first,
+  size_t num_values)  // element count at value_first; validated against the range size below
+{
+  using value_iterator = typename edge_dst_property_t<vertex_t, T>::value_iterator;
+
+  CUGRAPH_EXPECTS(!GraphViewType::is_multi_gpu,  // run-time check of a compile-time flag
+                  "Invalid input argument: this function is only for single-GPU.");
+
+  vertex_t range_size{};  // set below from graph_view's local destination range size
+  if constexpr (GraphViewType::is_storage_transposed) {  // major; size_t{0}: partition index?
+    range_size = graph_view.local_edge_partition_dst_range_size(size_t{0});
+  } else {  // minor (destinations when storage is not transposed)
+    range_size = graph_view.local_edge_partition_dst_range_size();
+  }
+  auto expected_num_values = std::is_same_v<T, bool>  // bool: size comes from packed_bool_size
+                               ? cugraph::packed_bool_size(static_cast<size_t>(range_size))
+                               : static_cast<size_t>(range_size);
+  CUGRAPH_EXPECTS(
+    num_values == expected_num_values,  // caller-supplied length must match exactly
+    "Invalid input argument: num_values does not match the expected number of values.");
+
+  if constexpr (GraphViewType::is_storage_transposed) {  // major: one-element vectors
+    return detail::edge_endpoint_property_view_t<vertex_t, value_iterator, T>(
+      std::vector<value_iterator>{value_first}, std::vector<vertex_t>{vertex_t{0}});
+  } else {  // minor: built from a single iterator and vertex_t{0}
+    return detail::edge_endpoint_property_view_t<vertex_t, value_iterator, T>(value_first,
+                                                                              vertex_t{0});
+  }
+}
+
 template <typename vertex_t, typename... Iters, typename... Types>
 auto view_concat(detail::edge_endpoint_property_view_t<vertex_t, Iters, Types> const&... views)
 {

@@ -45,9 +45,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
                   "Invalid input arguments: input graph for approximate_weighted_matching must "
                   "need to be symmetric");
 
-  using graph_view_t = cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu>;
-
-  graph_view_t current_graph_view(graph_view);
+  auto current_graph_view = graph_view;
   if (current_graph_view.has_edge_mask()) { current_graph_view.clear_edge_mask(); }
 
   cugraph::edge_property_t<edge_t, bool> edge_masks_even(handle, current_graph_view);
@@ -100,7 +98,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
   cugraph::edge_src_property_t<vertex_t, bool> src_match_flags(handle);
   cugraph::edge_dst_property_t<vertex_t, bool> dst_match_flags(handle);
 
-  if constexpr (graph_view_t::is_multi_gpu) {
+  if constexpr (multi_gpu) {
     src_key_cache = cugraph::edge_src_property_t<vertex_t, vertex_t>(handle, current_graph_view);
 
     update_edge_src_property(
@@ -129,11 +127,9 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
         cugraph::edge_src_dummy_property_t{}.view(),
         cugraph::edge_dst_dummy_property_t{}.view(),
         edge_weight_view,
-        graph_view_t::is_multi_gpu
-          ? src_key_cache.view()
-          : detail::edge_endpoint_property_view_t<vertex_t, vertex_t const*>(
-              std::vector<vertex_t const*>{local_vertices.begin()},
-              std::vector<vertex_t>{vertex_t{0}}),
+        multi_gpu ? src_key_cache.view()
+                  : make_edge_src_property_view<vertex_t, vertex_t>(
+                      current_graph_view, local_vertices.begin(), local_vertices.size()),
         [] __device__(auto, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto wt) {
           return thrust::make_tuple(wt, dst);
         },
@@ -145,7 +141,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
     // For each target, find the best offer
     //
 
-    if constexpr (graph_view_t::is_multi_gpu) {
+    if constexpr (multi_gpu) {
       auto vertex_partition_range_lasts = current_graph_view.vertex_partition_range_lasts();
 
       rmm::device_uvector<vertex_t> d_vertex_partition_range_lasts(
@@ -225,7 +221,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
 
     rmm::device_uvector<vertex_t> candidates_of_candidates(0, handle.get_stream());
 
-    if (graph_view_t::is_multi_gpu) {
+    if constexpr (multi_gpu) {
       auto& comm       = handle.get_comms();
       auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
       auto const major_comm_size = major_comm.get_size();
@@ -302,15 +298,15 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
 
     if (current_graph_view.compute_number_of_edges(handle) == 0) { break; }
 
-    if constexpr (graph_view_t::is_multi_gpu) {
+    if constexpr (multi_gpu) {
       cugraph::update_edge_src_property(
         handle, current_graph_view, is_vertex_matched.begin(), src_match_flags.mutable_view());
       cugraph::update_edge_dst_property(
         handle, current_graph_view, is_vertex_matched.begin(), dst_match_flags.mutable_view());
     }
 
     if (loop_counter % 2 == 0) {
-      if constexpr (graph_view_t::is_multi_gpu) {
+      if constexpr (multi_gpu) {
         cugraph::transform_e(
           handle,
           current_graph_view,
@@ -326,14 +322,17 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
         cugraph::transform_e(
           handle,
           current_graph_view,
-          detail::edge_endpoint_property_view_t<vertex_t, bool const*>(
-            std::vector<bool const*>{is_vertex_matched.begin()},
-            std::vector<vertex_t>{vertex_t{0}}),
-          detail::edge_endpoint_property_view_t<vertex_t, bool const*>(is_vertex_matched.begin(),
-                                                                       vertex_t{0}),
+          cugraph::edge_src_dummy_property_t{}.view(),
+          cugraph::edge_dst_dummy_property_t{}.view(),
           cugraph::edge_dummy_property_t{}.view(),
-          [] __device__(
-            auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) {
+          [is_vertex_matched = raft::device_span<bool const>(
+             is_vertex_matched.data(), is_vertex_matched.size())] __device__(auto src,
+                                                                             auto dst,
+                                                                             cuda::std::nullopt_t,
+                                                                             cuda::std::nullopt_t,
+                                                                             cuda::std::nullopt_t) {
+            auto is_src_matched = is_vertex_matched[src];
+            auto is_dst_matched = is_vertex_matched[dst];
             return !((is_src_matched == true) || (is_dst_matched == true));
           },
           edge_masks_odd.mutable_view());
@@ -344,7 +343,7 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
         handle, current_graph_view, edge_masks_even.mutable_view(), bool{false});
       current_graph_view.attach_edge_mask(edge_masks_odd.view());
     } else {
-      if constexpr (graph_view_t::is_multi_gpu) {
+      if constexpr (multi_gpu) {
         cugraph::transform_e(
           handle,
           current_graph_view,
@@ -360,14 +359,17 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
         cugraph::transform_e(
           handle,
           current_graph_view,
-          detail::edge_endpoint_property_view_t<vertex_t, bool const*>(
-            std::vector<bool const*>{is_vertex_matched.begin()},
-            std::vector<vertex_t>{vertex_t{0}}),
-          detail::edge_endpoint_property_view_t<vertex_t, bool const*>(is_vertex_matched.begin(),
-                                                                       vertex_t{0}),
+          cugraph::edge_src_dummy_property_t{}.view(),
+          cugraph::edge_dst_dummy_property_t{}.view(),
           cugraph::edge_dummy_property_t{}.view(),
-          [] __device__(
-            auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) {
+          [is_vertex_matched = raft::device_span<bool const>(
+             is_vertex_matched.data(), is_vertex_matched.size())] __device__(auto src,
+                                                                             auto dst,
+                                                                             cuda::std::nullopt_t,
+                                                                             cuda::std::nullopt_t,
+                                                                             cuda::std::nullopt_t) {
+            auto is_src_matched = is_vertex_matched[src];
+            auto is_dst_matched = is_vertex_matched[dst];
             return !((is_src_matched == true) || (is_dst_matched == true));
           },
           edge_masks_even.mutable_view());
@@ -385,13 +387,14 @@ std::tuple<rmm::device_uvector<vertex_t>, weight_t> approximate_weighted_matchin
   weight_t sum_matched_edge_weights = thrust::reduce(
     handle.get_thrust_policy(), offers_from_partners.begin(), offers_from_partners.end());
 
-  if constexpr (graph_view_t::is_multi_gpu) {
+  if constexpr (multi_gpu) {
     sum_matched_edge_weights = host_scalar_allreduce(
       handle.get_comms(), sum_matched_edge_weights, raft::comms::op_t::SUM, handle.get_stream());
   }
 
   return std::make_tuple(std::move(partners), sum_matched_edge_weights / 2.0);
 }
+
 }  // namespace detail
 
 template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>