rapidsai · rapids-bot · Oct 7, 2025 · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -51,7 +51,11 @@ class instance_manager_t {
   {
     for (size_t i = 0; i < nccl_comms_.size(); ++i) {
       rmm::cuda_set_device_raii local_set_device(device_ids_[i]);
-      RAFT_NCCL_TRY(ncclCommDestroy(*nccl_comms_[i]));
+      try {  // errors thrown here are logged; the loop then moves on to the next communicator
+        RAFT_NCCL_TRY(ncclCommDestroy(*nccl_comms_[i]));
+      } catch (const std::exception& e) {  // only std::exception-derived errors are handled
+        std::cerr << "Error destroying NCCL communication: " << e.what() << std::endl;
+      }
     }
   }
 

@@ -254,6 +254,16 @@ biased_neighbor_sample(
   bool dedupe_sources                             = false,
   bool do_expensive_check                         = false);
 
+enum class temporal_sampling_comparison_t {
+  STRICTLY_INCREASING = 0,  /**< Time strictly increasing (each time is after the previous one) */
+  MONOTONICALLY_INCREASING, /**< Time monotonically increasing (multiple edges may share the
+                               same time) */
+  STRICTLY_DECREASING,      /**< Time strictly decreasing (each time is before the previous one) */
+  MONOTONICALLY_DECREASING, /**< Time monotonically decreasing (multiple edges may share the
+                               same time) */
+  LAST                      /**< Support last n behavior */
+};
+
 struct sampling_flags_t {
   /**
    * Specifies how to handle prior sources. Default is DEFAULT.
@@ -277,6 +287,12 @@ struct sampling_flags_t {
    *   (true) or without replacement (false).  Default is true.
    */
   bool with_replacement{true};
+
+  /**
+   * Specifies how to handle temporal sampling. Default is STRICTLY_INCREASING.
+   */
+  temporal_sampling_comparison_t temporal_sampling_comparison{
+    temporal_sampling_comparison_t::STRICTLY_INCREASING};
 };
 
 /**

@@ -279,6 +279,27 @@ struct temporal_neighbor_sampling_functor : public cugraph::c_api::abstract_func
       std::optional<rmm::device_uvector<label_t>> edge_label{std::nullopt};
       std::optional<rmm::device_uvector<size_t>> offsets{std::nullopt};
 
+      cugraph::temporal_sampling_comparison_t temporal_sampling_comparison{};
+      switch (options_.temporal_sampling_comparison_) {  // map the options_ enum to the cugraph:: enum
+        case cugraph_temporal_sampling_comparison_t::STRICTLY_INCREASING:
+          temporal_sampling_comparison =
+            cugraph::temporal_sampling_comparison_t::STRICTLY_INCREASING;
+          break;
+        case cugraph_temporal_sampling_comparison_t::MONOTONICALLY_INCREASING:
+          temporal_sampling_comparison =
+            cugraph::temporal_sampling_comparison_t::MONOTONICALLY_INCREASING;
+          break;
+        case cugraph_temporal_sampling_comparison_t::STRICTLY_DECREASING:
+          temporal_sampling_comparison =
+            cugraph::temporal_sampling_comparison_t::STRICTLY_DECREASING;
+          break;
+        case cugraph_temporal_sampling_comparison_t::MONOTONICALLY_DECREASING:
+          temporal_sampling_comparison =
+            cugraph::temporal_sampling_comparison_t::MONOTONICALLY_DECREASING;
+          break;
+        default: CUGRAPH_FAIL("Invalid temporal sampling comparison type");
+      };  // NOTE(review): no case above yields the cugraph:: enum's LAST value - confirm intended
+
       // FIXME: For biased sampling, the user should pass either biases or edge weights,
       // otherwised throw an error and suggest the user to call uniform neighbor sample instead
 
@@ -321,7 +342,8 @@ struct temporal_neighbor_sampling_functor : public cugraph::c_api::abstract_func
               cugraph::sampling_flags_t{options_.prior_sources_behavior_,
                                         options_.return_hops_,
                                         options_.dedupe_sources_,
-                                        options_.with_replacement_},
+                                        options_.with_replacement_,
+                                        temporal_sampling_comparison},
               do_expensive_check_);
         } else {
           std::tie(sampled_edge_srcs,
@@ -356,7 +378,8 @@ struct temporal_neighbor_sampling_functor : public cugraph::c_api::abstract_func
               cugraph::sampling_flags_t{options_.prior_sources_behavior_,
                                         options_.return_hops_,
                                         options_.dedupe_sources_,
-                                        options_.with_replacement_},
+                                        options_.with_replacement_,
+                                        temporal_sampling_comparison},
               do_expensive_check_);
         }
       } else {
@@ -394,7 +417,8 @@ struct temporal_neighbor_sampling_functor : public cugraph::c_api::abstract_func
               cugraph::sampling_flags_t{options_.prior_sources_behavior_,
                                         options_.return_hops_,
                                         options_.dedupe_sources_,
-                                        options_.with_replacement_},
+                                        options_.with_replacement_,
+                                        temporal_sampling_comparison},
               do_expensive_check_);
         } else {
           std::tie(sampled_edge_srcs,
@@ -428,7 +452,8 @@ struct temporal_neighbor_sampling_functor : public cugraph::c_api::abstract_func
               cugraph::sampling_flags_t{options_.prior_sources_behavior_,
                                         options_.return_hops_,
                                         options_.dedupe_sources_,
-                                        options_.with_replacement_},
+                                        options_.with_replacement_,
+                                        temporal_sampling_comparison},
               do_expensive_check_);
         }
       }

@@ -27,6 +27,7 @@
 
 #include <cugraph/arithmetic_variant_types.hpp>
 #include <cugraph/edge_property.hpp>
+#include <cugraph/sampling_functions.hpp>
 #include <cugraph/utilities/mask_utils.cuh>
 
 #include <raft/util/cudart_utils.hpp>
@@ -355,6 +356,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<edge_time_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check)
 {
   constexpr bool store_transposed = false;
@@ -532,24 +534,48 @@ temporal_gather_one_hop_edgelist(
       tmp_positions
         ? detail::mark_entries(handle,
                                edge_times.size(),
-                               [d_tmp           = edge_times.data(),
+                               [temporal_sampling_comparison,
+                                d_tmp           = edge_times.data(),
                                 d_tmp_positions = tmp_positions->data(),
                                 kv_store_view =
                                   kv_binary_search_store_device_view_t<decltype(kv_store.view())>{
                                     kv_store.view()}] __device__(auto index) {
                                  auto edge_time = d_tmp[index];
                                  auto key_time =
                                    cuda::std::get<0>(kv_store_view.find(d_tmp_positions[index]));
-                                 return (edge_time > key_time);
+                                 switch (temporal_sampling_comparison) {  // edge-time filter
+                                   case temporal_sampling_comparison_t::STRICTLY_INCREASING:
+                                     return (edge_time > key_time);
+                                   case temporal_sampling_comparison_t::MONOTONICALLY_INCREASING:
+                                     return (edge_time >= key_time);
+                                   case temporal_sampling_comparison_t::STRICTLY_DECREASING:
+                                     return (edge_time < key_time);
+                                   case temporal_sampling_comparison_t::MONOTONICALLY_DECREASING:
+                                     return (edge_time <= key_time);
+                                 }
+                                 assert(false);  // only reached for a value without a case above
+                                 return false;   // never flow off the end when asserts are disabled
                                })
-        : detail::mark_entries(
-            handle,
-            edge_times.size(),
-            [d_tmp = edge_times.data(), d_tmp_time = tmp_times->data()] __device__(auto index) {
-              auto edge_time = d_tmp[index];
-              auto key_time  = d_tmp_time[index];
-              return (edge_time > key_time);
-            });
+        : detail::mark_entries(handle,
+                               edge_times.size(),
+                               [temporal_sampling_comparison,
+                                d_tmp      = edge_times.data(),
+                                d_tmp_time = tmp_times->data()] __device__(auto index) {
+                                 auto edge_time = d_tmp[index];
+                                 auto key_time  = d_tmp_time[index];
+                                 switch (temporal_sampling_comparison) {  // edge-time filter
+                                   case temporal_sampling_comparison_t::STRICTLY_INCREASING:
+                                     return (edge_time > key_time);
+                                   case temporal_sampling_comparison_t::MONOTONICALLY_INCREASING:
+                                     return (edge_time >= key_time);
+                                   case temporal_sampling_comparison_t::STRICTLY_DECREASING:
+                                     return (edge_time < key_time);
+                                   case temporal_sampling_comparison_t::MONOTONICALLY_DECREASING:
+                                     return (edge_time <= key_time);
+                                 }
+                                 assert(false);  // only reached for a value without a case above
+                                 return false;   // never flow off the end when asserts are disabled
+                               });
 
     raft::device_span<uint32_t const> marked_entry_span{marked_entries.data(),
                                                         marked_entries.size()};
@@ -574,12 +600,14 @@ temporal_gather_one_hop_edgelist(
         handle, std::move(*tmp_positions), marked_entry_span, keep_count);
     }
 
-    result_labels = rmm::device_uvector<label_t>(keep_count, handle.get_stream());
-    kv_store.view().find(
-      tmp_positions->begin(),
-      tmp_positions->end(),
-      thrust::make_zip_iterator(thrust::make_discard_iterator(), result_labels->begin()),
-      handle.get_stream());
+    if (active_major_labels) {  // optional input: result_labels is only filled when it is set
+      result_labels = rmm::device_uvector<label_t>(keep_count, handle.get_stream());
+      kv_store.view().find(
+        tmp_positions->begin(),
+        tmp_positions->end(),
+        thrust::make_zip_iterator(thrust::make_discard_iterator(), result_labels->begin()),
+        handle.get_stream());
+    }
   }
 
   std::tie(result_srcs, result_dsts, result_properties) =

@@ -51,6 +51,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int32_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 template std::tuple<rmm::device_uvector<vertex_t>,
@@ -67,6 +68,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int64_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 }  // namespace detail

@@ -51,6 +51,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int32_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 template std::tuple<rmm::device_uvector<vertex_t>,
@@ -67,6 +68,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int64_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 }  // namespace detail

@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "cugraph/sampling_functions.hpp"
 #include "gather_one_hop_impl.cuh"
 
 namespace cugraph {
@@ -51,6 +52,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int32_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 template std::tuple<rmm::device_uvector<vertex_t>,
@@ -67,6 +69,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int64_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 }  // namespace detail

@@ -51,6 +51,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int32_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 template std::tuple<rmm::device_uvector<vertex_t>,
@@ -67,6 +68,7 @@ temporal_gather_one_hop_edgelist(
   raft::device_span<int64_t const> active_major_times,
   std::optional<raft::device_span<int32_t const>> active_major_labels,
   std::optional<raft::device_span<uint8_t const>> gather_flags,
+  temporal_sampling_comparison_t temporal_sampling_comparison,
   bool do_expensive_check);
 
 }  // namespace detail